upstream/mercurial-mirror Commit - r36741:1f9bbd1d

1

/*

1

/*

2

* LibXDiff by Davide Libenzi ( File Differential Library )

2

* LibXDiff by Davide Libenzi ( File Differential Library )

3

4

*

4

*

5

* This library is free software; you can redistribute it and/or

5

* This library is free software; you can redistribute it and/or

6

* modify it under the terms of the GNU Lesser General Public

6

* modify it under the terms of the GNU Lesser General Public

7

* License as published by the Free Software Foundation; either

7

* License as published by the Free Software Foundation; either

8

* version 2.1 of the License, or (at your option) any later version.

8

* version 2.1 of the License, or (at your option) any later version.

9

*

9

*

10

* This library is distributed in the hope that it will be useful,

10

* This library is distributed in the hope that it will be useful,

11

* but WITHOUT ANY WARRANTY; without even the implied warranty of

11

* but WITHOUT ANY WARRANTY; without even the implied warranty of

12

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

12

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13

* Lesser General Public License for more details.

13

* Lesser General Public License for more details.

14

*

14

*

15

* You should have received a copy of the GNU Lesser General Public

15

* You should have received a copy of the GNU Lesser General Public

16

* License along with this library; if not, see

16

* License along with this library; if not, see

17

* <http://www.gnu.org/licenses/>.

17

* <http://www.gnu.org/licenses/>.

18

*

18

*

19

* Davide Libenzi <davidel@xmailserver.org>

19

* Davide Libenzi <davidel@xmailserver.org>

20

*

20

*

21

*/

21

*/

22

23

#include "xinclude.h"

23

#include "xinclude.h"

24

25

26

27

/*
 * Tunables for the box-splitting search.
 *
 * XDL_MAX_COST_MIN  - lower bound applied to the maximum edit cost
 *                     (xenv.mxcost) in xdl_do_diff().
 * XDL_HEUR_MIN_COST - edit cost above which xdl_split() starts applying
 *                     its snake heuristic (xenv.heur_min).
 * XDL_LINE_MAX      - largest value representable in a long; used as the
 *                     "not reached yet" sentinel for backward diagonals.
 * XDL_SNAKE_CNT     - run length of matching records that counts as a
 *                     "good snake" (xenv.snake_cnt).
 * XDL_K_HEUR        - factor applied to the edit cost when deciding
 *                     whether a diagonal is "interesting".
 */
#define XDL_MAX_COST_MIN 256
#define XDL_HEUR_MIN_COST 256
#define XDL_LINE_MAX ((long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1))
#define XDL_SNAKE_CNT 20
#define XDL_K_HEUR 4

/* VC 2008 doesn't know about the inline keyword. */
#if defined(_MSC_VER)
#define inline __forceinline
#endif

33

37

34

38

35

/*
 * Result of one xdl_split() call.
 *
 * i1, i2  - split point inside the box (record index in file 1 / file 2).
 * min_lo  - passed as need_min when xdl_recs_cmp() recurses into the
 *           sub-box below the split point.
 * min_hi  - passed as need_min when xdl_recs_cmp() recurses into the
 *           sub-box above the split point.
 */
typedef struct s_xdpsplit {
	long i1, i2;
	int min_lo, min_hi;
} xdpsplit_t;

39

43

40

44

41

45

42

46

43

static long xdl_split(unsigned long const *ha1, long off1, long lim1,

47

static long xdl_split(unsigned long const *ha1, long off1, long lim1,

44

unsigned long const *ha2, long off2, long lim2,

48

unsigned long const *ha2, long off2, long lim2,

45

long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,

49

long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,

46

xdalgoenv_t *xenv);

50

xdalgoenv_t *xenv);

47

static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);

51

static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);

48

52

49

53

50

54

51

55

52

56

53

/*

57

/*

54

* See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.

58

* See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.

55

* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both

59

* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both

56

* the forward diagonal starting from (off1, off2) and the backward diagonal

60

* the forward diagonal starting from (off1, off2) and the backward diagonal

57

* starting from (lim1, lim2). If the K values on the same diagonal crosses

61

* starting from (lim1, lim2). If the K values on the same diagonal crosses

58

* returns the furthest point of reach. We might end up having to expensive

62

* returns the furthest point of reach. We might end up having to expensive

59

* cases using this algorithm is full, so a little bit of heuristic is needed

63

* cases using this algorithm is full, so a little bit of heuristic is needed

60

* to cut the search and to return a suboptimal point.

64

* to cut the search and to return a suboptimal point.

61

*/

65

*/

62

static long xdl_split(unsigned long const *ha1, long off1, long lim1,

66

static long xdl_split(unsigned long const *ha1, long off1, long lim1,

63

unsigned long const *ha2, long off2, long lim2,

67

unsigned long const *ha2, long off2, long lim2,

64

long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,

68

long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,

65

xdalgoenv_t *xenv) {

69

xdalgoenv_t *xenv) {

66

long dmin = off1 - lim2, dmax = lim1 - off2;

70

long dmin = off1 - lim2, dmax = lim1 - off2;

67

long fmid = off1 - off2, bmid = lim1 - lim2;

71

long fmid = off1 - off2, bmid = lim1 - lim2;

68

long odd = (fmid - bmid) & 1;

72

long odd = (fmid - bmid) & 1;

69

long fmin = fmid, fmax = fmid;

73

long fmin = fmid, fmax = fmid;

70

long bmin = bmid, bmax = bmid;

74

long bmin = bmid, bmax = bmid;

71

long ec, d, i1, i2, prev1, best, dd, v, k;

75

long ec, d, i1, i2, prev1, best, dd, v, k;

72

76

73

/*

77

/*

74

* Set initial diagonal values for both forward and backward path.

78

* Set initial diagonal values for both forward and backward path.

75

*/

79

*/

76

kvdf[fmid] = off1;

80

kvdf[fmid] = off1;

77

kvdb[bmid] = lim1;

81

kvdb[bmid] = lim1;

78

82

79

for (ec = 1;; ec++) {

83

for (ec = 1;; ec++) {

80

int got_snake = 0;

84

int got_snake = 0;

81

85

82

/*

86

/*

83

* We need to extent the diagonal "domain" by one. If the next

87

* We need to extent the diagonal "domain" by one. If the next

84

* values exits the box boundaries we need to change it in the

88

* values exits the box boundaries we need to change it in the

85

* opposite direction because (max - min) must be a power of two.

89

* opposite direction because (max - min) must be a power of two.

86

* Also we initialize the external K value to -1 so that we can

90

* Also we initialize the external K value to -1 so that we can

87

* avoid extra conditions check inside the core loop.

91

* avoid extra conditions check inside the core loop.

88

*/

92

*/

89

if (fmin > dmin)

93

if (fmin > dmin)

90

kvdf[--fmin - 1] = -1;

94

kvdf[--fmin - 1] = -1;

91

else

95

else

92

++fmin;

96

++fmin;

93

if (fmax < dmax)

97

if (fmax < dmax)

94

kvdf[++fmax + 1] = -1;

98

kvdf[++fmax + 1] = -1;

95

else

99

else

96

--fmax;

100

--fmax;

97

101

98

for (d = fmax; d >= fmin; d -= 2) {

102

for (d = fmax; d >= fmin; d -= 2) {

99

if (kvdf[d - 1] >= kvdf[d + 1])

103

if (kvdf[d - 1] >= kvdf[d + 1])

100

i1 = kvdf[d - 1] + 1;

104

i1 = kvdf[d - 1] + 1;

101

else

105

else

102

i1 = kvdf[d + 1];

106

i1 = kvdf[d + 1];

103

prev1 = i1;

107

prev1 = i1;

104

i2 = i1 - d;

108

i2 = i1 - d;

105

for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);

109

for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);

106

if (i1 - prev1 > xenv->snake_cnt)

110

if (i1 - prev1 > xenv->snake_cnt)

107

got_snake = 1;

111

got_snake = 1;

108

kvdf[d] = i1;

112

kvdf[d] = i1;

109

if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {

113

if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {

110

spl->i1 = i1;

114

spl->i1 = i1;

111

spl->i2 = i2;

115

spl->i2 = i2;

112

spl->min_lo = spl->min_hi = 1;

116

spl->min_lo = spl->min_hi = 1;

113

return ec;

117

return ec;

114

}

118

}

115

}

119

}

116

120

117

/*

121

/*

118

* We need to extent the diagonal "domain" by one. If the next

122

* We need to extent the diagonal "domain" by one. If the next

119

* values exits the box boundaries we need to change it in the

123

* values exits the box boundaries we need to change it in the

120

* opposite direction because (max - min) must be a power of two.

124

* opposite direction because (max - min) must be a power of two.

121

* Also we initialize the external K value to -1 so that we can

125

* Also we initialize the external K value to -1 so that we can

122

* avoid extra conditions check inside the core loop.

126

* avoid extra conditions check inside the core loop.

123

*/

127

*/

124

if (bmin > dmin)

128

if (bmin > dmin)

125

kvdb[--bmin - 1] = XDL_LINE_MAX;

129

kvdb[--bmin - 1] = XDL_LINE_MAX;

126

else

130

else

127

++bmin;

131

++bmin;

128

if (bmax < dmax)

132

if (bmax < dmax)

129

kvdb[++bmax + 1] = XDL_LINE_MAX;

133

kvdb[++bmax + 1] = XDL_LINE_MAX;

130

else

134

else

131

--bmax;

135

--bmax;

132

136

133

for (d = bmax; d >= bmin; d -= 2) {

137

for (d = bmax; d >= bmin; d -= 2) {

134

if (kvdb[d - 1] < kvdb[d + 1])

138

if (kvdb[d - 1] < kvdb[d + 1])

135

i1 = kvdb[d - 1];

139

i1 = kvdb[d - 1];

136

else

140

else

137

i1 = kvdb[d + 1] - 1;

141

i1 = kvdb[d + 1] - 1;

138

prev1 = i1;

142

prev1 = i1;

139

i2 = i1 - d;

143

i2 = i1 - d;

140

for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);

144

for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);

141

if (prev1 - i1 > xenv->snake_cnt)

145

if (prev1 - i1 > xenv->snake_cnt)

142

got_snake = 1;

146

got_snake = 1;

143

kvdb[d] = i1;

147

kvdb[d] = i1;

144

if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {

148

if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {

145

spl->i1 = i1;

149

spl->i1 = i1;

146

spl->i2 = i2;

150

spl->i2 = i2;

147

spl->min_lo = spl->min_hi = 1;

151

spl->min_lo = spl->min_hi = 1;

148

return ec;

152

return ec;

149

}

153

}

150

}

154

}

151

155

152

if (need_min)

156

if (need_min)

153

continue;

157

continue;

154

158

155

/*

159

/*

156

* If the edit cost is above the heuristic trigger and if

160

* If the edit cost is above the heuristic trigger and if

157

* we got a good snake, we sample current diagonals to see

161

* we got a good snake, we sample current diagonals to see

158

* if some of the, have reached an "interesting" path. Our

162

* if some of the, have reached an "interesting" path. Our

159

* measure is a function of the distance from the diagonal

163

* measure is a function of the distance from the diagonal

160

* corner (i1 + i2) penalized with the distance from the

164

* corner (i1 + i2) penalized with the distance from the

161

* mid diagonal itself. If this value is above the current

165

* mid diagonal itself. If this value is above the current

162

* edit cost times a magic factor (XDL_K_HEUR) we consider

166

* edit cost times a magic factor (XDL_K_HEUR) we consider

163

* it interesting.

167

* it interesting.

164

*/

168

*/

165

if (got_snake && ec > xenv->heur_min) {

169

if (got_snake && ec > xenv->heur_min) {

166

for (best = 0, d = fmax; d >= fmin; d -= 2) {

170

for (best = 0, d = fmax; d >= fmin; d -= 2) {

167

dd = d > fmid ? d - fmid: fmid - d;

171

dd = d > fmid ? d - fmid: fmid - d;

168

i1 = kvdf[d];

172

i1 = kvdf[d];

169

i2 = i1 - d;

173

i2 = i1 - d;

170

v = (i1 - off1) + (i2 - off2) - dd;

174

v = (i1 - off1) + (i2 - off2) - dd;

171

175

172

if (v > XDL_K_HEUR * ec && v > best &&

176

if (v > XDL_K_HEUR * ec && v > best &&

173

off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&

177

off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&

174

off2 + xenv->snake_cnt <= i2 && i2 < lim2) {

178

off2 + xenv->snake_cnt <= i2 && i2 < lim2) {

175

for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)

179

for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)

176

if (k == xenv->snake_cnt) {

180

if (k == xenv->snake_cnt) {

177

best = v;

181

best = v;

178

spl->i1 = i1;

182

spl->i1 = i1;

179

spl->i2 = i2;

183

spl->i2 = i2;

180

break;

184

break;

181

}

185

}

182

}

186

}

183

}

187

}

184

if (best > 0) {

188

if (best > 0) {

185

spl->min_lo = 1;

189

spl->min_lo = 1;

186

spl->min_hi = 0;

190

spl->min_hi = 0;

187

return ec;

191

return ec;

188

}

192

}

189

193

190

for (best = 0, d = bmax; d >= bmin; d -= 2) {

194

for (best = 0, d = bmax; d >= bmin; d -= 2) {

191

dd = d > bmid ? d - bmid: bmid - d;

195

dd = d > bmid ? d - bmid: bmid - d;

192

i1 = kvdb[d];

196

i1 = kvdb[d];

193

i2 = i1 - d;

197

i2 = i1 - d;

194

v = (lim1 - i1) + (lim2 - i2) - dd;

198

v = (lim1 - i1) + (lim2 - i2) - dd;

195

199

196

if (v > XDL_K_HEUR * ec && v > best &&

200

if (v > XDL_K_HEUR * ec && v > best &&

197

off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&

201

off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&

198

off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {

202

off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {

199

for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)

203

for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)

200

if (k == xenv->snake_cnt - 1) {

204

if (k == xenv->snake_cnt - 1) {

201

best = v;

205

best = v;

202

spl->i1 = i1;

206

spl->i1 = i1;

203

spl->i2 = i2;

207

spl->i2 = i2;

204

break;

208

break;

205

}

209

}

206

}

210

}

207

}

211

}

208

if (best > 0) {

212

if (best > 0) {

209

spl->min_lo = 0;

213

spl->min_lo = 0;

210

spl->min_hi = 1;

214

spl->min_hi = 1;

211

return ec;

215

return ec;

212

}

216

}

213

}

217

}

214

218

215

/*

219

/*

216

* Enough is enough. We spent too much time here and now we collect

220

* Enough is enough. We spent too much time here and now we collect

217

* the furthest reaching path using the (i1 + i2) measure.

221

* the furthest reaching path using the (i1 + i2) measure.

218

*/

222

*/

219

if (ec >= xenv->mxcost) {

223

if (ec >= xenv->mxcost) {

220

long fbest, fbest1, bbest, bbest1;

224

long fbest, fbest1, bbest, bbest1;

221

225

222

fbest = fbest1 = -1;

226

fbest = fbest1 = -1;

223

for (d = fmax; d >= fmin; d -= 2) {

227

for (d = fmax; d >= fmin; d -= 2) {

224

i1 = XDL_MIN(kvdf[d], lim1);

228

i1 = XDL_MIN(kvdf[d], lim1);

225

i2 = i1 - d;

229

i2 = i1 - d;

226

if (lim2 < i2)

230

if (lim2 < i2)

227

i1 = lim2 + d, i2 = lim2;

231

i1 = lim2 + d, i2 = lim2;

228

if (fbest < i1 + i2) {

232

if (fbest < i1 + i2) {

229

fbest = i1 + i2;

233

fbest = i1 + i2;

230

fbest1 = i1;

234

fbest1 = i1;

231

}

235

}

232

}

236

}

233

237

234

bbest = bbest1 = XDL_LINE_MAX;

238

bbest = bbest1 = XDL_LINE_MAX;

235

for (d = bmax; d >= bmin; d -= 2) {

239

for (d = bmax; d >= bmin; d -= 2) {

236

i1 = XDL_MAX(off1, kvdb[d]);

240

i1 = XDL_MAX(off1, kvdb[d]);

237

i2 = i1 - d;

241

i2 = i1 - d;

238

if (i2 < off2)

242

if (i2 < off2)

239

i1 = off2 + d, i2 = off2;

243

i1 = off2 + d, i2 = off2;

240

if (i1 + i2 < bbest) {

244

if (i1 + i2 < bbest) {

241

bbest = i1 + i2;

245

bbest = i1 + i2;

242

bbest1 = i1;

246

bbest1 = i1;

243

}

247

}

244

}

248

}

245

249

246

if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {

250

if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {

247

spl->i1 = fbest1;

251

spl->i1 = fbest1;

248

spl->i2 = fbest - fbest1;

252

spl->i2 = fbest - fbest1;

249

spl->min_lo = 1;

253

spl->min_lo = 1;

250

spl->min_hi = 0;

254

spl->min_hi = 0;

251

} else {

255

} else {

252

spl->i1 = bbest1;

256

spl->i1 = bbest1;

253

spl->i2 = bbest - bbest1;

257

spl->i2 = bbest - bbest1;

254

spl->min_lo = 0;

258

spl->min_lo = 0;

255

spl->min_hi = 1;

259

spl->min_hi = 1;

256

}

260

}

257

return ec;

261

return ec;

258

}

262

}

259

}

263

}

260

}

264

}

261

265

262

266

263

/*

267

/*

264

* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling

268

* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling

265

* the box splitting function. Note that the real job (marking changed lines)

269

* the box splitting function. Note that the real job (marking changed lines)

266

* is done in the two boundary reaching checks.

270

* is done in the two boundary reaching checks.

267

*/

271

*/

268

int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,

272

int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,

269

diffdata_t *dd2, long off2, long lim2,

273

diffdata_t *dd2, long off2, long lim2,

270

long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {

274

long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {

271

unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;

275

unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;

272

276

273

/*

277

/*

274

* Shrink the box by walking through each diagonal snake (SW and NE).

278

* Shrink the box by walking through each diagonal snake (SW and NE).

275

*/

279

*/

276

for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);

280

for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);

277

for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);

281

for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);

278

282

279

/*

283

/*

280

* If one dimension is empty, then all records on the other one must

284

* If one dimension is empty, then all records on the other one must

281

* be obviously changed.

285

* be obviously changed.

282

*/

286

*/

283

if (off1 == lim1) {

287

if (off1 == lim1) {

284

char *rchg2 = dd2->rchg;

288

char *rchg2 = dd2->rchg;

285

long *rindex2 = dd2->rindex;

289

long *rindex2 = dd2->rindex;

286

290

287

for (; off2 < lim2; off2++)

291

for (; off2 < lim2; off2++)

288

rchg2[rindex2[off2]] = 1;

292

rchg2[rindex2[off2]] = 1;

289

} else if (off2 == lim2) {

293

} else if (off2 == lim2) {

290

char *rchg1 = dd1->rchg;

294

char *rchg1 = dd1->rchg;

291

long *rindex1 = dd1->rindex;

295

long *rindex1 = dd1->rindex;

292

296

293

for (; off1 < lim1; off1++)

297

for (; off1 < lim1; off1++)

294

rchg1[rindex1[off1]] = 1;

298

rchg1[rindex1[off1]] = 1;

295

} else {

299

} else {

296

xdpsplit_t spl;

300

xdpsplit_t spl;

297

spl.i1 = spl.i2 = 0;

301

spl.i1 = spl.i2 = 0;

298

302

299

/*

303

/*

300

* Divide ...

304

* Divide ...

301

*/

305

*/

302

if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,

306

if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,

303

need_min, &spl, xenv) < 0) {

307

need_min, &spl, xenv) < 0) {

304

308

305

return -1;

309

return -1;

306

}

310

}

307

311

308

/*

312

/*

309

* ... et Impera.

313

* ... et Impera.

310

*/

314

*/

311

if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,

315

if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,

312

kvdf, kvdb, spl.min_lo, xenv) < 0 ||

316

kvdf, kvdb, spl.min_lo, xenv) < 0 ||

313

xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,

317

xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,

314

kvdf, kvdb, spl.min_hi, xenv) < 0) {

318

kvdf, kvdb, spl.min_hi, xenv) < 0) {

315

319

316

return -1;

320

return -1;

317

}

321

}

318

}

322

}

319

323

320

return 0;

324

return 0;

321

}

325

}

322

326

323

327

324

int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,

328

int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,

325

xdfenv_t *xe) {

329

xdfenv_t *xe) {

326

long ndiags;

330

long ndiags;

327

long *kvd, *kvdf, *kvdb;

331

long *kvd, *kvdf, *kvdb;

328

xdalgoenv_t xenv;

332

xdalgoenv_t xenv;

329

diffdata_t dd1, dd2;

333

diffdata_t dd1, dd2;

330

334

331

if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {

335

if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {

332

336

333

return -1;

337

return -1;

334

}

338

}

335

339

336

/*

340

/*

337

* Allocate and setup K vectors to be used by the differential algorithm.

341

* Allocate and setup K vectors to be used by the differential algorithm.

338

* One is to store the forward path and one to store the backward path.

342

* One is to store the forward path and one to store the backward path.

339

*/

343

*/

340

ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;

344

ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;

341

if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {

345

if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {

342

346

343

xdl_free_env(xe);

347

xdl_free_env(xe);

344

return -1;

348

return -1;

345

}

349

}

346

kvdf = kvd;

350

kvdf = kvd;

347

kvdb = kvdf + ndiags;

351

kvdb = kvdf + ndiags;

348

kvdf += xe->xdf2.nreff + 1;

352

kvdf += xe->xdf2.nreff + 1;

349

kvdb += xe->xdf2.nreff + 1;

353

kvdb += xe->xdf2.nreff + 1;

350

354

351

xenv.mxcost = xdl_bogosqrt(ndiags);

355

xenv.mxcost = xdl_bogosqrt(ndiags);

352

if (xenv.mxcost < XDL_MAX_COST_MIN)

356

if (xenv.mxcost < XDL_MAX_COST_MIN)

353

xenv.mxcost = XDL_MAX_COST_MIN;

357

xenv.mxcost = XDL_MAX_COST_MIN;

354

xenv.snake_cnt = XDL_SNAKE_CNT;

358

xenv.snake_cnt = XDL_SNAKE_CNT;

355

xenv.heur_min = XDL_HEUR_MIN_COST;

359

xenv.heur_min = XDL_HEUR_MIN_COST;

356

360

357

dd1.nrec = xe->xdf1.nreff;

361

dd1.nrec = xe->xdf1.nreff;

358

dd1.ha = xe->xdf1.ha;

362

dd1.ha = xe->xdf1.ha;

359

dd1.rchg = xe->xdf1.rchg;

363

dd1.rchg = xe->xdf1.rchg;

360

dd1.rindex = xe->xdf1.rindex;

364

dd1.rindex = xe->xdf1.rindex;

361

dd2.nrec = xe->xdf2.nreff;

365

dd2.nrec = xe->xdf2.nreff;

362

dd2.ha = xe->xdf2.ha;

366

dd2.ha = xe->xdf2.ha;

363

dd2.rchg = xe->xdf2.rchg;

367

dd2.rchg = xe->xdf2.rchg;

364

dd2.rindex = xe->xdf2.rindex;

368

dd2.rindex = xe->xdf2.rindex;

365

369

366

if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,

370

if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,

367

kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {

371

kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {

368

372

369

xdl_free(kvd);

373

xdl_free(kvd);

370

xdl_free_env(xe);

374

xdl_free_env(xe);

371

return -1;

375

return -1;

372

}

376

}

373

377

374

xdl_free(kvd);

378

xdl_free(kvd);

375

379

376

return 0;

380

return 0;

377

}

381

}

378

382

379

383

380

/*
 * Allocate a new change record and link it in front of the list xscr.
 *
 * xscr       - current head of the change list (becomes the new node's next).
 * i1, i2     - values stored in the node's i1 / i2 fields.
 * chg1, chg2 - values stored in the node's chg1 / chg2 fields.
 *
 * Returns the new list head, or NULL if the allocation fails (xscr is left
 * untouched in that case). The ignore field starts out as 0.
 */
static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
	xdchange_t *xch;

	if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
		return NULL;

	xch->next = xscr;
	xch->i1 = i1;
	xch->i2 = i2;
	xch->chg1 = chg1;
	xch->chg2 = chg2;
	xch->ignore = 0;

	return xch;
}

395

399

396

400

397

/*
 * Return non-zero if the two records match: their hashes must be equal
 * and xdl_recmatch() must accept their contents under the given flags.
 * The hash comparison comes first so xdl_recmatch() is only called when
 * the hashes agree.
 */
static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
{
	return (rec1->ha == rec2->ha &&
		xdl_recmatch(rec1->ptr, rec1->size,
			     rec2->ptr, rec2->size,
			     flags));
}

404

408

405

/*
 * If a line is indented more than this, get_indent() just returns this value.
 * This avoids having to do absurd amounts of work for data that are not
 * human-readable text, and also ensures that the output of get_indent fits within
 * an int.
 */
#define MAX_INDENT 200

412

416

413

/*

417

/*

414

* Return the amount of indentation of the specified line, treating TAB as 8

418

* Return the amount of indentation of the specified line, treating TAB as 8

415

* columns. Return -1 if line is empty or contains only whitespace. Clamp the

419

* columns. Return -1 if line is empty or contains only whitespace. Clamp the

416

* output value at MAX_INDENT.

420

* output value at MAX_INDENT.

417

*/

421

*/

418

static int get_indent(xrecord_t *rec)

422

static int get_indent(xrecord_t *rec)

419

{

423

{

420

long i;

424

long i;

421

int ret = 0;

425

int ret = 0;

422

426

423

for (i = 0; i < rec->size; i++) {

427

for (i = 0; i < rec->size; i++) {

424

char c = rec->ptr[i];

428

char c = rec->ptr[i];

425

429

426

if (!XDL_ISSPACE(c))

430

if (!XDL_ISSPACE(c))

427

return ret;

431

return ret;

428

else if (c == ' ')

432

else if (c == ' ')

429

ret += 1;

433

ret += 1;

430

else if (c == '\t')

434

else if (c == '\t')

431

ret += 8 - ret % 8;

435

ret += 8 - ret % 8;

432

/* ignore other whitespace characters */

436

/* ignore other whitespace characters */

433

437

434

if (ret >= MAX_INDENT)

438

if (ret >= MAX_INDENT)

435

return MAX_INDENT;

439

return MAX_INDENT;

436

}

440

}

437

441

438

/* The line contains only whitespace. */

442

/* The line contains only whitespace. */

439

return -1;

443

return -1;

440

}

444

}

441

445

442

/*
 * If more than this number of consecutive blank rows are found, just return this
 * value. This avoids requiring O(N^2) work for pathological cases, and also
 * ensures that the output of score_split fits in an int.
 */
#define MAX_BLANKS 20

448

452

449

/* Characteristics measured about a hypothetical split position. */
struct split_measurement {
	/*
	 * Is the split at the end of the file (aside from any blank lines)?
	 */
	int end_of_file;

	/*
	 * How much is the line immediately following the split indented (or -1 if
	 * the line is blank):
	 */
	int indent;

	/*
	 * How many consecutive lines above the split are blank?
	 */
	int pre_blank;

	/*
	 * How much is the nearest non-blank line above the split indented (or -1
	 * if there is no such line)?
	 */
	int pre_indent;

	/*
	 * How many lines after the line following the split are blank?
	 */
	int post_blank;

	/*
	 * How much is the nearest non-blank line after the line following the
	 * split indented (or -1 if there is no such line)?
	 */
	int post_indent;
};

484

488

485

/* Score of a hypothetical split position; for both fields, smaller is better. */
struct split_score {
	/* The effective indent of this split (smaller is preferred). */
	int effective_indent;

	/* Penalty for this split (smaller is preferred). */
	int penalty;
};

492

496

493

/*

497

/*

494

* Fill m with information about a hypothetical split of xdf above line split.

498

* Fill m with information about a hypothetical split of xdf above line split.

495

*/

499

*/

496

static void measure_split(const xdfile_t *xdf, long split,

500

static void measure_split(const xdfile_t *xdf, long split,

497

struct split_measurement *m)

501

struct split_measurement *m)

498

{

502

{

499

long i;

503

long i;

500

504

501

if (split >= xdf->nrec) {

505

if (split >= xdf->nrec) {

502

m->end_of_file = 1;

506

m->end_of_file = 1;

503

m->indent = -1;

507

m->indent = -1;

504

} else {

508

} else {

505

m->end_of_file = 0;

509

m->end_of_file = 0;

506

m->indent = get_indent(xdf->recs[split]);

510

m->indent = get_indent(xdf->recs[split]);

507

}

511

}

508

512

509

m->pre_blank = 0;

513

m->pre_blank = 0;

510

m->pre_indent = -1;

514

m->pre_indent = -1;

511

for (i = split - 1; i >= 0; i--) {

515

for (i = split - 1; i >= 0; i--) {

512

m->pre_indent = get_indent(xdf->recs[i]);

516

m->pre_indent = get_indent(xdf->recs[i]);

513

if (m->pre_indent != -1)

517

if (m->pre_indent != -1)

514

break;

518

break;

515

m->pre_blank += 1;

519

m->pre_blank += 1;

516

if (m->pre_blank == MAX_BLANKS) {

520

if (m->pre_blank == MAX_BLANKS) {

517

m->pre_indent = 0;

521

m->pre_indent = 0;

518

break;

522

break;

519

}

523

}

520

}

524

}

521

525

522

m->post_blank = 0;

526

m->post_blank = 0;

523

m->post_indent = -1;

527

m->post_indent = -1;

524

for (i = split + 1; i < xdf->nrec; i++) {

528

for (i = split + 1; i < xdf->nrec; i++) {

525

m->post_indent = get_indent(xdf->recs[i]);

529

m->post_indent = get_indent(xdf->recs[i]);

526

if (m->post_indent != -1)

530

if (m->post_indent != -1)

527

break;

531

break;

528

m->post_blank += 1;

532

m->post_blank += 1;

529

if (m->post_blank == MAX_BLANKS) {

533

if (m->post_blank == MAX_BLANKS) {

530

m->post_indent = 0;

534

m->post_indent = 0;

531

break;

535

break;

532

}

536

}

533

}

537

}

534

}

538

}

535

539

536

/*
 * The empirically-determined weight factors used by score_split() below.
 * Larger values mean that the position is a less favorable place to split.
 *
 * Note that scores are only ever compared against each other, so multiplying
 * all of these weight/penalty values by the same factor wouldn't change the
 * heuristic's behavior. Still, we need to set that arbitrary scale *somehow*.
 * In practice, these numbers are chosen to be large enough that they can be
 * adjusted relative to each other with sufficient precision despite using
 * integer math.
 */

/* Penalty if there are no non-blank lines before the split */
#define START_OF_FILE_PENALTY 1

/* Penalty if there are no non-blank lines after the split */
#define END_OF_FILE_PENALTY 21

/* Multiplier for the number of blank lines around the split */
#define TOTAL_BLANK_WEIGHT (-30)

/* Multiplier for the number of blank lines after the split */
#define POST_BLANK_WEIGHT 6

/*
 * Penalties applied if the line is indented more than its predecessor
 */
#define RELATIVE_INDENT_PENALTY (-4)
#define RELATIVE_INDENT_WITH_BLANK_PENALTY 10

/*
 * Penalties applied if the line is indented less than both its predecessor and
 * its successor
 */
#define RELATIVE_OUTDENT_PENALTY 24
#define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17

/*
 * Penalties applied if the line is indented less than its predecessor but not
 * less than its successor
 */
#define RELATIVE_DEDENT_PENALTY 23
#define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17

579

583

580

/*

584

/*

581

* We only consider whether the sum of the effective indents for splits are

585

* We only consider whether the sum of the effective indents for splits are

582

* less than (-1), equal to (0), or greater than (+1) each other. The resulting

586

* less than (-1), equal to (0), or greater than (+1) each other. The resulting

583

* value is multiplied by the following weight and combined with the penalty to

587

* value is multiplied by the following weight and combined with the penalty to

584

* determine the better of two scores.

588

* determine the better of two scores.

585

*/

589

*/

586

#define INDENT_WEIGHT 60

590

#define INDENT_WEIGHT 60

587

591

588

/*

592

/*

589

* Compute a badness score for the hypothetical split whose measurements are

593

* Compute a badness score for the hypothetical split whose measurements are

590

* stored in m. The weight factors were determined empirically using the tools and

594

* stored in m. The weight factors were determined empirically using the tools and

591

* corpus described in

595

* corpus described in

592

*

596

*

593

* https://github.com/mhagger/diff-slider-tools

597

* https://github.com/mhagger/diff-slider-tools

594

*

598

*

595

* Also see that project if you want to improve the weights based on, for example,

599

* Also see that project if you want to improve the weights based on, for example,

596

* a larger or more diverse corpus.

600

* a larger or more diverse corpus.

597

*/

601

*/

598

static void score_add_split(const struct split_measurement *m, struct split_score *s)

602

static void score_add_split(const struct split_measurement *m, struct split_score *s)

599

{

603

{

600

/*

604

/*

601

* A place to accumulate penalty factors (positive makes this index more

605

* A place to accumulate penalty factors (positive makes this index more

602

* favored):

606

* favored):

603

*/

607

*/

604

int post_blank, total_blank, indent, any_blanks;

608

int post_blank, total_blank, indent, any_blanks;

605

609

606

if (m->pre_indent == -1 && m->pre_blank == 0)

610

if (m->pre_indent == -1 && m->pre_blank == 0)

607

s->penalty += START_OF_FILE_PENALTY;

611

s->penalty += START_OF_FILE_PENALTY;

608

612

609

if (m->end_of_file)

613

if (m->end_of_file)

610

s->penalty += END_OF_FILE_PENALTY;

614

s->penalty += END_OF_FILE_PENALTY;

611

615

612

/*

616

/*

613

* Set post_blank to the number of blank lines following the split,

617

* Set post_blank to the number of blank lines following the split,

614

* including the line immediately after the split:

618

* including the line immediately after the split:

615

*/

619

*/

616

post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;

620

post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;

617

total_blank = m->pre_blank + post_blank;

621

total_blank = m->pre_blank + post_blank;

618

622

619

/* Penalties based on nearby blank lines: */

623

/* Penalties based on nearby blank lines: */

620

s->penalty += TOTAL_BLANK_WEIGHT * total_blank;

624

s->penalty += TOTAL_BLANK_WEIGHT * total_blank;

621

s->penalty += POST_BLANK_WEIGHT * post_blank;

625

s->penalty += POST_BLANK_WEIGHT * post_blank;

622

626

623

if (m->indent != -1)

627

if (m->indent != -1)

624

indent = m->indent;

628

indent = m->indent;

625

else

629

else

626

indent = m->post_indent;

630

indent = m->post_indent;

627

631

628

any_blanks = (total_blank != 0);

632

any_blanks = (total_blank != 0);

629

633

630

/* Note that the effective indent is -1 at the end of the file: */

634

/* Note that the effective indent is -1 at the end of the file: */

631

s->effective_indent += indent;

635

s->effective_indent += indent;

632

636

633

if (indent == -1) {

637

if (indent == -1) {

634

/* No additional adjustments needed. */

638

/* No additional adjustments needed. */

635

} else if (m->pre_indent == -1) {

639

} else if (m->pre_indent == -1) {

636

/* No additional adjustments needed. */

640

/* No additional adjustments needed. */

637

} else if (indent > m->pre_indent) {

641

} else if (indent > m->pre_indent) {

638

/*

642

/*

639

* The line is indented more than its predecessor.

643

* The line is indented more than its predecessor.

640

*/

644

*/

641

s->penalty += any_blanks ?

645

s->penalty += any_blanks ?

642

RELATIVE_INDENT_WITH_BLANK_PENALTY :

646

RELATIVE_INDENT_WITH_BLANK_PENALTY :

643

RELATIVE_INDENT_PENALTY;

647

RELATIVE_INDENT_PENALTY;

644

} else if (indent == m->pre_indent) {

648

} else if (indent == m->pre_indent) {

645

/*

649

/*

646

* The line has the same indentation level as its predecessor.

650

* The line has the same indentation level as its predecessor.

647

* No additional adjustments needed.

651

* No additional adjustments needed.

648

*/

652

*/

649

} else {

653

} else {

650

/*

654

/*

651

* The line is indented less than its predecessor. It could be

655

* The line is indented less than its predecessor. It could be

652

* the block terminator of the previous block, but it could

656

* the block terminator of the previous block, but it could

653

* also be the start of a new block (e.g., an "else" block, or

657

* also be the start of a new block (e.g., an "else" block, or

654

* maybe the previous block didn't have a block terminator).

658

* maybe the previous block didn't have a block terminator).

655

* Try to distinguish those cases based on what comes next:

659

* Try to distinguish those cases based on what comes next:

656

*/

660

*/

657

if (m->post_indent != -1 && m->post_indent > indent) {

661

if (m->post_indent != -1 && m->post_indent > indent) {

658

/*

662

/*

659

* The following line is indented more. So it is likely

663

* The following line is indented more. So it is likely

660

* that this line is the start of a block.

664

* that this line is the start of a block.

661

*/

665

*/

662

s->penalty += any_blanks ?

666

s->penalty += any_blanks ?

663

RELATIVE_OUTDENT_WITH_BLANK_PENALTY :

667

RELATIVE_OUTDENT_WITH_BLANK_PENALTY :

664

RELATIVE_OUTDENT_PENALTY;

668

RELATIVE_OUTDENT_PENALTY;

665

} else {

669

} else {

666

/*

670

/*

667

* That was probably the end of a block.

671

* That was probably the end of a block.

668

*/

672

*/

669

s->penalty += any_blanks ?

673

s->penalty += any_blanks ?

670

RELATIVE_DEDENT_WITH_BLANK_PENALTY :

674

RELATIVE_DEDENT_WITH_BLANK_PENALTY :

671

RELATIVE_DEDENT_PENALTY;

675

RELATIVE_DEDENT_PENALTY;

672

}

676

}

673

}

677

}

674

}

678

}

675

679

676

static int score_cmp(struct split_score *s1, struct split_score *s2)

680

static int score_cmp(struct split_score *s1, struct split_score *s2)

677

{

681

{

678

/* -1 if s1.effective_indent < s2->effective_indent, etc. */

682

/* -1 if s1.effective_indent < s2->effective_indent, etc. */

679

int cmp_indents = ((s1->effective_indent > s2->effective_indent) -

683

int cmp_indents = ((s1->effective_indent > s2->effective_indent) -

680

(s1->effective_indent < s2->effective_indent));

684

(s1->effective_indent < s2->effective_indent));

681

685

682

return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty);

686

return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty);

683

}

687

}

684

688

685

/*
 * A group of changed lines in an xdfile_t, i.e. a contiguous run of lines
 * that was inserted into or deleted from the corresponding version of the
 * file. A group is considered to exist at the beginning of the file, at the
 * end of the file, and between any two unchanged lines; most of these groups
 * will usually be empty.
 *
 * A group can be slid down when its first line equals the line following the
 * group, and slid up when its last line equals the line preceding the group.
 * See group_slide_down() and group_slide_up().
 *
 * Loops that test for changed lines in xdf->rchg need no index bounding
 * because the array is prepared with a zero at positions -1 and N.
 */
struct xdlgroup {
	/*
	 * Index of the first changed line in the group, or, for an empty
	 * group, the index of the unchanged line above which it is located.
	 */
	long start;

	/*
	 * Index of the first unchanged line after the group. Equal to start
	 * when the group is empty.
	 */
	long end;
};

713

717

714

/*

718

/*

715

* Initialize g to point at the first group in xdf.

719

* Initialize g to point at the first group in xdf.

716

*/

720

*/

717

static void group_init(xdfile_t *xdf, struct xdlgroup *g)

721

static void group_init(xdfile_t *xdf, struct xdlgroup *g)

718

{

722

{

719

g->start = g->end = 0;

723

g->start = g->end = 0;

720

while (xdf->rchg[g->end])

724

while (xdf->rchg[g->end])

721

g->end++;

725

g->end++;

722

}

726

}

723

727

724

/*

728

/*

725

* Move g to describe the next (possibly empty) group in xdf and return 0. If g

729

* Move g to describe the next (possibly empty) group in xdf and return 0. If g

726

* is already at the end of the file, do nothing and return -1.

730

* is already at the end of the file, do nothing and return -1.

727

*/

731

*/

728

static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)

732

static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)

729

{

733

{

730

if (g->end == xdf->nrec)

734

if (g->end == xdf->nrec)

731

return -1;

735

return -1;

732

736

733

g->start = g->end + 1;

737

g->start = g->end + 1;

734

for (g->end = g->start; xdf->rchg[g->end]; g->end++)

738

for (g->end = g->start; xdf->rchg[g->end]; g->end++)

735

;

739

;

736

740

737

return 0;

741

return 0;

738

}

742

}

739

743

740

/*

744

/*

741

* Move g to describe the previous (possibly empty) group in xdf and return 0.

745

* Move g to describe the previous (possibly empty) group in xdf and return 0.

742

* If g is already at the beginning of the file, do nothing and return -1.

746

* If g is already at the beginning of the file, do nothing and return -1.

743

*/

747

*/

744

static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)

748

static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)

745

{

749

{

746

if (g->start == 0)

750

if (g->start == 0)

747

return -1;

751

return -1;

748

752

749

g->end = g->start - 1;

753

g->end = g->start - 1;

750

for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)

754

for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)

751

;

755

;

752

756

753

return 0;

757

return 0;

754

}

758

}

755

759

756

/*

760

/*

757

* If g can be slid toward the end of the file, do so, and if it bumps into a

761

* If g can be slid toward the end of the file, do so, and if it bumps into a

758

* following group, expand this group to include it. Return 0 on success or -1

762

* following group, expand this group to include it. Return 0 on success or -1

759

* if g cannot be slid down.

763

* if g cannot be slid down.

760

*/

764

*/

761

static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags)

765

static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags)

762

{

766

{

763

if (g->end < xdf->nrec &&

767

if (g->end < xdf->nrec &&

764

recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {

768

recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {

765

xdf->rchg[g->start++] = 0;

769

xdf->rchg[g->start++] = 0;

766

xdf->rchg[g->end++] = 1;

770

xdf->rchg[g->end++] = 1;

767

771

768

while (xdf->rchg[g->end])

772

while (xdf->rchg[g->end])

769

g->end++;

773

g->end++;

770

774

771

return 0;

775

return 0;

772

} else {

776

} else {

773

return -1;

777

return -1;

774

}

778

}

775

}

779

}

776

780

777

/*

781

/*

778

* If g can be slid toward the beginning of the file, do so, and if it bumps

782

* If g can be slid toward the beginning of the file, do so, and if it bumps

779

* into a previous group, expand this group to include it. Return 0 on success

783

* into a previous group, expand this group to include it. Return 0 on success

780

* or -1 if g cannot be slid up.

784

* or -1 if g cannot be slid up.

781

*/

785

*/

782

static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags)

786

static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags)

783

{

787

{

784

if (g->start > 0 &&

788

if (g->start > 0 &&

785

recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {

789

recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {

786

xdf->rchg[--g->start] = 1;

790

xdf->rchg[--g->start] = 1;

787

xdf->rchg[--g->end] = 0;

791

xdf->rchg[--g->end] = 0;

788

792

789

while (xdf->rchg[g->start - 1])

793

while (xdf->rchg[g->start - 1])

790

g->start--;

794

g->start--;

791

795

792

return 0;

796

return 0;

793

} else {

797

} else {

794

return -1;

798

return -1;

795

}

799

}

796

}

800

}

797

801

798

/*
 * Report an internal inconsistency: print msg to stderr with a "BUG: "
 * prefix and terminate the process with exit status 1. Does not return.
 */
static void xdl_bug(const char *msg)
{
	fprintf(stderr, "BUG: %s\n", msg);
	exit(1);
}

803

807

804

/*
 * Cut-off for the indent heuristic: stop searching for a better slide
 * position once MAX_BORING consecutive candidates have been checked without
 * finding an improvement. This guards the heuristic against pathological
 * inputs. The value was not picked scientifically but should be good enough.
 */
#define MAX_BORING 100

811

815

812

/*

816

/*

813

* Move back and forward change groups for a consistent and pretty diff output.

817

* Move back and forward change groups for a consistent and pretty diff output.

814

* This also helps in finding joinable change groups and reducing the diff

818

* This also helps in finding joinable change groups and reducing the diff

815

* size.

819

* size.

816

*/

820

*/

817

int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {

821

int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {

818

struct xdlgroup g, go;

822

struct xdlgroup g, go;

819

long earliest_end, end_matching_other;

823

long earliest_end, end_matching_other;

820

long groupsize;

824

long groupsize;

821

825

822

group_init(xdf, &g);

826

group_init(xdf, &g);

823

group_init(xdfo, &go);

827

group_init(xdfo, &go);

824

828

825

while (1) {

829

while (1) {

826

/* If the group is empty in the to-be-compacted file, skip it: */

830

/* If the group is empty in the to-be-compacted file, skip it: */

827

if (g.end == g.start)

831

if (g.end == g.start)

828

goto next;

832

goto next;

829

833

830

/*

834

/*

831

* Now shift the change up and then down as far as possible in

835

* Now shift the change up and then down as far as possible in

832

* each direction. If it bumps into any other changes, merge them.

836

* each direction. If it bumps into any other changes, merge them.

833

*/

837

*/

834

do {

838

do {

835

groupsize = g.end - g.start;

839

groupsize = g.end - g.start;

836

840

837

/*

841

/*

838

* Keep track of the last "end" index that causes this

842

* Keep track of the last "end" index that causes this

839

* group to align with a group of changed lines in the

843

* group to align with a group of changed lines in the

840

* other file. -1 indicates that we haven't found such

844

* other file. -1 indicates that we haven't found such

841

* a match yet:

845

* a match yet:

842

*/

846

*/

843

end_matching_other = -1;

847

end_matching_other = -1;

844

848

845

/* Shift the group backward as much as possible: */

849

/* Shift the group backward as much as possible: */

846

while (!group_slide_up(xdf, &g, flags))

850

while (!group_slide_up(xdf, &g, flags))

847

if (group_previous(xdfo, &go))

851

if (group_previous(xdfo, &go))

848

xdl_bug("group sync broken sliding up");

852

xdl_bug("group sync broken sliding up");

849

853

850

/*

854

/*

851

* This is this highest that this group can be shifted.

855

* This is this highest that this group can be shifted.

852

* Record its end index:

856

* Record its end index:

853

*/

857

*/

854

earliest_end = g.end;

858

earliest_end = g.end;

855

859

856

if (go.end > go.start)

860

if (go.end > go.start)

857

end_matching_other = g.end;

861

end_matching_other = g.end;

858

862

859

/* Now shift the group forward as far as possible: */

863

/* Now shift the group forward as far as possible: */

860

while (1) {

864

while (1) {

861

if (group_slide_down(xdf, &g, flags))

865

if (group_slide_down(xdf, &g, flags))

862

break;

866

break;

863

if (group_next(xdfo, &go))

867

if (group_next(xdfo, &go))

864

xdl_bug("group sync broken sliding down");

868

xdl_bug("group sync broken sliding down");

865

869

866

if (go.end > go.start)

870

if (go.end > go.start)

867

end_matching_other = g.end;

871

end_matching_other = g.end;

868

}

872

}

869

} while (groupsize != g.end - g.start);

873

} while (groupsize != g.end - g.start);

870

874

871

/*

875

/*

872

* If the group can be shifted, then we can possibly use this

876

* If the group can be shifted, then we can possibly use this

873

* freedom to produce a more intuitive diff.

877

* freedom to produce a more intuitive diff.

874

*

878

*

875

* The group is currently shifted as far down as possible, so the

879

* The group is currently shifted as far down as possible, so the

876

* heuristics below only have to handle upwards shifts.

880

* heuristics below only have to handle upwards shifts.

877

*/

881

*/

878

882

879

if (g.end == earliest_end) {

883

if (g.end == earliest_end) {

880

/* no shifting was possible */

884

/* no shifting was possible */

881

} else if (end_matching_other != -1) {

885

} else if (end_matching_other != -1) {

882

/*

886

/*

883

* Move the possibly merged group of changes back to line

887

* Move the possibly merged group of changes back to line

884

* up with the last group of changes from the other file

888

* up with the last group of changes from the other file

885

* that it can align with.

889

* that it can align with.

886

*/

890

*/

887

while (go.end == go.start) {

891

while (go.end == go.start) {

888

if (group_slide_up(xdf, &g, flags))

892

if (group_slide_up(xdf, &g, flags))

889

xdl_bug("match disappeared");

893

xdl_bug("match disappeared");

890

if (group_previous(xdfo, &go))

894

if (group_previous(xdfo, &go))

891

xdl_bug("group sync broken sliding to match");

895

xdl_bug("group sync broken sliding to match");

892

}

896

}

893

} else if (flags & XDF_INDENT_HEURISTIC) {

897

} else if (flags & XDF_INDENT_HEURISTIC) {

894

/*

898

/*

895

* Indent heuristic: a group of pure add/delete lines

899

* Indent heuristic: a group of pure add/delete lines

896

* implies two splits, one between the end of the "before"

900

* implies two splits, one between the end of the "before"

897

* context and the start of the group, and another between

901

* context and the start of the group, and another between

898

* the end of the group and the beginning of the "after"

902

* the end of the group and the beginning of the "after"

899

* context. Some splits are aesthetically better and some

903

* context. Some splits are aesthetically better and some

900

* are worse. We compute a badness "score" for each split,

904

* are worse. We compute a badness "score" for each split,

901

* and add the scores for the two splits to define a

905

* and add the scores for the two splits to define a

902

* "score" for each position that the group can be shifted

906

* "score" for each position that the group can be shifted

903

* to. Then we pick the shift with the lowest score.

907

* to. Then we pick the shift with the lowest score.

904

*/

908

*/

905

long shift, best_shift = -1;

909

long shift, best_shift = -1;

906

struct split_score best_score;

910

struct split_score best_score;

907

911

908

/*

912

/*

909

* This is O(N * MAX_BLANKS) (N = shift-able lines).

913

* This is O(N * MAX_BLANKS) (N = shift-able lines).

910

* Even with MAX_BLANKS bounded to a small value, a

914

* Even with MAX_BLANKS bounded to a small value, a

911

* large N could still make this loop take several

915

* large N could still make this loop take several

912

* times longer than the main diff algorithm. The

916

* times longer than the main diff algorithm. The

913

* "boring" value is to help cut down N to something

917

* "boring" value is to help cut down N to something

914

* like (MAX_BORING + groupsize).

918

* like (MAX_BORING + groupsize).

915

*

919

*

916

* Scan from bottom to top. So we can exit the loop

920

* Scan from bottom to top. So we can exit the loop

917

* without compromising the assumption "for a same best

921

* without compromising the assumption "for a same best

918

* score, pick the bottommost shift".

922

* score, pick the bottommost shift".

919

*/

923

*/

920

int boring = 0;

924

int boring = 0;

921

for (shift = g.end; shift >= earliest_end; shift--) {

925

for (shift = g.end; shift >= earliest_end; shift--) {

922

struct split_measurement m;

926

struct split_measurement m;

923

struct split_score score = {0, 0};

927

struct split_score score = {0, 0};

924

int cmp;

928

int cmp;

925

929

926

measure_split(xdf, shift, &m);

930

measure_split(xdf, shift, &m);

927

score_add_split(&m, &score);

931

score_add_split(&m, &score);

928

measure_split(xdf, shift - groupsize, &m);

932

measure_split(xdf, shift - groupsize, &m);

929

score_add_split(&m, &score);

933

score_add_split(&m, &score);

930

934

931

if (best_shift == -1) {

935

if (best_shift == -1) {

932

cmp = -1;

936

cmp = -1;

933

} else {

937

} else {

934

cmp = score_cmp(&score, &best_score);

938

cmp = score_cmp(&score, &best_score);

935

}

939

}

936

if (cmp < 0) {

940

if (cmp < 0) {

937

boring = 0;

941

boring = 0;

938

best_score.effective_indent = score.effective_indent;

942

best_score.effective_indent = score.effective_indent;

939

best_score.penalty = score.penalty;

943

best_score.penalty = score.penalty;

940

best_shift = shift;

944

best_shift = shift;

941

} else {

945

} else {

942

boring += 1;

946

boring += 1;

943

if (boring >= MAX_BORING)

947

if (boring >= MAX_BORING)

944

break;

948

break;

945

}

949

}

946

}

950

}

947

951

948

while (g.end > best_shift) {

952

while (g.end > best_shift) {

949

if (group_slide_up(xdf, &g, flags))

953

if (group_slide_up(xdf, &g, flags))

950

xdl_bug("best shift unreached");

954

xdl_bug("best shift unreached");

951

if (group_previous(xdfo, &go))

955

if (group_previous(xdfo, &go))

952

xdl_bug("group sync broken sliding to blank line");

956

xdl_bug("group sync broken sliding to blank line");

953

}

957

}

954

}

958

}

955

959

956

/* Move past the just-processed group: */

961

/* Move past the just-processed group: */

958

if (group_next(xdf, &g))

962

if (group_next(xdf, &g))

959

break;

963

break;

960

if (group_next(xdfo, &go))

964

if (group_next(xdfo, &go))

961

xdl_bug("group sync broken moving to next group");

965

xdl_bug("group sync broken moving to next group");

962

}

966

}

963

967

964

if (!group_next(xdfo, &go))

968

if (!group_next(xdfo, &go))

965

xdl_bug("group sync broken at end of file");

969

xdl_bug("group sync broken at end of file");

966

970

967

return 0;

971

return 0;

968

}

972

}

969

973

970

974

971

/*
 * Build an edit script from the rchg flags of both files in xe.
 *
 * The two files are scanned in parallel from their last record towards the
 * first. Wherever either side has a changed line just below the cursor, the
 * whole run of changed lines on each side is collected and handed to
 * xdl_add_change() together with the script built so far.
 *
 * On success the script is stored in *xscr and 0 is returned. If
 * xdl_add_change() fails, the partial script is freed, *xscr is left
 * untouched and -1 is returned.
 */
int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
	xdchange_t *script = NULL, *node;
	char *chg1 = xe->xdf1.rchg, *chg2 = xe->xdf2.rchg;
	long i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec;
	long end1, end2;

	while (i1 >= 0 || i2 >= 0) {
		if (chg1[i1 - 1] || chg2[i2 - 1]) {
			/* Walk each cursor up to the top of its changed run. */
			end1 = i1;
			while (chg1[i1 - 1])
				i1--;
			end2 = i2;
			while (chg2[i2 - 1])
				i2--;

			node = xdl_add_change(script, i1, i2, end1 - i1, end2 - i2);
			if (!node) {
				xdl_free_script(script);
				return -1;
			}
			script = node;
		}

		i1--;
		i2--;
	}

	*xscr = script;

	return 0;
}

995

999

996

1000

997

/*
 * Release every node of the edit script xscr by following the next links
 * and passing each node to xdl_free(). A NULL script is a no-op.
 */
void xdl_free_script(xdchange_t *xscr) {
	xdchange_t *following;

	for (; xscr != NULL; xscr = following) {
		/* Grab the link before the node is released. */
		following = xscr->next;
		xdl_free(xscr);
	}
}

1005

1009

1006

static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,

1010

static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,

1007

xdemitconf_t const *xecfg)

1011

xdemitconf_t const *xecfg)

1008

{

1012

{

1009

xdchange_t *xch, *xche;

1013

xdchange_t *xch, *xche;

1010

if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {

1014

if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {

1011

long i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;

1015

long i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;

1012

for (xch = xscr; xch; xch = xche->next) {

1016

for (xch = xscr; xch; xch = xche->next) {

1013

xche = xdl_get_hunk(&xch, xecfg);

1017

xche = xdl_get_hunk(&xch, xecfg);

1014

if (!xch)

1018

if (!xch)

1015

break;

1019

break;

1016

if (xch->i1 > i1 || xch->i2 > i2) {

1020

if (xch->i1 > i1 || xch->i2 > i2) {

1017

if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)

1021

if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)

1018

return -1;

1022

return -1;

1019

}

1023

}

1020

i1 = xche->i1 + xche->chg1;

1024

i1 = xche->i1 + xche->chg1;

1021

i2 = xche->i2 + xche->chg2;

1025

i2 = xche->i2 + xche->chg2;

1022

}

1026

}

1023

if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)

1027

if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)

1024

return -1;

1028

return -1;

1025

} else {

1029

} else {

1026

for (xch = xscr; xch; xch = xche->next) {

1030

for (xch = xscr; xch; xch = xche->next) {

1027

xche = xdl_get_hunk(&xch, xecfg);

1031

xche = xdl_get_hunk(&xch, xecfg);

1028

if (!xch)

1032

if (!xch)

1029

break;

1033

break;

1030

if (xecfg->hunk_func(

1034

if (xecfg->hunk_func(

1031

xch->i1, xche->i1 + xche->chg1 - xch->i1,

1035

xch->i1, xche->i1 + xche->chg1 - xch->i1,

1032

xch->i2, xche->i2 + xche->chg2 - xch->i2,

1036

xch->i2, xche->i2 + xche->chg2 - xch->i2,

1033

ecb->priv) < 0)

1037

ecb->priv) < 0)

1034

return -1;

1038

return -1;

1035

}

1039

}

1036

}

1040

}

1037

return 0;

1041

return 0;

1038

}

1042

}

1039

1043

1040

/*
 * Set the ignore field of every change in xscr. A change ends up with a
 * non-zero ignore value only if xdl_blankline() accepts every line it covers
 * in both files (chg1 lines from xdf1 starting at i1, then chg2 lines from
 * xdf2 starting at i2); checking stops at the first line that is rejected.
 * A change covering no lines at all is marked ignorable.
 */
static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
{
	for (; xscr; xscr = xscr->next) {
		xrecord_t **side1 = &xe->xdf1.recs[xscr->i1];
		xrecord_t **side2 = &xe->xdf2.recs[xscr->i2];
		int blank_only = 1;
		long k;

		for (k = 0; blank_only && k < xscr->chg1; k++)
			blank_only = xdl_blankline(side1[k]->ptr, side1[k]->size, flags);

		for (k = 0; blank_only && k < xscr->chg2; k++)
			blank_only = xdl_blankline(side2[k]->ptr, side2[k]->size, flags);

		xscr->ignore = blank_only;
	}
}

1060

1064

1061

int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,

1065

int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,

1062

xdemitconf_t const *xecfg, xdemitcb_t *ecb) {

1066

xdemitconf_t const *xecfg, xdemitcb_t *ecb) {

1063

xdchange_t *xscr;

1067

xdchange_t *xscr;

1064

xdfenv_t xe;

1068

xdfenv_t xe;

1065

emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;

1069

emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;

1066

1070

1067

if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {

1071

if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {

1068

1072

1069

return -1;

1073

return -1;

1070

}

1074

}

1071

if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||

1075

if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||

1072

xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||

1076

xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||

1073

xdl_build_script(&xe, &xscr) < 0) {

1077

xdl_build_script(&xe, &xscr) < 0) {

1074

1078

1075

xdl_free_env(&xe);

1079

xdl_free_env(&xe);

1076

return -1;

1080

return -1;

1077

}

1081

}

1078

1082

1079

if (xpp->flags & XDF_IGNORE_BLANK_LINES)

1083

if (xpp->flags & XDF_IGNORE_BLANK_LINES)

1080

xdl_mark_ignorable(xscr, &xe, xpp->flags);

1084

xdl_mark_ignorable(xscr, &xe, xpp->flags);

1081

if (ef(&xe, xscr, ecb, xecfg) < 0) {

1085

if (ef(&xe, xscr, ecb, xecfg) < 0) {

1082

xdl_free_script(xscr);

1086

xdl_free_script(xscr);

1083

xdl_free_env(&xe);

1087

xdl_free_env(&xe);

1084

return -1;

1088

return -1;

1085

}

1089

}

1086

xdl_free_script(xscr);

1090

xdl_free_script(xscr);

1087

xdl_free_env(&xe);

1091

xdl_free_env(&xe);

1088

1092

1089

return 0;

1093

return 0;

1090

}

1094

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             /*
              *  LibXDiff by Davide Libenzi ( File Differential Library )
              *  Copyright (C) 2003	Davide Libenzi
              *
              *  This library is free software; you can redistribute it and/or
              *  modify it under the terms of the GNU Lesser General Public
              *  License as published by the Free Software Foundation; either
              *  version 2.1 of the License, or (at your option) any later version.
              *
              *  This library is distributed in the hope that it will be useful,
              *  but WITHOUT ANY WARRANTY; without even the implied warranty of
              *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
              *  Lesser General Public License for more details.
              *
              *  You should have received a copy of the GNU Lesser General Public
              *  License along with this library; if not, see
              *  <http://www.gnu.org/licenses/>.
              *
              *  Davide Libenzi <davidel@xmailserver.org>
              *
              */
             #include "xinclude.h"
             #define XDL_MAX_COST_MIN 256
             #define XDL_HEUR_MIN_COST 256
             #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
             #define XDL_SNAKE_CNT 20
             #define XDL_K_HEUR 4
+            /* VC 2008 doesn't know about the inline keyword. */
+            #if defined(_MSC_VER)
+            #define inline __forceinline
+            #endif
             typedef struct s_xdpsplit {
             	long i1, i2;
             	int min_lo, min_hi;
             } xdpsplit_t;
             static long xdl_split(unsigned long const *ha1, long off1, long lim1,
             		      unsigned long const *ha2, long off2, long lim2,
             		      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
             		      xdalgoenv_t *xenv);
             static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
             /*
              * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
              * Basically considers a "box" (off1, off2, lim1, lim2) and scans both
              * the forward diagonal starting from (off1, off2) and the backward diagonal
              * starting from (lim1, lim2). If the K values on the same diagonal cross,
              * returns the furthest point of reach. Running this algorithm to
              * completion can be too expensive in some cases, so a little bit of
              * heuristic is needed to cut the search and to return a suboptimal point.
              *
              * kvdf and kvdb are the forward and backward K vectors, indexed by
              * diagonal number (i1 - i2). The chosen split point is stored in
              * spl->i1 / spl->i2; spl->min_lo and spl->min_hi are set to 1 for an
              * exact crossing and to 1/0 or 0/1 when a heuristic picked the point.
              * When need_min is non-zero the heuristics are never used.
              *
              * Returns the edit cost (ec) reached when the split point was chosen.
              */
             static long xdl_split(unsigned long const *ha1, long off1, long lim1,
             		      unsigned long const *ha2, long off2, long lim2,
             		      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
             		      xdalgoenv_t *xenv) {
             	/* Range of diagonals that intersect the box. */
             	long dmin = off1 - lim2, dmax = lim1 - off2;
             	/* Diagonals through the two corners the searches start from. */
             	long fmid = off1 - off2, bmid = lim1 - lim2;
             	/* Parity of the distance between the two starting diagonals. */
             	long odd = (fmid - bmid) & 1;
             	long fmin = fmid, fmax = fmid;
             	long bmin = bmid, bmax = bmid;
             	long ec, d, i1, i2, prev1, best, dd, v, k;
             	/*
             	 * Set initial diagonal values for both forward and backward path.
             	 */
             	kvdf[fmid] = off1;
             	kvdb[bmid] = lim1;
             	/* Each iteration advances both searches by one step; ec counts them. */
             	for (ec = 1;; ec++) {
             		int got_snake = 0;
             		/*
             		 * We need to extend the diagonal "domain" by one. If the next
             		 * values exit the box boundaries we need to change it in the
             		 * opposite direction because (max - min) must be a power of two.
             		 * Also we initialize the external K value to -1 so that we can
             		 * avoid extra conditions check inside the core loop.
             		 */
             		if (fmin > dmin)
             			kvdf[--fmin - 1] = -1;
             		else
             			++fmin;
             		if (fmax < dmax)
             			kvdf[++fmax + 1] = -1;
             		else
             			--fmax;
             		/* Forward pass over every second diagonal in [fmin, fmax]. */
             		for (d = fmax; d >= fmin; d -= 2) {
             			/* Continue from the neighbouring diagonal that reached further. */
             			if (kvdf[d - 1] >= kvdf[d + 1])
             				i1 = kvdf[d - 1] + 1;
             			else
             				i1 = kvdf[d + 1];
             			prev1 = i1;
             			i2 = i1 - d;
             			/* Follow the snake of equal hashes along diagonal d. */
             			for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
             			if (i1 - prev1 > xenv->snake_cnt)
             				got_snake = 1;
             			kvdf[d] = i1;
             			/* The forward path met the backward path on this diagonal. */
             			if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
             				spl->i1 = i1;
             				spl->i2 = i2;
             				spl->min_lo = spl->min_hi = 1;
             				return ec;
             			}
             		}
             		/*
             		 * We need to extend the diagonal "domain" by one. If the next
             		 * values exit the box boundaries we need to change it in the
             		 * opposite direction because (max - min) must be a power of two.
             		 * Also we initialize the external K value to XDL_LINE_MAX so that
             		 * we can avoid extra conditions check inside the core loop.
             		 */
             		if (bmin > dmin)
             			kvdb[--bmin - 1] = XDL_LINE_MAX;
             		else
             			++bmin;
             		if (bmax < dmax)
             			kvdb[++bmax + 1] = XDL_LINE_MAX;
             		else
             			--bmax;
             		/* Backward pass over every second diagonal in [bmin, bmax]. */
             		for (d = bmax; d >= bmin; d -= 2) {
             			/* Continue from the neighbouring diagonal that reached further back. */
             			if (kvdb[d - 1] < kvdb[d + 1])
             				i1 = kvdb[d - 1];
             			else
             				i1 = kvdb[d + 1] - 1;
             			prev1 = i1;
             			i2 = i1 - d;
             			/* Follow the snake of equal hashes backwards along diagonal d. */
             			for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
             			if (prev1 - i1 > xenv->snake_cnt)
             				got_snake = 1;
             			kvdb[d] = i1;
             			/* The backward path met the forward path on this diagonal. */
             			if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
             				spl->i1 = i1;
             				spl->i2 = i2;
             				spl->min_lo = spl->min_hi = 1;
             				return ec;
             			}
             		}
             		/* A minimal split was requested: never fall back to the heuristics. */
             		if (need_min)
             			continue;
             		/*
             		 * If the edit cost is above the heuristic trigger and if
             		 * we got a good snake, we sample current diagonals to see
             		 * if some of them have reached an "interesting" path. Our
             		 * measure is a function of the distance from the diagonal
             		 * corner (i1 + i2) penalized with the distance from the
             		 * mid diagonal itself. If this value is above the current
             		 * edit cost times a magic factor (XDL_K_HEUR) we consider
             		 * it interesting.
             		 */
             		if (got_snake && ec > xenv->heur_min) {
             			/* Sample the forward diagonals first. */
             			for (best = 0, d = fmax; d >= fmin; d -= 2) {
             				dd = d > fmid ? d - fmid: fmid - d;
             				i1 = kvdf[d];
             				i2 = i1 - d;
             				v = (i1 - off1) + (i2 - off2) - dd;
             				if (v > XDL_K_HEUR * ec && v > best &&
             				    off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
             				    off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
             					/* Accept only if the snake_cnt records before the point match. */
             					for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
             						if (k == xenv->snake_cnt) {
             							best = v;
             							spl->i1 = i1;
             							spl->i2 = i2;
             							break;
             						}
             				}
             			}
             			if (best > 0) {
             				spl->min_lo = 1;
             				spl->min_hi = 0;
             				return ec;
             			}
             			/* Then sample the backward diagonals. */
             			for (best = 0, d = bmax; d >= bmin; d -= 2) {
             				dd = d > bmid ? d - bmid: bmid - d;
             				i1 = kvdb[d];
             				i2 = i1 - d;
             				v = (lim1 - i1) + (lim2 - i2) - dd;
             				if (v > XDL_K_HEUR * ec && v > best &&
             				    off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
             				    off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
             					/* Accept only if the snake_cnt records from the point on match. */
             					for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
             						if (k == xenv->snake_cnt - 1) {
             							best = v;
             							spl->i1 = i1;
             							spl->i2 = i2;
             							break;
             						}
             				}
             			}
             			if (best > 0) {
             				spl->min_lo = 0;
             				spl->min_hi = 1;
             				return ec;
             			}
             		}
             		/*
             		 * Enough is enough. We spent too much time here and now we collect
             		 * the furthest reaching path using the (i1 + i2) measure.
             		 */
             		if (ec >= xenv->mxcost) {
             			long fbest, fbest1, bbest, bbest1;
             			/* Furthest forward point, clamped to the box. */
             			fbest = fbest1 = -1;
             			for (d = fmax; d >= fmin; d -= 2) {
             				i1 = XDL_MIN(kvdf[d], lim1);
             				i2 = i1 - d;
             				if (lim2 < i2)
             					i1 = lim2 + d, i2 = lim2;
             				if (fbest < i1 + i2) {
             					fbest = i1 + i2;
             					fbest1 = i1;
             				}
             			}
             			/* Furthest backward point, clamped to the box. */
             			bbest = bbest1 = XDL_LINE_MAX;
             			for (d = bmax; d >= bmin; d -= 2) {
             				i1 = XDL_MAX(off1, kvdb[d]);
             				i2 = i1 - d;
             				if (i2 < off2)
             					i1 = off2 + d, i2 = off2;
             				if (i1 + i2 < bbest) {
             					bbest = i1 + i2;
             					bbest1 = i1;
             				}
             			}
             			/* Split at whichever path covered more distance from its corner. */
             			if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
             				spl->i1 = fbest1;
             				spl->i2 = fbest - fbest1;
             				spl->min_lo = 1;
             				spl->min_hi = 0;
             			} else {
             				spl->i1 = bbest1;
             				spl->i2 = bbest - bbest1;
             				spl->min_lo = 0;
             				spl->min_hi = 1;
             			}
             			return ec;
             		}
             	}
             }
             /*
              * Rule: "Divide et Impera". Compare the record hashes of dd1 in
              * [off1, lim1) with those of dd2 in [off2, lim2): trim the common head
              * and tail, mark everything as changed when one side is exhausted, and
              * otherwise split the remaining box with xdl_split() and recurse into the
              * two halves. Changed records are flagged through rchg[rindex[...]].
              *
              * Returns 0 on success and -1 if a nested step reported failure.
              */
             int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
             		 diffdata_t *dd2, long off2, long lim2,
             		 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
             	unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
             	xdpsplit_t spl;
             	char *changed;
             	long *index;
             	/* Drop the records that match at the start of the box ... */
             	while (off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]) {
             		off1++;
             		off2++;
             	}
             	/* ... and the ones that match at its end. */
             	while (off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]) {
             		lim1--;
             		lim2--;
             	}
             	/* First side exhausted: whatever is left on the second side changed. */
             	if (off1 == lim1) {
             		changed = dd2->rchg;
             		index = dd2->rindex;
             		while (off2 < lim2) {
             			changed[index[off2]] = 1;
             			off2++;
             		}
             		return 0;
             	}
             	/* Second side exhausted: whatever is left on the first side changed. */
             	if (off2 == lim2) {
             		changed = dd1->rchg;
             		index = dd1->rindex;
             		while (off1 < lim1) {
             			changed[index[off1]] = 1;
             			off1++;
             		}
             		return 0;
             	}
             	/* Divide ... */
             	spl.i1 = spl.i2 = 0;
             	if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
             		      need_min, &spl, xenv) < 0)
             		return -1;
             	/* ... et Impera: lower box first, then the upper one. */
             	if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
             			 kvdf, kvdb, spl.min_lo, xenv) < 0)
             		return -1;
             	if (xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
             			 kvdf, kvdb, spl.min_hi, xenv) < 0)
             		return -1;
             	return 0;
             }
             /*
              * Prepare the diff environment xe for mf1/mf2 and run the record
              * comparison over it. Returns 0 on success. On failure returns -1; if
              * the environment had already been prepared it is released with
              * xdl_free_env() before returning.
              */
             int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
             		xdfenv_t *xe) {
             	long ndiags, origin;
             	long *kvd, *kvdf, *kvdb;
             	int need_min, rc;
             	xdalgoenv_t xenv;
             	diffdata_t dd1, dd2;
             	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0)
             		return -1;
             	/*
             	 * A single allocation holds both K vectors: the forward path first,
             	 * the backward path ndiags entries later.
             	 */
             	ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
             	kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long));
             	if (!kvd) {
             		xdl_free_env(xe);
             		return -1;
             	}
             	/* Both vectors are shifted by the same amount past their base. */
             	origin = xe->xdf2.nreff + 1;
             	kvdf = kvd + origin;
             	kvdb = kvd + ndiags + origin;
             	xenv.mxcost = xdl_bogosqrt(ndiags);
             	if (xenv.mxcost < XDL_MAX_COST_MIN)
             		xenv.mxcost = XDL_MAX_COST_MIN;
             	xenv.snake_cnt = XDL_SNAKE_CNT;
             	xenv.heur_min = XDL_HEUR_MIN_COST;
             	dd1.nrec = xe->xdf1.nreff;
             	dd1.ha = xe->xdf1.ha;
             	dd1.rchg = xe->xdf1.rchg;
             	dd1.rindex = xe->xdf1.rindex;
             	dd2.nrec = xe->xdf2.nreff;
             	dd2.ha = xe->xdf2.ha;
             	dd2.rchg = xe->xdf2.rchg;
             	dd2.rindex = xe->xdf2.rindex;
             	need_min = (xpp->flags & XDF_NEED_MINIMAL) != 0;
             	rc = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
             			  kvdf, kvdb, need_min, &xenv);
             	/* The K vectors are scratch space, released on every path. */
             	xdl_free(kvd);
             	if (rc < 0) {
             		xdl_free_env(xe);
             		return -1;
             	}
             	return 0;
             }
             /*
              * Allocate a change record describing (i1, i2, chg1, chg2), with its
              * ignore flag cleared, and link it in front of xscr. Returns the new
              * record, or NULL if the allocation failed.
              */
             static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
             	xdchange_t *node = (xdchange_t *) xdl_malloc(sizeof(*node));
             	if (node) {
             		node->i1 = i1;
             		node->i2 = i2;
             		node->chg1 = chg1;
             		node->chg2 = chg2;
             		node->ignore = 0;
             		node->next = xscr;
             	}
             	return node;
             }
             /*
              * Return 1 if the two records have the same hash and xdl_recmatch()
              * accepts their contents under flags, 0 otherwise.
              */
             static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
             {
             	/* Compare the stored hashes first; contents are only checked on a hit. */
             	if (rec1->ha != rec2->ha)
             		return 0;
             	return xdl_recmatch(rec1->ptr, rec1->size,
             			    rec2->ptr, rec2->size,
             			    flags) != 0;
             }
             /*
              * If a line is indented more than this, get_indent() just returns this value.
              * This avoids having to do absurd amounts of work for data that are not
              * human-readable text, and also ensures that the output of get_indent fits within
              * an int.
              */
             #define MAX_INDENT 200
             /*
              * Measure the leading whitespace of rec in columns: a space counts as
              * one column and a TAB advances to the next multiple of 8; any other
              * whitespace character is skipped without adding width. The result is
              * capped at MAX_INDENT. Returns -1 when the record is empty or consists
              * of whitespace only.
              */
             static int get_indent(xrecord_t *rec)
             {
             	long pos;
             	int width = 0;
             	for (pos = 0; pos < rec->size; pos++) {
             		char ch = rec->ptr[pos];
             		/* First non-blank character: the indentation ends here. */
             		if (!XDL_ISSPACE(ch))
             			return width;
             		switch (ch) {
             		case ' ':
             			width++;
             			break;
             		case '\t':
             			width += 8 - width % 8;
             			break;
             		default:
             			/* other whitespace does not contribute */
             			break;
             		}
             		if (width >= MAX_INDENT)
             			return MAX_INDENT;
             	}
             	/* Ran off the end without seeing a non-blank character. */
             	return -1;
             }
             /*
              * If more than this number of consecutive blank rows are found, just return this
              * value. This avoids requiring O(N^2) work for pathological cases, and also
              * ensures that the output of score_split fits in an int.
              */
             #define MAX_BLANKS 20
             /*
              * Characteristics measured about a hypothetical split position. Filled in
              * by measure_split() and consumed by score_add_split().
              */
             struct split_measurement {
             	/*
             	 * Is the split at the end of the file (aside from any blank lines)?
             	 */
             	int end_of_file;
             	/*
             	 * How much is the line immediately following the split indented (or -1 if
             	 * the line is blank):
             	 */
             	int indent;
             	/*
             	 * How many consecutive lines above the split are blank? Counting stops
             	 * at MAX_BLANKS.
             	 */
             	int pre_blank;
             	/*
             	 * How much is the nearest non-blank line above the split indented (or -1
             	 * if there is no such line)? Reported as 0 when MAX_BLANKS blank lines
             	 * were seen before a non-blank one was found.
             	 */
             	int pre_indent;
             	/*
             	 * How many lines after the line following the split are blank? Counting
             	 * stops at MAX_BLANKS.
             	 */
             	int post_blank;
             	/*
             	 * How much is the nearest non-blank line after the line following the
             	 * split indented (or -1 if there is no such line)? Reported as 0 when
             	 * MAX_BLANKS blank lines were seen before a non-blank one was found.
             	 */
             	int post_indent;
             };
             /*
              * Badness of a candidate split. score_add_split() adds to both fields and
              * score_cmp() compares two of these.
              */
             struct split_score {
             	/* The effective indent of this split (smaller is preferred). */
             	int effective_indent;
             	/* Penalty for this split (smaller is preferred). */
             	int penalty;
             };
             /*
              * Describe, in m, what the surroundings of a hypothetical split of xdf
              * above line `split` look like: whether the split is at the end of the
              * file, the indentation of the line right after it, and the number of
              * blank lines / indentation of the nearest non-blank line on each side.
              * At most MAX_BLANKS blank lines are examined in either direction; when
              * that limit is hit the corresponding indent is reported as 0.
              */
             static void measure_split(const xdfile_t *xdf, long split,
             			  struct split_measurement *m)
             {
             	long pos;
             	int blanks, indent;
             	int at_eof = (split >= xdf->nrec);
             	m->end_of_file = at_eof;
             	m->indent = at_eof ? -1 : get_indent(xdf->recs[split]);
             	/* Walk upwards from the line just above the split. */
             	blanks = 0;
             	indent = -1;
             	pos = split - 1;
             	while (pos >= 0) {
             		indent = get_indent(xdf->recs[pos]);
             		if (indent != -1)
             			break;
             		if (++blanks == MAX_BLANKS) {
             			indent = 0;
             			break;
             		}
             		pos--;
             	}
             	m->pre_blank = blanks;
             	m->pre_indent = indent;
             	/* Walk downwards from the line after the one following the split. */
             	blanks = 0;
             	indent = -1;
             	pos = split + 1;
             	while (pos < xdf->nrec) {
             		indent = get_indent(xdf->recs[pos]);
             		if (indent != -1)
             			break;
             		if (++blanks == MAX_BLANKS) {
             			indent = 0;
             			break;
             		}
             		pos++;
             	}
             	m->post_blank = blanks;
             	m->post_indent = indent;
             }
             /*
              * The empirically-determined weight factors used by score_add_split() below.
              * Larger values mean that the position is a less favorable place to split.
              *
              * Note that scores are only ever compared against each other, so multiplying
              * all of these weight/penalty values by the same factor wouldn't change the
              * heuristic's behavior. Still, we need to set that arbitrary scale *somehow*.
              * In practice, these numbers are chosen to be large enough that they can be
              * adjusted relative to each other with sufficient precision despite using
              * integer math.
              */
             /* Penalty if there are no non-blank lines before the split */
             #define START_OF_FILE_PENALTY 1
             /* Penalty if there are no non-blank lines after the split */
             #define END_OF_FILE_PENALTY 21
             /* Multiplier for the number of blank lines around the split */
             #define TOTAL_BLANK_WEIGHT (-30)
             /* Multiplier for the number of blank lines after the split */
             #define POST_BLANK_WEIGHT 6
             /*
              * Penalties applied if the line is indented more than its predecessor
              */
             #define RELATIVE_INDENT_PENALTY (-4)
             #define RELATIVE_INDENT_WITH_BLANK_PENALTY 10
             /*
              * Penalties applied if the line is indented less than both its predecessor and
              * its successor
              */
             #define RELATIVE_OUTDENT_PENALTY 24
             #define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17
             /*
              * Penalties applied if the line is indented less than its predecessor but not
              * less than its successor
              */
             #define RELATIVE_DEDENT_PENALTY 23
             #define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17
             /*
              * We only consider whether the sum of the effective indents for splits are
              * less than (-1), equal to (0), or greater than (+1) each other. The resulting
              * value is multiplied by the following weight and combined with the penalty to
              * determine the better of two scores.
              */
             #define INDENT_WEIGHT 60
             /*
              * Add the badness of the hypothetical split measured in m to the running
              * score s: s->effective_indent grows by the split's effective indent and
              * s->penalty by the sum of the applicable weight factors (larger means a
              * less favorable split). The weight factors were determined empirically
              * using the tools and corpus described in
              *
              *     https://github.com/mhagger/diff-slider-tools
              *
              * Also see that project if you want to improve the weights based on, for
              * example, a larger or more diverse corpus.
              */
             static void score_add_split(const struct split_measurement *m, struct split_score *s)
             {
             	int blanks_after, blanks_around, eff_indent, has_blanks;
             	/* Penalty contributed by this split; folded into s at the end. */
             	int delta = 0;
             	if (m->pre_indent == -1 && m->pre_blank == 0)
             		delta += START_OF_FILE_PENALTY;
             	if (m->end_of_file)
             		delta += END_OF_FILE_PENALTY;
             	/*
             	 * Blank lines following the split, counting the line immediately
             	 * after it when that line is itself blank.
             	 */
             	if (m->indent == -1)
             		blanks_after = 1 + m->post_blank;
             	else
             		blanks_after = 0;
             	blanks_around = m->pre_blank + blanks_after;
             	has_blanks = (blanks_around != 0);
             	/* Penalties based on nearby blank lines. */
             	delta += TOTAL_BLANK_WEIGHT * blanks_around;
             	delta += POST_BLANK_WEIGHT * blanks_after;
             	/* A blank line takes the indent of the next non-blank line. */
             	eff_indent = (m->indent != -1) ? m->indent : m->post_indent;
             	/* Note that the effective indent is -1 at the end of the file. */
             	s->effective_indent += eff_indent;
             	/*
             	 * Relative-indent penalties apply only when both this line and its
             	 * predecessor have a known, different indentation.
             	 */
             	if (eff_indent != -1 && m->pre_indent != -1 &&
             	    eff_indent != m->pre_indent) {
             		if (eff_indent > m->pre_indent) {
             			/* Indented more than its predecessor. */
             			delta += has_blanks ?
             				RELATIVE_INDENT_WITH_BLANK_PENALTY :
             				RELATIVE_INDENT_PENALTY;
             		} else if (m->post_indent != -1 && m->post_indent > eff_indent) {
             			/*
             			 * Indented less than its predecessor and less than
             			 * what follows: likely the start of a new block.
             			 */
             			delta += has_blanks ?
             				RELATIVE_OUTDENT_WITH_BLANK_PENALTY :
             				RELATIVE_OUTDENT_PENALTY;
             		} else {
             			/*
             			 * Indented less than its predecessor but not less
             			 * than what follows: probably the end of a block.
             			 */
             			delta += has_blanks ?
             				RELATIVE_DEDENT_WITH_BLANK_PENALTY :
             				RELATIVE_DEDENT_PENALTY;
             		}
             	}
             	s->penalty += delta;
             }
             /*
              * Compare two split scores. Only the sign of the difference between the
              * effective indents is used; it is scaled by INDENT_WEIGHT and added to
              * the difference of the penalties. The result is negative, zero or
              * positive.
              */
             static int score_cmp(struct split_score *s1, struct split_score *s2)
             {
             	int sign;
             	if (s1->effective_indent > s2->effective_indent)
             		sign = 1;
             	else if (s1->effective_indent < s2->effective_indent)
             		sign = -1;
             	else
             		sign = 0;
             	return INDENT_WEIGHT * sign + (s1->penalty - s2->penalty);
             }
             /*
              * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group
              * of lines that was inserted or deleted from the corresponding version of the
              * file). We consider there to be such a group at the beginning of the file, at
              * the end of the file, and between any two unchanged lines, though most such
              * groups will usually be empty.
              *
              * If the first line in a group is equal to the line following the group, then
              * the group can be slid down. Similarly, if the last line in a group is equal
              * to the line preceding the group, then the group can be slid up. See
              * group_slide_down() and group_slide_up().
              *
              * Note that loops that are testing for changed lines in xdf->rchg do not need
              * index bounding since the array is prepared with a zero at position -1 and N.
              *
              * The group covers the half-open line range [start, end).
              */
             struct xdlgroup {
             	/*
             	 * The index of the first changed line in the group, or the index of
             	 * the unchanged line above which the (empty) group is located.
             	 */
             	long start;
             	/*
             	 * The index of the first unchanged line after the group (exclusive
             	 * bound). For an empty group, end is equal to start.
             	 */
             	long end;
             };
             /*
              * Make g describe the first group of xdf: it starts at line 0 and
              * extends over the leading run of lines flagged in xdf->rchg.
              */
             static void group_init(xdfile_t *xdf, struct xdlgroup *g)
             {
             	long pos = 0;
             	while (xdf->rchg[pos])
             		pos++;
             	g->start = 0;
             	g->end = pos;
             }
             /*
              * Advance g to the next (possibly empty) group in xdf and return 0.
              * When g already ends at xdf->nrec it is left untouched and -1 is
              * returned.
              */
             static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)
             {
             	long pos;
             	if (g->end == xdf->nrec)
             		return -1;
             	/* Skip the unchanged line that terminated the current group. */
             	pos = g->end + 1;
             	g->start = pos;
             	while (xdf->rchg[pos])
             		pos++;
             	g->end = pos;
             	return 0;
             }
             /*
              * Step g back to the previous (possibly empty) group in xdf and return
              * 0. When g already starts at line 0 it is left untouched and -1 is
              * returned.
              */
             static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
             {
             	long pos;
             	if (g->start == 0)
             		return -1;
             	/* The previous group ends on the unchanged line just above g. */
             	pos = g->start - 1;
             	g->end = pos;
             	while (xdf->rchg[pos - 1])
             		pos--;
             	g->start = pos;
             	return 0;
             }
             /*
              * Try to slide g one line toward the end of the file. This is possible
              * when the first line of the group matches the line right after it. If
              * the shifted group then touches a following group, g grows to include
              * it. Returns 0 when the group was moved and -1 when it cannot be.
              */
             static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags)
             {
             	if (g->end >= xdf->nrec)
             		return -1;
             	if (!recs_match(xdf->recs[g->start], xdf->recs[g->end], flags))
             		return -1;
             	/* Unmark the old first line and mark the line that followed the group. */
             	xdf->rchg[g->start] = 0;
             	g->start++;
             	xdf->rchg[g->end] = 1;
             	g->end++;
             	/* Absorb a group we may have bumped into. */
             	while (xdf->rchg[g->end])
             		g->end++;
             	return 0;
             }
             /*
              * Try to shift g one line toward the beginning of the file. The shift is
              * possible when the line immediately above the group matches the last line
              * of the group. If the shifted group then touches a preceding group, g is
              * widened to cover that group too.
              *
              * Returns 0 when the group was moved, -1 when it cannot move up.
              */
             static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags)
             {
             	/* Already flush against the start of the file. */
             	if (g->start <= 0)
             		return -1;
             	/* The line entering the group must equal the line leaving it. */
             	if (!recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags))
             		return -1;
             	g->start--;
             	xdf->rchg[g->start] = 1;
             	g->end--;
             	xdf->rchg[g->end] = 0;
             	/* Swallow a group that has become adjacent above. */
             	while (xdf->rchg[g->start - 1])
             		g->start--;
             	return 0;
             }
             /*
              * Report an internal consistency failure: write msg to stderr with a "BUG: "
              * prefix and terminate the process with exit status 1. Does not return.
              */
             static void xdl_bug(const char *msg)
             {
             	fprintf(stderr, "BUG: %s\n", msg);
             	exit(1);
             }
             /*
              * For the indentation heuristic, stop searching for a better slide position
              * once MAX_BORING candidate positions in a row have failed to improve on the
              * best score seen so far. This defends the indentation heuristic logic
              * against pathological cases. The value is not picked scientifically but
              * should be good enough.
              */
             #define MAX_BORING 100
             /*
              * Move back and forward change groups for a consistent and pretty diff output.
              * This also helps in finding joinable change groups and reducing the diff
              * size.
              *
              * xdf is the file whose change groups are slid (its rchg[] is rewritten);
              * xdfo is the other file of the pair and is only walked, never modified.
              * flags is forwarded to the slide helpers and is also tested for
              * XDF_INDENT_HEURISTIC.
              *
              * Always returns 0. If the two group cursors ever fall out of step the
              * function calls xdl_bug(), which terminates the process.
              */
             int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
             	/* g walks the groups of xdf; go walks those of xdfo in lockstep. */
             	struct xdlgroup g, go;
             	long earliest_end, end_matching_other;
             	long groupsize;
             	group_init(xdf, &g);
             	group_init(xdfo, &go);
             	while (1) {
             		/* If the group is empty in the to-be-compacted file, skip it: */
             		if (g.end == g.start)
             			goto next;
             		/*
             		 * Now shift the change up and then down as far as possible in
             		 * each direction. If it bumps into any other changes, merge them.
             		 * The pass is repeated until it no longer changes the group size,
             		 * i.e. until no further merge happened.
             		 */
             		do {
             			groupsize = g.end - g.start;
             			/*
             			 * Keep track of the last "end" index that causes this
             			 * group to align with a group of changed lines in the
             			 * other file. -1 indicates that we haven't found such
             			 * a match yet:
             			 */
             			end_matching_other = -1;
             			/*
             			 * Shift the group backward as much as possible. Every
             			 * successful slide moves go back by one group as well so
             			 * that both cursors stay in step:
             			 */
             			while (!group_slide_up(xdf, &g, flags))
             				if (group_previous(xdfo, &go))
             					xdl_bug("group sync broken sliding up");
             			/*
             			 * This is the highest that this group can be shifted.
             			 * Record its end index:
             			 */
             			earliest_end = g.end;
             			/* A non-empty go means the other file has changes here. */
             			if (go.end > go.start)
             				end_matching_other = g.end;
             			/* Now shift the group forward as far as possible: */
             			while (1) {
             				if (group_slide_down(xdf, &g, flags))
             					break;
             				if (group_next(xdfo, &go))
             					xdl_bug("group sync broken sliding down");
             				if (go.end > go.start)
             					end_matching_other = g.end;
             			}
             		} while (groupsize != g.end - g.start);
             		/*
             		 * If the group can be shifted, then we can possibly use this
             		 * freedom to produce a more intuitive diff.
             		 *
             		 * The group is currently shifted as far down as possible, so the
             		 * heuristics below only have to handle upwards shifts.
             		 */
             		if (g.end == earliest_end) {
             			/* no shifting was possible */
             		} else if (end_matching_other != -1) {
             			/*
             			 * Move the possibly merged group of changes back to line
             			 * up with the last group of changes from the other file
             			 * that it can align with.
             			 */
             			while (go.end == go.start) {
             				if (group_slide_up(xdf, &g, flags))
             					xdl_bug("match disappeared");
             				if (group_previous(xdfo, &go))
             					xdl_bug("group sync broken sliding to match");
             			}
             		} else if (flags & XDF_INDENT_HEURISTIC) {
             			/*
             			 * Indent heuristic: a group of pure add/delete lines
             			 * implies two splits, one between the end of the "before"
             			 * context and the start of the group, and another between
             			 * the end of the group and the beginning of the "after"
             			 * context. Some splits are aesthetically better and some
             			 * are worse. We compute a badness "score" for each split,
             			 * and add the scores for the two splits to define a
             			 * "score" for each position that the group can be shifted
             			 * to. Then we pick the shift with the lowest score.
             			 */
             			/* shift and best_shift are candidate values for g.end. */
             			long shift, best_shift = -1;
             			struct split_score best_score;
             			/*
             			 * This is O(N * MAX_BLANKS) (N = shift-able lines).
             			 * Even with MAX_BLANKS bounded to a small value, a
             			 * large N could still make this loop take several
             			 * times longer than the main diff algorithm. The
             			 * "boring" value is to help cut down N to something
             			 * like (MAX_BORING + groupsize).
             			 *
             			 * Scan from bottom to top. So we can exit the loop
             			 * without compromising the assumption "for a same best
             			 * score, pick the bottommost shift".
             			 */
             			int boring = 0;
             			for (shift = g.end; shift >= earliest_end; shift--) {
             				struct split_measurement m;
             				struct split_score score = {0, 0};
             				int cmp;
             				/* Split at the candidate end of the group... */
             				measure_split(xdf, shift, &m);
             				score_add_split(&m, &score);
             				/* ...and split at the candidate start of the group. */
             				measure_split(xdf, shift - groupsize, &m);
             				score_add_split(&m, &score);
             				/*
             				 * The first candidate always wins; best_score is
             				 * only read once it has been assigned.
             				 */
             				if (best_shift == -1) {
             					cmp = -1;
             				} else {
             					cmp = score_cmp(&score, &best_score);
             				}
             				if (cmp < 0) {
             					/* Strictly better: remember it, reset the counter. */
             					boring = 0;
             					best_score.effective_indent = score.effective_indent;
             					best_score.penalty = score.penalty;
             					best_shift = shift;
             				} else {
             					boring += 1;
             					if (boring >= MAX_BORING)
             						break;
             				}
             			}
             			/* Slide the group up until its end sits at best_shift. */
             			while (g.end > best_shift) {
             				if (group_slide_up(xdf, &g, flags))
             					xdl_bug("best shift unreached");
             				if (group_previous(xdfo, &go))
             					xdl_bug("group sync broken sliding to blank line");
             			}
             		}
             	next:
             		/* Move past the just-processed group: */
             		if (group_next(xdf, &g))
             			break;
             		if (group_next(xdfo, &go))
             			xdl_bug("group sync broken moving to next group");
             	}
             	/* xdf has no groups left, so xdfo must have none left either. */
             	if (!group_next(xdfo, &go))
             		xdl_bug("group sync broken at end of file");
             	return 0;
             }
             /*
              * Turn the per-line change flags of both files in xe into an edit script.
              *
              * Both files are walked in parallel from their last record toward the first.
              * Each maximal run of changed lines (in either file) is handed to
              * xdl_add_change() together with the script built so far.
              *
              * On success, stores the most recently created change (the one nearest the
              * start of the files, or NULL when nothing changed) in *xscr and returns 0.
              * If xdl_add_change() fails, the partial script is freed, *xscr is left
              * untouched and -1 is returned.
              */
             int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
             	char *chg_a = xe->xdf1.rchg;
             	char *chg_b = xe->xdf2.rchg;
             	xdchange_t *script = NULL;
             	xdchange_t *added;
             	long i1 = xe->xdf1.nrec;
             	long i2 = xe->xdf2.nrec;
             	long l1, l2;
             	while (i1 >= 0 || i2 >= 0) {
             		if (chg_a[i1 - 1] || chg_b[i2 - 1]) {
             			/* Remember where the run ends, then back up to where it starts. */
             			l1 = i1;
             			while (chg_a[i1 - 1])
             				i1--;
             			l2 = i2;
             			while (chg_b[i2 - 1])
             				i2--;
             			added = xdl_add_change(script, i1, i2, l1 - i1, l2 - i2);
             			if (!added) {
             				xdl_free_script(script);
             				return -1;
             			}
             			script = added;
             		}
             		i1--;
             		i2--;
             	}
             	*xscr = script;
             	return 0;
             }
             /*
              * Release every node of the edit script starting at xscr by following the
              * next links. Passing NULL is a no-op.
              */
             void xdl_free_script(xdchange_t *xscr) {
             	while (xscr != NULL) {
             		/* Fetch the successor before the current node is released. */
             		xdchange_t *following = xscr->next;
             		xdl_free(xscr);
             		xscr = following;
             	}
             }
             /*
              * Feed the edit script xscr to the xecfg->hunk_func callback, one call per
              * hunk as delimited by xdl_get_hunk().
              *
              * Without XDL_EMIT_BDIFFHUNK each call receives, for both files, the first
              * line of the hunk and the number of lines it spans.
              *
              * With XDL_EMIT_BDIFFHUNK the callback instead receives the ranges lying
              * between hunks: for each hunk, the span from the end of the previous hunk
              * (initially 0) up to the start of this one, provided that span is non-empty
              * in at least one file; and finally, unconditionally, the span from the end
              * of the last hunk up to the record count of each file.
              *
              * ecb->priv is passed through as the callback's last argument. Returns 0, or
              * -1 as soon as the callback returns a negative value.
              */
             static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
             			      xdemitconf_t const *xecfg)
             {
             	xdchange_t *xch, *xche;
             	long i1, i2, n1, n2;
             	if (!(xecfg->flags & XDL_EMIT_BDIFFHUNK)) {
             		/* Regular mode: report each hunk as (start, length) pairs. */
             		xch = xscr;
             		while (xch) {
             			xche = xdl_get_hunk(&xch, xecfg);
             			if (!xch)
             				break;
             			if (xecfg->hunk_func(
             					xch->i1, xche->i1 + xche->chg1 - xch->i1,
             					xch->i2, xche->i2 + xche->chg2 - xch->i2,
             					ecb->priv) < 0)
             				return -1;
             			xch = xche->next;
             		}
             		return 0;
             	}
             	/* bdiff mode: report the ranges between hunks. */
             	i1 = 0;
             	i2 = 0;
             	n1 = xe->xdf1.nrec;
             	n2 = xe->xdf2.nrec;
             	xch = xscr;
             	while (xch) {
             		xche = xdl_get_hunk(&xch, xecfg);
             		if (!xch)
             			break;
             		if ((xch->i1 > i1 || xch->i2 > i2) &&
             		    xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)
             			return -1;
             		/* The next range begins right after this hunk. */
             		i1 = xche->i1 + xche->chg1;
             		i2 = xche->i2 + xche->chg2;
             		xch = xche->next;
             	}
             	/* Trailing range up to the end of both files. */
             	if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)
             		return -1;
             	return 0;
             }
             /*
              * Set the ignore field of every change in xscr. A change is marked
              * ignorable when xdl_blankline() accepts every line it covers in the first
              * file (chg1 lines from i1) and in the second file (chg2 lines from i2);
              * the scan stops at the first line that is rejected.
              */
             static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
             {
             	xdchange_t *chg;
             	for (chg = xscr; chg; chg = chg->next) {
             		xrecord_t **lines;
             		long k;
             		int blank = 1;
             		/* Lines covered by the change in the first file. */
             		lines = &xe->xdf1.recs[chg->i1];
             		k = 0;
             		while (blank && k < chg->chg1) {
             			blank = xdl_blankline(lines[k]->ptr, lines[k]->size, flags);
             			k++;
             		}
             		/* Lines covered by the change in the second file. */
             		lines = &xe->xdf2.recs[chg->i2];
             		k = 0;
             		while (blank && k < chg->chg2) {
             			blank = xdl_blankline(lines[k]->ptr, lines[k]->size, flags);
             			k++;
             		}
             		chg->ignore = blank;
             	}
             }
             /*
              * Diff mf1 against mf2 and emit the result.
              *
              * Stages: xdl_do_diff() fills the diff environment, xdl_change_compact() is
              * run on each file in turn, xdl_build_script() creates the edit script,
              * changes are optionally marked ignorable (XDF_IGNORE_BLANK_LINES), and the
              * script is emitted through xdl_call_hunk_func() when xecfg->hunk_func is
              * set, otherwise through xdl_emit_diff().
              *
              * Returns 0 on success and -1 if any stage fails. The script and the
              * environment are released on every path that created them.
              */
             int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
             	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
             	emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
             	xdchange_t *xscr;
             	xdfenv_t xe;
             	int status = -1;
             	if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0)
             		return -1;
             	if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0)
             		goto release_env;
             	if (xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0)
             		goto release_env;
             	if (xdl_build_script(&xe, &xscr) < 0)
             		goto release_env;
             	if (xpp->flags & XDF_IGNORE_BLANK_LINES)
             		xdl_mark_ignorable(xscr, &xe, xpp->flags);
             	if (ef(&xe, xscr, ecb, xecfg) >= 0)
             		status = 0;
             	/* The script exists from here on, whether or not emitting succeeded. */
             	xdl_free_script(xscr);
             release_env:
             	xdl_free_env(&xe);
             	return status;
             }

             /*
              *  LibXDiff by Davide Libenzi ( File Differential Library )
              *  Copyright (C) 2003  Davide Libenzi
              *
              *  This library is free software; you can redistribute it and/or
              *  modify it under the terms of the GNU Lesser General Public
              *  License as published by the Free Software Foundation; either
              *  version 2.1 of the License, or (at your option) any later version.
              *
              *  This library is distributed in the hope that it will be useful,
              *  but WITHOUT ANY WARRANTY; without even the implied warranty of
              *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
              *  Lesser General Public License for more details.
              *
              *  You should have received a copy of the GNU Lesser General Public
              *  License along with this library; if not, see
              *  <http://www.gnu.org/licenses/>.
              *
              *  Davide Libenzi <davidel@xmailserver.org>
              *
              */
             /*
              * Umbrella header: pulls in the C library headers and the xdiff internal
              * headers used by the library's source files. Guarded against multiple
              * inclusion by XINCLUDE_H.
              */
             #if !defined(XINCLUDE_H)
             #define XINCLUDE_H
             #include <ctype.h>
             #include <stdio.h>
             #include <stdlib.h>
             /*
              * NOTE(review): this directive was prefixed with a stray '-' (a diff
              * deletion marker), which made the line invalid C. The marker is removed
              * here; <unistd.h> is POSIX-only, so confirm whether the include should be
              * dropped altogether for non-POSIX builds.
              */
             #include <unistd.h>
             #include <string.h>
             #include <limits.h>
             #include "xmacros.h"
             #include "xdiff.h"
             #include "xtypes.h"
             #include "xutils.h"
             #include "xprepare.h"
             #include "xdiffi.h"
             #include "xemit.h"
             #endif /* #if !defined(XINCLUDE_H) */