upstream/mercurial-mirror Commit - r13729:4a9c0923

1

/*

1

/*

2

bdiff.c - efficient binary diff extension for Mercurial

2

bdiff.c - efficient binary diff extension for Mercurial

3

4

5

6

This software may be used and distributed according to the terms of

6

This software may be used and distributed according to the terms of

7

the GNU General Public License, incorporated herein by reference.

7

the GNU General Public License, incorporated herein by reference.

8

9

Based roughly on Python difflib

9

Based roughly on Python difflib

10

*/

10

*/

11

12

#include <Python.h>

12

#include <Python.h>

13

#include <stdlib.h>

13

#include <stdlib.h>

14

#include <string.h>

14

#include <string.h>

15

#include <limits.h>

15

#include <limits.h>

16

17

#if defined __hpux || defined __SUNPRO_C || defined _AIX

17

#if defined __hpux || defined __SUNPRO_C || defined _AIX

18

#define inline

18

#define inline

19

#endif

19

#endif

20

21

#ifdef __linux

21

#ifdef __linux

22

#define inline __inline

22

#define inline __inline

23

#endif

23

#endif

24

25

#ifdef _WIN32

25

#ifdef _WIN32

26

#ifdef _MSC_VER

26

#ifdef _MSC_VER

27

#define inline __inline

27

#define inline __inline

28

typedef unsigned long uint32_t;

28

typedef unsigned long uint32_t;

29

#else

29

#else

30

#include <stdint.h>

30

#include <stdint.h>

31

#endif

31

#endif

32

/*
 * Local htonl() for Windows builds, which do not take the <arpa/inet.h>
 * branch below.  The four bytes of x are reversed unconditionally.
 */
static uint32_t htonl(uint32_t x)
{
	uint32_t swapped = 0;
	int i;

	/* peel bytes off the low end of x and push them onto the result */
	for (i = 0; i < 4; i++) {
		swapped = (swapped << 8) | (x & 0xffUL);
		x >>= 8;
	}

	return swapped;
}

39

#else

39

#else

40

#include <sys/types.h>

40

#include <sys/types.h>

41

#if defined __BEOS__ && !defined __HAIKU__

41

#if defined __BEOS__ && !defined __HAIKU__

42

#include <ByteOrder.h>

42

#include <ByteOrder.h>

43

#else

43

#else

44

#include <arpa/inet.h>

44

#include <arpa/inet.h>

45

#endif

45

#endif

46

#include <inttypes.h>

46

#include <inttypes.h>

47

#endif

47

#endif

48

49

#include "util.h"

49

#include "util.h"

50

51

/* One line of an input buffer, as filled in by splitlines(). */
struct line {
	int h;		/* hash of the line's bytes */
	int len;	/* length in bytes, counting the trailing '\n' if any */
	int n;		/* next line index in the match chain, INT_MAX = end */
	int e;		/* equivalence class (hash slot) set by equatelines() */
	const char *l;	/* start of the line inside the caller's buffer */
};

55

56

/*
 * Generic (index, count) pair.
 *
 * equatelines() uses it as a hash-table slot: pos is the head line index
 * of a class in b (INT_MAX when the slot is empty) and len counts the
 * lines in that class.  longest_match() uses it as per-line scratch for
 * b: pos is the line of a that last matched here and len is the length
 * of the matching run ending here.
 */
struct pos {
	int pos;
	int len;
};

59

60

struct hunk;

/*
 * One matching block in a singly linked list: lines a1..a2-1 of the
 * first text equal lines b1..b2-1 of the second.
 */
struct hunk {
	int a1;
	int a2;
	int b1;
	int b2;
	struct hunk *next;	/* following block, NULL at the end of the list */
};

65

66

int splitlines(const char *a, int len, struct line **lr)

66

static int splitlines(const char *a, int len, struct line **lr)

67

{

67

{

68

int h, i;

68

int h, i;

69

const char *p, *b = a;

69

const char *p, *b = a;

70

const char * const plast = a + len - 1;

70

const char * const plast = a + len - 1;

71

struct line *l;

71

struct line *l;

72

73

/* count the lines */

73

/* count the lines */

74

i = 1; /* extra line for sentinel */

74

i = 1; /* extra line for sentinel */

75

for (p = a; p < a + len; p++)

75

for (p = a; p < a + len; p++)

76

if (*p == '\n' || p == plast)

76

if (*p == '\n' || p == plast)

77

i++;

77

i++;

78

79

*lr = l = (struct line *)malloc(sizeof(struct line) * i);

79

*lr = l = (struct line *)malloc(sizeof(struct line) * i);

80

if (!l)

80

if (!l)

81

return -1;

81

return -1;

82

83

/* build the line array and calculate hashes */

83

/* build the line array and calculate hashes */

84

h = 0;

84

h = 0;

85

for (p = a; p < a + len; p++) {

85

for (p = a; p < a + len; p++) {

86

/* Leonid Yuriev's hash */

86

/* Leonid Yuriev's hash */

87

h = (h * 1664525) + *p + 1013904223;

87

h = (h * 1664525) + *p + 1013904223;

88

89

if (*p == '\n' || p == plast) {

89

if (*p == '\n' || p == plast) {

90

l->h = h;

90

l->h = h;

91

h = 0;

91

h = 0;

92

l->len = p - b + 1;

92

l->len = p - b + 1;

93

l->l = b;

93

l->l = b;

94

l->n = INT_MAX;

94

l->n = INT_MAX;

95

l++;

95

l++;

96

b = p + 1;

96

b = p + 1;

97

}

97

}

98

}

98

}

99

100

/* set up a sentinel */

100

/* set up a sentinel */

101

l->h = l->len = 0;

101

l->h = l->len = 0;

102

l->l = a + len;

102

l->l = a + len;

103

return i - 1;

103

return i - 1;

104

}

104

}

105

106

~~int~~ inline cmp(struct line *a, struct line *b)

106

static inline int cmp(struct line *a, struct line *b)

107

{

107

{

108

return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);

108

return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);

109

}

109

}

110

111

static int equatelines(struct line *a, int an, struct line *b, int bn)

111

static int equatelines(struct line *a, int an, struct line *b, int bn)

112

{

112

{

113

int i, j, buckets = 1, t, scale;

113

int i, j, buckets = 1, t, scale;

114

struct pos *h = NULL;

114

struct pos *h = NULL;

115

116

/* build a hash table of the next highest power of 2 */

116

/* build a hash table of the next highest power of 2 */

117

while (buckets < bn + 1)

117

while (buckets < bn + 1)

118

buckets *= 2;

118

buckets *= 2;

119

120

/* try to allocate a large hash table to avoid collisions */

120

/* try to allocate a large hash table to avoid collisions */

121

for (scale = 4; scale; scale /= 2) {

121

for (scale = 4; scale; scale /= 2) {

122

h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));

122

h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));

123

if (h)

123

if (h)

124

break;

124

break;

125

}

125

}

126

127

if (!h)

127

if (!h)

128

return 0;

128

return 0;

129

130

buckets = buckets * scale - 1;

130

buckets = buckets * scale - 1;

131

132

/* clear the hash table */

132

/* clear the hash table */

133

for (i = 0; i <= buckets; i++) {

133

for (i = 0; i <= buckets; i++) {

134

h[i].pos = INT_MAX;

134

h[i].pos = INT_MAX;

135

h[i].len = 0;

135

h[i].len = 0;

136

}

136

}

137

138

/* add lines to the hash table chains */

138

/* add lines to the hash table chains */

139

for (i = bn - 1; i >= 0; i--) {

139

for (i = bn - 1; i >= 0; i--) {

140

/* find the equivalence class */

140

/* find the equivalence class */

141

for (j = b[i].h & buckets; h[j].pos != INT_MAX;

141

for (j = b[i].h & buckets; h[j].pos != INT_MAX;

142

j = (j + 1) & buckets)

142

j = (j + 1) & buckets)

143

if (!cmp(b + i, b + h[j].pos))

143

if (!cmp(b + i, b + h[j].pos))

144

break;

144

break;

145

146

/* add to the head of the equivalence class */

146

/* add to the head of the equivalence class */

147

b[i].n = h[j].pos;

147

b[i].n = h[j].pos;

148

b[i].e = j;

148

b[i].e = j;

149

h[j].pos = i;

149

h[j].pos = i;

150

h[j].len++; /* keep track of popularity */

150

h[j].len++; /* keep track of popularity */

151

}

151

}

152

153

/* compute popularity threshold */

153

/* compute popularity threshold */

154

t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

154

t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

155

156

/* match items in a to their equivalence class in b */

156

/* match items in a to their equivalence class in b */

157

for (i = 0; i < an; i++) {

157

for (i = 0; i < an; i++) {

158

/* find the equivalence class */

158

/* find the equivalence class */

159

for (j = a[i].h & buckets; h[j].pos != INT_MAX;

159

for (j = a[i].h & buckets; h[j].pos != INT_MAX;

160

j = (j + 1) & buckets)

160

j = (j + 1) & buckets)

161

if (!cmp(a + i, b + h[j].pos))

161

if (!cmp(a + i, b + h[j].pos))

162

break;

162

break;

163

164

a[i].e = j; /* use equivalence class for quick compare */

164

a[i].e = j; /* use equivalence class for quick compare */

165

if (h[j].len <= t)

165

if (h[j].len <= t)

166

a[i].n = h[j].pos; /* point to head of match list */

166

a[i].n = h[j].pos; /* point to head of match list */

167

else

167

else

168

a[i].n = INT_MAX; /* too popular */

168

a[i].n = INT_MAX; /* too popular */

169

}

169

}

170

171

/* discard hash tables */

171

/* discard hash tables */

172

free(h);

172

free(h);

173

return 1;

173

return 1;

174

}

174

}

175

176

static int longest_match(struct line *a, struct line *b, struct pos *pos,

176

static int longest_match(struct line *a, struct line *b, struct pos *pos,

177

int a1, int a2, int b1, int b2, int *omi, int *omj)

177

int a1, int a2, int b1, int b2, int *omi, int *omj)

178

{

178

{

179

int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;

179

int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;

180

181

for (i = a1; i < a2; i++) {

181

for (i = a1; i < a2; i++) {

182

/* skip things before the current block */

182

/* skip things before the current block */

183

for (j = a[i].n; j < b1; j = b[j].n)

183

for (j = a[i].n; j < b1; j = b[j].n)

184

;

184

;

185

186

/* loop through all lines match a[i] in b */

186

/* loop through all lines match a[i] in b */

187

for (; j < b2; j = b[j].n) {

187

for (; j < b2; j = b[j].n) {

188

/* does this extend an earlier match? */

188

/* does this extend an earlier match? */

189

if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)

189

if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)

190

k = pos[j - 1].len + 1;

190

k = pos[j - 1].len + 1;

191

else

191

else

192

k = 1;

192

k = 1;

193

pos[j].pos = i;

193

pos[j].pos = i;

194

pos[j].len = k;

194

pos[j].len = k;

195

196

/* best match so far? */

196

/* best match so far? */

197

if (k > mk) {

197

if (k > mk) {

198

mi = i;

198

mi = i;

199

mj = j;

199

mj = j;

200

mk = k;

200

mk = k;

201

}

201

}

202

}

202

}

203

}

203

}

204

205

if (mk) {

205

if (mk) {

206

mi = mi - mk + 1;

206

mi = mi - mk + 1;

207

mj = mj - mk + 1;

207

mj = mj - mk + 1;

208

}

208

}

209

210

/* expand match to include neighboring popular lines */

210

/* expand match to include neighboring popular lines */

211

while (mi - mb > a1 && mj - mb > b1 &&

211

while (mi - mb > a1 && mj - mb > b1 &&

212

a[mi - mb - 1].e == b[mj - mb - 1].e)

212

a[mi - mb - 1].e == b[mj - mb - 1].e)

213

mb++;

213

mb++;

214

while (mi + mk < a2 && mj + mk < b2 &&

214

while (mi + mk < a2 && mj + mk < b2 &&

215

a[mi + mk].e == b[mj + mk].e)

215

a[mi + mk].e == b[mj + mk].e)

216

mk++;

216

mk++;

217

218

*omi = mi - mb;

218

*omi = mi - mb;

219

*omj = mj - mb;

219

*omj = mj - mb;

220

221

return mk + mb;

221

return mk + mb;

222

}

222

}

223

224

static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,

224

static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,

225

int a1, int a2, int b1, int b2, struct hunk *l)

225

int a1, int a2, int b1, int b2, struct hunk *l)

226

{

226

{

227

int i, j, k;

227

int i, j, k;

228

229

while (1) {

229

while (1) {

230

/* find the longest match in this chunk */

230

/* find the longest match in this chunk */

231

k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);

231

k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);

232

if (!k)

232

if (!k)

233

return l;

233

return l;

234

235

/* and recurse on the remaining chunks on either side */

235

/* and recurse on the remaining chunks on either side */

236

l = recurse(a, b, pos, a1, i, b1, j, l);

236

l = recurse(a, b, pos, a1, i, b1, j, l);

237

if (!l)

237

if (!l)

238

return NULL;

238

return NULL;

239

240

l->next = (struct hunk *)malloc(sizeof(struct hunk));

240

l->next = (struct hunk *)malloc(sizeof(struct hunk));

241

if (!l->next)

241

if (!l->next)

242

return NULL;

242

return NULL;

243

244

l = l->next;

244

l = l->next;

245

l->a1 = i;

245

l->a1 = i;

246

l->a2 = i + k;

246

l->a2 = i + k;

247

l->b1 = j;

247

l->b1 = j;

248

l->b2 = j + k;

248

l->b2 = j + k;

249

l->next = NULL;

249

l->next = NULL;

250

251

/* tail-recursion didn't happen, so do equivalent iteration */

251

/* tail-recursion didn't happen, so do equivalent iteration */

252

a1 = i + k;

252

a1 = i + k;

253

b1 = j + k;

253

b1 = j + k;

254

}

254

}

255

}

255

}

256

257

static int diff(struct line *a, int an, struct line *b, int bn,

257

static int diff(struct line *a, int an, struct line *b, int bn,

258

struct hunk *base)

258

struct hunk *base)

259

{

259

{

260

struct hunk *curr;

260

struct hunk *curr;

261

struct pos *pos;

261

struct pos *pos;

262

int t, count = 0;

262

int t, count = 0;

263

264

/* allocate and fill arrays */

264

/* allocate and fill arrays */

265

t = equatelines(a, an, b, bn);

265

t = equatelines(a, an, b, bn);

266

pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

266

pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

267

268

if (pos && t) {

268

if (pos && t) {

269

/* generate the matching block list */

269

/* generate the matching block list */

270

271

curr = recurse(a, b, pos, 0, an, 0, bn, base);

271

curr = recurse(a, b, pos, 0, an, 0, bn, base);

272

if (!curr)

272

if (!curr)

273

return -1;

273

return -1;

274

275

/* sentinel end hunk */

275

/* sentinel end hunk */

276

curr->next = (struct hunk *)malloc(sizeof(struct hunk));

276

curr->next = (struct hunk *)malloc(sizeof(struct hunk));

277

if (!curr->next)

277

if (!curr->next)

278

return -1;

278

return -1;

279

curr = curr->next;

279

curr = curr->next;

280

curr->a1 = curr->a2 = an;

280

curr->a1 = curr->a2 = an;

281

curr->b1 = curr->b2 = bn;

281

curr->b1 = curr->b2 = bn;

282

curr->next = NULL;

282

curr->next = NULL;

283

}

283

}

284

285

free(pos);

285

free(pos);

286

287

/* normalize the hunk list, try to push each hunk towards the end */

287

/* normalize the hunk list, try to push each hunk towards the end */

288

for (curr = base->next; curr; curr = curr->next) {

288

for (curr = base->next; curr; curr = curr->next) {

289

struct hunk *next = curr->next;

289

struct hunk *next = curr->next;

290

int shift = 0;

290

int shift = 0;

291

292

if (!next)

292

if (!next)

293

break;

293

break;

294

295

if (curr->a2 == next->a1)

295

if (curr->a2 == next->a1)

296

while (curr->a2 + shift < an && curr->b2 + shift < bn

296

while (curr->a2 + shift < an && curr->b2 + shift < bn

297

&& !cmp(a + curr->a2 + shift,

297

&& !cmp(a + curr->a2 + shift,

298

b + curr->b2 + shift))

298

b + curr->b2 + shift))

299

shift++;

299

shift++;

300

else if (curr->b2 == next->b1)

300

else if (curr->b2 == next->b1)

301

while (curr->b2 + shift < bn && curr->a2 + shift < an

301

while (curr->b2 + shift < bn && curr->a2 + shift < an

302

&& !cmp(b + curr->b2 + shift,

302

&& !cmp(b + curr->b2 + shift,

303

a + curr->a2 + shift))

303

a + curr->a2 + shift))

304

shift++;

304

shift++;

305

if (!shift)

305

if (!shift)

306

continue;

306

continue;

307

curr->b2 += shift;

307

curr->b2 += shift;

308

next->b1 += shift;

308

next->b1 += shift;

309

curr->a2 += shift;

309

curr->a2 += shift;

310

next->a1 += shift;

310

next->a1 += shift;

311

}

311

}

312

313

for (curr = base->next; curr; curr = curr->next)

313

for (curr = base->next; curr; curr = curr->next)

314

count++;

314

count++;

315

return count;

315

return count;

316

}

316

}

317

318

static void freehunks(struct hunk *l)

318

static void freehunks(struct hunk *l)

319

{

319

{

320

struct hunk *n;

320

struct hunk *n;

321

for (; l; l = n) {

321

for (; l; l = n) {

322

n = l->next;

322

n = l->next;

323

free(l);

323

free(l);

324

}

324

}

325

}

325

}

326

327

/*
 * Python entry point: blocks(a, b) -> list of (a1, a2, b1, b2) tuples.
 *
 * Takes two string objects, splits them into lines and returns one tuple
 * per matching block found by diff(), the last one being the sentinel
 * block at the end of both inputs.  Returns NULL with an exception set
 * on bad arguments or when memory runs out.
 */
static PyObject *blocks(PyObject *self, PyObject *args)
{
	PyObject *sa, *sb, *rl = NULL, *m;
	struct line *a, *b;
	struct hunk l, *h;
	int an, bn, count, pos = 0;

	/* the cleanup code frees l.next, so it must be valid before the
	   first jump to nomem */
	l.next = NULL;

	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
		return NULL;

	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

	if (!a || !b)
		goto nomem;

	count = diff(a, an, b, bn, &l);
	if (count < 0)
		goto nomem;

	rl = PyList_New(count);
	if (!rl)
		goto nomem;

	for (h = l.next; h; h = h->next) {
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
		if (!m) {
			/* never hand back a list with an empty slot */
			Py_DECREF(rl);
			rl = NULL;
			goto nomem;
		}
		/* PyList_SetItem takes over the reference to m */
		PyList_SetItem(rl, pos, m);
		pos++;
	}

nomem:
	free(a);
	free(b);
	freehunks(l.next);
	return rl ? rl : PyErr_NoMemory();
}

364

365

/*
 * Python entry point: bdiff(a, b) -> binary patch turning a into b.
 *
 * The patch is a sequence of fragments, one per gap between consecutive
 * matching blocks.  Each fragment is a 12-byte header of three
 * big-endian 32-bit integers (start and end byte offset of the replaced
 * range in a, and the length of the replacement) followed by the
 * replacement bytes taken from b.  Returns NULL with an exception set on
 * bad arguments or when memory runs out.
 */
static PyObject *bdiff(PyObject *self, PyObject *args)
{
	char *sa, *sb;
	PyObject *result = NULL;
	struct line *al, *bl;
	struct hunk l, *h;
	uint32_t hdr[3];
	char *rb;
	int an, bn, len = 0, la, lb, count;

	/* the cleanup code frees l.next, so it must be valid before the
	   first jump to nomem */
	l.next = NULL;

	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
		return NULL;

	an = splitlines(sa, la, &al);
	bn = splitlines(sb, lb, &bl);
	if (!al || !bl)
		goto nomem;

	count = diff(al, an, bl, bn, &l);
	if (count < 0)
		goto nomem;

	/* calculate length of output; la and lb are reused as line
	   cursors (end of the previous matching block) from here on */
	la = lb = 0;
	for (h = l.next; h; h = h->next) {
		if (h->a1 != la || h->b1 != lb)
			len += 12 + bl[h->b1].l - bl[lb].l;
		la = h->a2;
		lb = h->b2;
	}

	result = PyBytes_FromStringAndSize(NULL, len);

	if (!result)
		goto nomem;

	/* build binary patch */
	rb = PyBytes_AsString(result);
	la = lb = 0;

	for (h = l.next; h; h = h->next) {
		if (h->a1 != la || h->b1 != lb) {
			len = bl[h->b1].l - bl[lb].l;

			/*
			 * Assemble the header in properly typed storage and
			 * copy it out as bytes; writing uint32_t values
			 * through a char buffer is not valid C.
			 */
			hdr[0] = htonl((uint32_t)(al[la].l - al->l));
			hdr[1] = htonl((uint32_t)(al[h->a1].l - al->l));
			hdr[2] = htonl((uint32_t)len);
			memcpy(rb, hdr, 12);
			memcpy(rb + 12, bl[lb].l, len);
			rb += 12 + len;
		}
		la = h->a2;
		lb = h->b2;
	}

nomem:
	free(al);
	free(bl);
	freehunks(l.next);
	return result ? result : PyErr_NoMemory();
}

425

426

static char mdiff_doc[] = "Efficient binary diff.";

426

static char mdiff_doc[] = "Efficient binary diff.";

427

428

static PyMethodDef methods[] = {

428

static PyMethodDef methods[] = {

429

{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},

429

{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},

430

{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},

430

{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},

431

{NULL, NULL}

431

{NULL, NULL}

432

};

432

};

433

434

#ifdef IS_PY3K

434

#ifdef IS_PY3K

435

static struct PyModuleDef bdiff_module = {

435

static struct PyModuleDef bdiff_module = {

436

PyModuleDef_HEAD_INIT,

436

PyModuleDef_HEAD_INIT,

437

"bdiff",

437

"bdiff",

438

mdiff_doc,

438

mdiff_doc,

439

-1,

439

-1,

440

methods

440

methods

441

};

441

};

442

443

PyMODINIT_FUNC PyInit_bdiff(void)

443

PyMODINIT_FUNC PyInit_bdiff(void)

444

{

444

{

445

return PyModule_Create(&bdiff_module);

445

return PyModule_Create(&bdiff_module);

446

}

446

}

447

#else

447

#else

448

PyMODINIT_FUNC initbdiff(void)

448

PyMODINIT_FUNC initbdiff(void)

449

{

449

{

450

Py_InitModule3("bdiff", methods, mdiff_doc);

450

Py_InitModule3("bdiff", methods, mdiff_doc);

451

}

451

}

452

#endif

452

#endif

453

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             /*
              bdiff.c - efficient binary diff extension for Mercurial
              Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
              This software may be used and distributed according to the terms of
              the GNU General Public License, incorporated herein by reference.
              Based roughly on Python difflib
             */
             #include <Python.h>
             #include <stdlib.h>
             #include <string.h>
             #include <limits.h>
             #if defined __hpux || defined __SUNPRO_C || defined _AIX
             #define inline
             #endif
             #ifdef __linux
             #define inline __inline
             #endif
             #ifdef _WIN32
             #ifdef _MSC_VER
             #define inline __inline
             typedef unsigned long uint32_t;
             #else
             #include <stdint.h>
             #endif
             /*
              * Replacement htonl for the _WIN32 build: unconditionally reverses the
              * order of the four bytes of x.
              */
             static uint32_t htonl(uint32_t x)
             {
             	uint32_t lowest = (x & 0x000000ffUL) << 24;   /* byte 0 -> byte 3 */
             	uint32_t low = (x & 0x0000ff00UL) << 8;       /* byte 1 -> byte 2 */
             	uint32_t high = (x & 0x00ff0000UL) >> 8;      /* byte 2 -> byte 1 */
             	uint32_t highest = (x & 0xff000000UL) >> 24;  /* byte 3 -> byte 0 */

             	return lowest | low | high | highest;
             }
             #else
             #include <sys/types.h>
             #if defined __BEOS__ && !defined __HAIKU__
             #include <ByteOrder.h>
             #else
             #include <arpa/inet.h>
             #endif
             #include <inttypes.h>
             #endif
             #include "util.h"
             /* One line of an input buffer plus the bookkeeping used while matching. */
             struct line {
             	int h;         /* hash of the line's bytes, computed by splitlines() */
             	int len;       /* length in bytes, including the terminating '\n' if present */
             	int n;         /* index of the next candidate line in b, INT_MAX ends the chain */
             	int e;         /* equivalence class: hash table slot assigned by equatelines() */
             	const char *l; /* first byte of the line inside the caller's buffer */
             };
             /*
              * Small (position, length) pair with two uses in this file:
              *  - equatelines(): hash table slot; pos is the head line index of an
              *    equivalence class in b, len counts how many lines share that class.
              *  - longest_match(): per-b-line record; pos is the index in a that last
              *    matched, len is the length of the matching run ending there.
              */
             struct pos {
             	int pos;
             	int len;
             };
             struct hunk;

             /*
              * Node of a singly linked list of matching blocks: lines a1..a2-1 of the
              * first input match lines b1..b2-1 of the second input.
              */
             struct hunk {
             	int a1;            /* first matching line in a */
             	int a2;            /* one past the last matching line in a */
             	int b1;            /* first matching line in b */
             	int b2;            /* one past the last matching line in b */
             	struct hunk *next; /* following hunk, NULL at the end of the list */
             };
-            int splitlines(const char *a, int len, struct line **lr)
+            static int splitlines(const char *a, int len, struct line **lr)
             {
             	int h, i;
             	const char *p, *b = a;
             	const char * const plast = a + len - 1;
             	struct line *l;
             	/* count the lines */
             	i = 1; /* extra line for sentinel */
             	for (p = a; p < a + len; p++)
             		if (*p == '\n' || p == plast)
             			i++;
             	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
             	if (!l)
             		return -1;
             	/* build the line array and calculate hashes */
             	h = 0;
             	for (p = a; p < a + len; p++) {
             		/* Leonid Yuriev's hash */
             		h = (h * 1664525) + *p + 1013904223;
             		if (*p == '\n' || p == plast) {
             			l->h = h;
             			h = 0;
             			l->len = p - b + 1;
             			l->l = b;
             			l->n = INT_MAX;
             			l++;
             			b = p + 1;
             		}
             	}
             	/* set up a sentinel */
             	l->h = l->len = 0;
             	l->l = a + len;
             	return i - 1;
             }
-            int inline cmp(struct line *a, struct line *b)
+            static inline int cmp(struct line *a, struct line *b)
             {
             	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
             }
             /*
              * Group the lines of b into equivalence classes and link every line of a
              * to its class.
              *
              * After a successful call:
              *  - b[i].n chains to the next line of b in the same class (INT_MAX ends
              *    the chain) and b[i].e holds the class id (hash table slot);
              *  - a[i].e holds the class id of the matching class in b, and a[i].n is
              *    the first line of b in that class, or INT_MAX when there is no match
              *    or the class is more popular than the threshold t.
              *
              * Returns 1 on success, 0 when the hash table could not be allocated or
              * its size is not representable.
              */
             static int equatelines(struct line *a, int an, struct line *b, int bn)
             {
             	int i, j, buckets = 1, t, scale;
             	struct pos *h = NULL;
             	/* largest element count whose byte size still fits in size_t */
             	const size_t maxelems = (size_t)-1 / sizeof(struct pos);

             	/* build a hash table of the next highest power of 2 */
             	while (buckets <= bn) {
             		if (buckets > INT_MAX / 2)
             			return 0; /* doubling would overflow int */
             		buckets *= 2;
             	}

             	/* try to allocate a large hash table to avoid collisions */
             	for (scale = 4; scale; scale /= 2) {
             		/* skip sizes that overflow the int mask or the byte count */
             		if (buckets > INT_MAX / scale ||
             		    (size_t)buckets > maxelems / scale)
             			continue;
             		h = (struct pos *)malloc((size_t)scale * buckets *
             					 sizeof(struct pos));
             		if (h)
             			break;
             	}

             	if (!h)
             		return 0;

             	/* from here on buckets is the index mask (table size - 1) */
             	buckets = buckets * scale - 1;

             	/* clear the hash table */
             	for (i = 0; i <= buckets; i++) {
             		h[i].pos = INT_MAX;
             		h[i].len = 0;
             	}

             	/* add lines to the hash table chains */
             	for (i = bn - 1; i >= 0; i--) {
             		/* find the equivalence class */
             		for (j = b[i].h & buckets; h[j].pos != INT_MAX;
             		     j = (j + 1) & buckets)
             			if (!cmp(b + i, b + h[j].pos))
             				break;

             		/* add to the head of the equivalence class */
             		b[i].n = h[j].pos;
             		b[i].e = j;
             		h[j].pos = i;
             		h[j].len++; /* keep track of popularity */
             	}

             	/* compute popularity threshold */
             	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

             	/* match items in a to their equivalence class in b */
             	for (i = 0; i < an; i++) {
             		/* find the equivalence class */
             		for (j = a[i].h & buckets; h[j].pos != INT_MAX;
             		     j = (j + 1) & buckets)
             			if (!cmp(a + i, b + h[j].pos))
             				break;

             		a[i].e = j; /* use equivalence class for quick compare */
             		if (h[j].len <= t)
             			a[i].n = h[j].pos; /* point to head of match list */
             		else
             			a[i].n = INT_MAX; /* too popular */
             	}

             	/* discard hash tables */
             	free(h);
             	return 1;
             }
             /*
              * Find the longest run of matching lines between a[a1..a2) and b[b1..b2).
              *
              * pos is scratch space indexed by line number in b; for each visited line
              * of b it records the last matching index in a and the length of the run
              * ending there. The start of the best run is stored in *omi (index in a)
              * and *omj (index in b); the return value is its length, 0 if no line
              * matches.
              */
             static int longest_match(struct line *a, struct line *b, struct pos *pos,
             			 int a1, int a2, int b1, int b2, int *omi, int *omj)
             {
             	int best_i = a1, best_j = b1, best_len = 0, before = 0;
             	int ai, bj, run;

             	for (ai = a1; ai < a2; ai++) {
             		/* skip candidates that lie before the current block */
             		bj = a[ai].n;
             		while (bj < b1)
             			bj = b[bj].n;

             		/* walk every remaining line of b that matches a[ai] */
             		while (bj < b2) {
             			/* a run continues when the previous pair matched too */
             			run = 1;
             			if (ai > a1 && bj > b1 && pos[bj - 1].pos == ai - 1)
             				run = pos[bj - 1].len + 1;

             			pos[bj].pos = ai;
             			pos[bj].len = run;

             			/* remember the first run that beats the current best */
             			if (run > best_len) {
             				best_i = ai;
             				best_j = bj;
             				best_len = run;
             			}

             			bj = b[bj].n;
             		}
             	}

             	/* best_i/best_j point at the end of the run; move to its start */
             	if (best_len) {
             		best_i = best_i - best_len + 1;
             		best_j = best_j - best_len + 1;
             	}

             	/* expand match to include neighboring popular lines */
             	while (best_i - before > a1 && best_j - before > b1 &&
             	       a[best_i - before - 1].e == b[best_j - before - 1].e)
             		before++;

             	while (best_i + best_len < a2 && best_j + best_len < b2 &&
             	       a[best_i + best_len].e == b[best_j + best_len].e)
             		best_len++;

             	*omi = best_i - before;
             	*omj = best_j - before;

             	return best_len + before;
             }
             /*
              * Append to the list ending at l the matching blocks found between
              * a[a1..a2) and b[b1..b2), in increasing order.
              *
              * The longest match splits the range in two: the part before it is handled
              * by a recursive call, the part after it by looping. Returns the new tail
              * of the list, or NULL when a hunk could not be allocated (hunks linked so
              * far stay reachable from the list head).
              */
             static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
             			    int a1, int a2, int b1, int b2, struct hunk *l)
             {
             	int mi, mj, mlen;
             	struct hunk *node;

             	for (;;) {
             		/* find the longest match in this chunk */
             		mlen = longest_match(a, b, pos, a1, a2, b1, b2, &mi, &mj);
             		if (mlen == 0)
             			return l;

             		/* everything left of the match goes first */
             		l = recurse(a, b, pos, a1, mi, b1, mj, l);
             		if (l == NULL)
             			return NULL;

             		/* then the match itself */
             		node = (struct hunk *)malloc(sizeof(struct hunk));
             		l->next = node;
             		if (node == NULL)
             			return NULL;

             		node->a1 = mi;
             		node->a2 = mi + mlen;
             		node->b1 = mj;
             		node->b2 = mj + mlen;
             		node->next = NULL;
             		l = node;

             		/* iterate on the right-hand side instead of recursing */
             		a1 = mi + mlen;
             		b1 = mj + mlen;
             	}
             }
             /*
              * Compute the list of matching blocks between a (an lines) and b (bn
              * lines) and link it after base; base->next must be NULL on entry.
              *
              * The list ends with a zero-length sentinel hunk at (an, bn). Returns the
              * number of hunks including the sentinel, or -1 when memory ran out. On
              * failure any hunks already linked after base are left for the caller to
              * free.
              */
             static int diff(struct line *a, int an, struct line *b, int bn,
             		 struct hunk *base)
             {
             	struct hunk *curr;
             	struct pos *pos;
             	int t, count = 0;

             	/* allocate and fill arrays */
             	t = equatelines(a, an, b, bn);
             	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

             	/*
             	 * Without both tables no hunks can be generated; report the failure
             	 * instead of returning an empty list that looks like a valid result.
             	 */
             	if (!pos || !t) {
             		free(pos);
             		return -1;
             	}

             	/* generate the matching block list */
             	curr = recurse(a, b, pos, 0, an, 0, bn, base);
             	if (curr) {
             		/* sentinel end hunk */
             		curr->next = (struct hunk *)malloc(sizeof(struct hunk));
             		curr = curr->next;
             	}

             	/* pos is only needed by recurse; release it on every path */
             	free(pos);

             	if (!curr)
             		return -1;

             	curr->a1 = curr->a2 = an;
             	curr->b1 = curr->b2 = bn;
             	curr->next = NULL;

             	/* normalize the hunk list, try to push each hunk towards the end */
             	for (curr = base->next; curr; curr = curr->next) {
             		struct hunk *next = curr->next;
             		int shift = 0;

             		if (!next)
             			break;

             		if (curr->a2 == next->a1)
             			while (curr->a2 + shift < an && curr->b2 + shift < bn
             			       && !cmp(a + curr->a2 + shift,
             				       b + curr->b2 + shift))
             				shift++;
             		else if (curr->b2 == next->b1)
             			while (curr->b2 + shift < bn && curr->a2 + shift < an
             			       && !cmp(b + curr->b2 + shift,
             				       a + curr->a2 + shift))
             				shift++;
             		if (!shift)
             			continue;
             		curr->b2 += shift;
             		next->b1 += shift;
             		curr->a2 += shift;
             		next->a1 += shift;
             	}

             	for (curr = base->next; curr; curr = curr->next)
             		count++;
             	return count;
             }
             /* Free every node of the hunk list starting at l; l may be NULL. */
             static void freehunks(struct hunk *l)
             {
             	while (l) {
             		/* read the link before the node is released */
             		struct hunk *following = l->next;

             		free(l);
             		l = following;
             	}
             }
             /*
              * Python entry point: blocks(a, b).
              *
              * Takes two byte strings and returns a list of (a1, a2, b1, b2) tuples,
              * one per hunk produced by diff(), including the final sentinel hunk.
              * Returns NULL with an exception set on bad arguments or when memory
              * runs out.
              */
             static PyObject *blocks(PyObject *self, PyObject *args)
             {
             	PyObject *sa, *sb, *rl = NULL, *m;
             	struct line *a, *b;
             	struct hunk l, *h;
             	int an, bn, count, pos = 0;

             	/*
             	 * The cleanup code frees the list hanging off l, so the head must be
             	 * valid before the first jump to nomem.
             	 */
             	l.next = NULL;

             	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
             		return NULL;

             	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
             	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

             	if (!a || !b)
             		goto nomem;

             	count = diff(a, an, b, bn, &l);
             	if (count < 0)
             		goto nomem;

             	rl = PyList_New(count);
             	if (!rl)
             		goto nomem;

             	for (h = l.next; h; h = h->next) {
             		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
             		if (!m) {
             			/* do not hand out a list with empty slots */
             			Py_DECREF(rl);
             			rl = NULL;
             			goto nomem;
             		}
             		/* PyList_SetItem takes over the reference to m */
             		PyList_SetItem(rl, pos, m);
             		pos++;
             	}

             nomem:
             	free(a);
             	free(b);
             	freehunks(l.next);
             	return rl ? rl : PyErr_NoMemory();
             }
             /*
              * Python entry point: bdiff(a, b).
              *
              * Returns a byte string holding a binary patch that turns a into b. The
              * patch is a sequence of records, one per changed region: three 32-bit
              * values passed through htonl (start offset in a, end offset in a, length
              * of the replacement) followed by the replacement bytes taken from b.
              * Returns NULL with an exception set on bad arguments or when memory
              * runs out.
              */
             static PyObject *bdiff(PyObject *self, PyObject *args)
             {
             	char *sa, *sb;
             	PyObject *result = NULL;
             	struct line *al, *bl;
             	struct hunk l, *h;
             	char *rb;
             	uint32_t word;
             	int an, bn, len = 0, la, lb, count;

             	/*
             	 * The cleanup code frees the list hanging off l, so the head must be
             	 * valid before the first jump to nomem.
             	 */
             	l.next = NULL;

             	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
             		return NULL;

             	an = splitlines(sa, la, &al);
             	bn = splitlines(sb, lb, &bl);
             	if (!al || !bl)
             		goto nomem;

             	count = diff(al, an, bl, bn, &l);
             	if (count < 0)
             		goto nomem;

             	/* calculate length of output */
             	la = lb = 0;
             	for (h = l.next; h; h = h->next) {
             		if (h->a1 != la || h->b1 != lb)
             			len += 12 + bl[h->b1].l - bl[lb].l;
             		la = h->a2;
             		lb = h->b2;
             	}

             	result = PyBytes_FromStringAndSize(NULL, len);
             	if (!result)
             		goto nomem;

             	/* build binary patch */
             	rb = PyBytes_AsString(result);
             	la = lb = 0;

             	for (h = l.next; h; h = h->next) {
             		if (h->a1 != la || h->b1 != lb) {
             			len = bl[h->b1].l - bl[lb].l;

             			/*
             			 * Copy each header word with memcpy: storing through
             			 * a casted char pointer may be misaligned and breaks
             			 * the aliasing rules.
             			 */
             			word = htonl(al[la].l - al->l);
             			memcpy(rb, &word, 4);
             			word = htonl(al[h->a1].l - al->l);
             			memcpy(rb + 4, &word, 4);
             			word = htonl(len);
             			memcpy(rb + 8, &word, 4);

             			memcpy(rb + 12, bl[lb].l, len);
             			rb += 12 + len;
             		}
             		la = h->a2;
             		lb = h->b2;
             	}

             nomem:
             	free(al);
             	free(bl);
             	freehunks(l.next);
             	return result ? result : PyErr_NoMemory();
             }
             /* Module docstring handed to the module initialisation code. */
             static char mdiff_doc[] = "Efficient binary diff.";

             /* Functions exported to Python; the all-empty entry ends the table. */
             static PyMethodDef methods[] = {
             	{"bdiff", bdiff, METH_VARARGS,
             	 "calculate a binary diff\n"},
             	{"blocks", blocks, METH_VARARGS,
             	 "find a list of matching lines\n"},
             	{NULL, NULL, 0, NULL}
             };
             #ifdef IS_PY3K
             /* Module definition used when IS_PY3K is defined. */
             static struct PyModuleDef bdiff_module = {
             	PyModuleDef_HEAD_INIT,
             	"bdiff",   /* module name */
             	mdiff_doc, /* module docstring */
             	-1,        /* size of per-module state */
             	methods    /* method table */
             };

             /* Module entry point when IS_PY3K is defined: create and return the module. */
             PyMODINIT_FUNC PyInit_bdiff(void)
             {
             	PyObject *mod = PyModule_Create(&bdiff_module);

             	return mod;
             }
             #else
             /* Module entry point when IS_PY3K is not defined: register the module. */
             PyMODINIT_FUNC initbdiff(void)
             {
             	(void)Py_InitModule3("bdiff", methods, mdiff_doc);
             }
             #endif