upstream/mercurial-mirror Commit - r16749:eab8ca17

1

/*

1

/*

2

bdiff.c - efficient binary diff extension for Mercurial

2

bdiff.c - efficient binary diff extension for Mercurial

3

4

5

6

This software may be used and distributed according to the terms of

6

This software may be used and distributed according to the terms of

7

the GNU General Public License, incorporated herein by reference.

7

the GNU General Public License, incorporated herein by reference.

8

9

Based roughly on Python difflib

9

Based roughly on Python difflib

10

*/

10

*/

11

12

#define PY_SSIZE_T_CLEAN

12

#include <Python.h>

13

#include <Python.h>

13

#include <stdlib.h>

14

#include <stdlib.h>

14

#include <string.h>

15

#include <string.h>

15

#include <limits.h>

16

#include <limits.h>

16

17

#include "util.h"

18

#include "util.h"

18

19

struct line {

20

struct line {

20

int hash, ~~len~~, n, e;

21

int hash, n, e;

22

Py_ssize_t len;

21

const char *l;

23

const char *l;

22

};

24

};

23

25

24

struct pos {

26

struct pos {

25

int pos, len;

27

int pos, len;

26

};

28

};

27

29

28

struct hunk;

30

struct hunk;

29

struct hunk {

31

struct hunk {

30

int a1, a2, b1, b2;

32

int a1, a2, b1, b2;

31

struct hunk *next;

33

struct hunk *next;

32

};

34

};

33

35

34

static int splitlines(const char *a, int len, struct line **lr)

36

static int splitlines(const char *a, Py_ssize_t len, struct line **lr)

35

{

37

{

36

unsigned hash;

38

unsigned hash;

37

int i;

39

int i;

38

const char *p, *b = a;

40

const char *p, *b = a;

39

const char * const plast = a + len - 1;

41

const char * const plast = a + len - 1;

40

struct line *l;

42

struct line *l;

41

43

42

/* count the lines */

44

/* count the lines */

43

i = 1; /* extra line for sentinel */

45

i = 1; /* extra line for sentinel */

44

for (p = a; p < a + len; p++)

46

for (p = a; p < a + len; p++)

45

if (*p == '\n' || p == plast)

47

if (*p == '\n' || p == plast)

46

i++;

48

i++;

47

49

48

*lr = l = (struct line *)malloc(sizeof(struct line) * i);

50

*lr = l = (struct line *)malloc(sizeof(struct line) * i);

49

if (!l)

51

if (!l)

50

return -1;

52

return -1;

51

53

52

/* build the line array and calculate hashes */

54

/* build the line array and calculate hashes */

53

hash = 0;

55

hash = 0;

54

for (p = a; p < a + len; p++) {

56

for (p = a; p < a + len; p++) {

55

/* Leonid Yuriev's hash */

57

/* Leonid Yuriev's hash */

56

hash = (hash * 1664525) + (unsigned char)*p + 1013904223;

58

hash = (hash * 1664525) + (unsigned char)*p + 1013904223;

57

59

58

if (*p == '\n' || p == plast) {

60

if (*p == '\n' || p == plast) {

59

l->hash = hash;

61

l->hash = hash;

60

hash = 0;

62

hash = 0;

61

l->len = p - b + 1;

63

l->len = p - b + 1;

62

l->l = b;

64

l->l = b;

63

l->n = INT_MAX;

65

l->n = INT_MAX;

64

l++;

66

l++;

65

b = p + 1;

67

b = p + 1;

66

}

68

}

67

}

69

}

68

70

69

/* set up a sentinel */

71

/* set up a sentinel */

70

l->hash = 0;

72

l->hash = 0;

71

l->len = 0;

73

l->len = 0;

72

l->l = a + len;

74

l->l = a + len;

73

return i - 1;

75

return i - 1;

74

}

76

}

75

77

76

static inline int cmp(struct line *a, struct line *b)

78

static inline int cmp(struct line *a, struct line *b)

77

{

79

{

78

return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);

80

return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);

79

}

81

}

80

82

81

static int equatelines(struct line *a, int an, struct line *b, int bn)

83

static int equatelines(struct line *a, int an, struct line *b, int bn)

82

{

84

{

83

int i, j, buckets = 1, t, scale;

85

int i, j, buckets = 1, t, scale;

84

struct pos *h = NULL;

86

struct pos *h = NULL;

85

87

86

/* build a hash table of the next highest power of 2 */

88

/* build a hash table of the next highest power of 2 */

87

while (buckets < bn + 1)

89

while (buckets < bn + 1)

88

buckets *= 2;

90

buckets *= 2;

89

91

90

/* try to allocate a large hash table to avoid collisions */

92

/* try to allocate a large hash table to avoid collisions */

91

for (scale = 4; scale; scale /= 2) {

93

for (scale = 4; scale; scale /= 2) {

92

h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));

94

h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));

93

if (h)

95

if (h)

94

break;

96

break;

95

}

97

}

96

98

97

if (!h)

99

if (!h)

98

return 0;

100

return 0;

99

101

100

buckets = buckets * scale - 1;

102

buckets = buckets * scale - 1;

101

103

102

/* clear the hash table */

104

/* clear the hash table */

103

for (i = 0; i <= buckets; i++) {

105

for (i = 0; i <= buckets; i++) {

104

h[i].pos = INT_MAX;

106

h[i].pos = INT_MAX;

105

h[i].len = 0;

107

h[i].len = 0;

106

}

108

}

107

109

108

/* add lines to the hash table chains */

110

/* add lines to the hash table chains */

109

for (i = bn - 1; i >= 0; i--) {

111

for (i = bn - 1; i >= 0; i--) {

110

/* find the equivalence class */

112

/* find the equivalence class */

111

for (j = b[i].hash & buckets; h[j].pos != INT_MAX;

113

for (j = b[i].hash & buckets; h[j].pos != INT_MAX;

112

j = (j + 1) & buckets)

114

j = (j + 1) & buckets)

113

if (!cmp(b + i, b + h[j].pos))

115

if (!cmp(b + i, b + h[j].pos))

114

break;

116

break;

115

117

116

/* add to the head of the equivalence class */

118

/* add to the head of the equivalence class */

117

b[i].n = h[j].pos;

119

b[i].n = h[j].pos;

118

b[i].e = j;

120

b[i].e = j;

119

h[j].pos = i;

121

h[j].pos = i;

120

h[j].len++; /* keep track of popularity */

122

h[j].len++; /* keep track of popularity */

121

}

123

}

122

124

123

/* compute popularity threshold */

125

/* compute popularity threshold */

124

t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

126

t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

125

127

126

/* match items in a to their equivalence class in b */

128

/* match items in a to their equivalence class in b */

127

for (i = 0; i < an; i++) {

129

for (i = 0; i < an; i++) {

128

/* find the equivalence class */

130

/* find the equivalence class */

129

for (j = a[i].hash & buckets; h[j].pos != INT_MAX;

131

for (j = a[i].hash & buckets; h[j].pos != INT_MAX;

130

j = (j + 1) & buckets)

132

j = (j + 1) & buckets)

131

if (!cmp(a + i, b + h[j].pos))

133

if (!cmp(a + i, b + h[j].pos))

132

break;

134

break;

133

135

134

a[i].e = j; /* use equivalence class for quick compare */

136

a[i].e = j; /* use equivalence class for quick compare */

135

if (h[j].len <= t)

137

if (h[j].len <= t)

136

a[i].n = h[j].pos; /* point to head of match list */

138

a[i].n = h[j].pos; /* point to head of match list */

137

else

139

else

138

a[i].n = INT_MAX; /* too popular */

140

a[i].n = INT_MAX; /* too popular */

139

}

141

}

140

142

141

/* discard hash tables */

143

/* discard hash tables */

142

free(h);

144

free(h);

143

return 1;

145

return 1;

144

}

146

}

145

147

146

static int longest_match(struct line *a, struct line *b, struct pos *pos,

148

static int longest_match(struct line *a, struct line *b, struct pos *pos,

147

int a1, int a2, int b1, int b2, int *omi, int *omj)

149

int a1, int a2, int b1, int b2, int *omi, int *omj)

148

{

150

{

149

int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;

151

int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;

150

152

151

for (i = a1; i < a2; i++) {

153

for (i = a1; i < a2; i++) {

152

/* skip things before the current block */

154

/* skip things before the current block */

153

for (j = a[i].n; j < b1; j = b[j].n)

155

for (j = a[i].n; j < b1; j = b[j].n)

154

;

156

;

155

157

156

/* loop through all lines match a[i] in b */

158

/* loop through all lines match a[i] in b */

157

for (; j < b2; j = b[j].n) {

159

for (; j < b2; j = b[j].n) {

158

/* does this extend an earlier match? */

160

/* does this extend an earlier match? */

159

if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)

161

if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)

160

k = pos[j - 1].len + 1;

162

k = pos[j - 1].len + 1;

161

else

163

else

162

k = 1;

164

k = 1;

163

pos[j].pos = i;

165

pos[j].pos = i;

164

pos[j].len = k;

166

pos[j].len = k;

165

167

166

/* best match so far? */

168

/* best match so far? */

167

if (k > mk) {

169

if (k > mk) {

168

mi = i;

170

mi = i;

169

mj = j;

171

mj = j;

170

mk = k;

172

mk = k;

171

}

173

}

172

}

174

}

173

}

175

}

174

176

175

if (mk) {

177

if (mk) {

176

mi = mi - mk + 1;

178

mi = mi - mk + 1;

177

mj = mj - mk + 1;

179

mj = mj - mk + 1;

178

}

180

}

179

181

180

/* expand match to include neighboring popular lines */

182

/* expand match to include neighboring popular lines */

181

while (mi - mb > a1 && mj - mb > b1 &&

183

while (mi - mb > a1 && mj - mb > b1 &&

182

a[mi - mb - 1].e == b[mj - mb - 1].e)

184

a[mi - mb - 1].e == b[mj - mb - 1].e)

183

mb++;

185

mb++;

184

while (mi + mk < a2 && mj + mk < b2 &&

186

while (mi + mk < a2 && mj + mk < b2 &&

185

a[mi + mk].e == b[mj + mk].e)

187

a[mi + mk].e == b[mj + mk].e)

186

mk++;

188

mk++;

187

189

188

*omi = mi - mb;

190

*omi = mi - mb;

189

*omj = mj - mb;

191

*omj = mj - mb;

190

192

191

return mk + mb;

193

return mk + mb;

192

}

194

}

193

195

194

static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,

196

static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,

195

int a1, int a2, int b1, int b2, struct hunk *l)

197

int a1, int a2, int b1, int b2, struct hunk *l)

196

{

198

{

197

int i, j, k;

199

int i, j, k;

198

200

199

while (1) {

201

while (1) {

200

/* find the longest match in this chunk */

202

/* find the longest match in this chunk */

201

k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);

203

k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);

202

if (!k)

204

if (!k)

203

return l;

205

return l;

204

206

205

/* and recurse on the remaining chunks on either side */

207

/* and recurse on the remaining chunks on either side */

206

l = recurse(a, b, pos, a1, i, b1, j, l);

208

l = recurse(a, b, pos, a1, i, b1, j, l);

207

if (!l)

209

if (!l)

208

return NULL;

210

return NULL;

209

211

210

l->next = (struct hunk *)malloc(sizeof(struct hunk));

212

l->next = (struct hunk *)malloc(sizeof(struct hunk));

211

if (!l->next)

213

if (!l->next)

212

return NULL;

214

return NULL;

213

215

214

l = l->next;

216

l = l->next;

215

l->a1 = i;

217

l->a1 = i;

216

l->a2 = i + k;

218

l->a2 = i + k;

217

l->b1 = j;

219

l->b1 = j;

218

l->b2 = j + k;

220

l->b2 = j + k;

219

l->next = NULL;

221

l->next = NULL;

220

222

221

/* tail-recursion didn't happen, so do equivalent iteration */

223

/* tail-recursion didn't happen, so do equivalent iteration */

222

a1 = i + k;

224

a1 = i + k;

223

b1 = j + k;

225

b1 = j + k;

224

}

226

}

225

}

227

}

226

228

227

static int diff(struct line *a, int an, struct line *b, int bn,

229

static int diff(struct line *a, int an, struct line *b, int bn,

228

struct hunk *base)

230

struct hunk *base)

229

{

231

{

230

struct hunk *curr;

232

struct hunk *curr;

231

struct pos *pos;

233

struct pos *pos;

232

int t, count = 0;

234

int t, count = 0;

233

235

234

/* allocate and fill arrays */

236

/* allocate and fill arrays */

235

t = equatelines(a, an, b, bn);

237

t = equatelines(a, an, b, bn);

236

pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

238

pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

237

239

238

if (pos && t) {

240

if (pos && t) {

239

/* generate the matching block list */

241

/* generate the matching block list */

240

242

241

curr = recurse(a, b, pos, 0, an, 0, bn, base);

243

curr = recurse(a, b, pos, 0, an, 0, bn, base);

242

if (!curr)

244

if (!curr)

243

return -1;

245

return -1;

244

246

245

/* sentinel end hunk */

247

/* sentinel end hunk */

246

curr->next = (struct hunk *)malloc(sizeof(struct hunk));

248

curr->next = (struct hunk *)malloc(sizeof(struct hunk));

247

if (!curr->next)

249

if (!curr->next)

248

return -1;

250

return -1;

249

curr = curr->next;

251

curr = curr->next;

250

curr->a1 = curr->a2 = an;

252

curr->a1 = curr->a2 = an;

251

curr->b1 = curr->b2 = bn;

253

curr->b1 = curr->b2 = bn;

252

curr->next = NULL;

254

curr->next = NULL;

253

}

255

}

254

256

255

free(pos);

257

free(pos);

256

258

257

/* normalize the hunk list, try to push each hunk towards the end */

259

/* normalize the hunk list, try to push each hunk towards the end */

258

for (curr = base->next; curr; curr = curr->next) {

260

for (curr = base->next; curr; curr = curr->next) {

259

struct hunk *next = curr->next;

261

struct hunk *next = curr->next;

260

int shift = 0;

262

int shift = 0;

261

263

262

if (!next)

264

if (!next)

263

break;

265

break;

264

266

265

if (curr->a2 == next->a1)

267

if (curr->a2 == next->a1)

266

while (curr->a2 + shift < an && curr->b2 + shift < bn

268

while (curr->a2 + shift < an && curr->b2 + shift < bn

267

&& !cmp(a + curr->a2 + shift,

269

&& !cmp(a + curr->a2 + shift,

268

b + curr->b2 + shift))

270

b + curr->b2 + shift))

269

shift++;

271

shift++;

270

else if (curr->b2 == next->b1)

272

else if (curr->b2 == next->b1)

271

while (curr->b2 + shift < bn && curr->a2 + shift < an

273

while (curr->b2 + shift < bn && curr->a2 + shift < an

272

&& !cmp(b + curr->b2 + shift,

274

&& !cmp(b + curr->b2 + shift,

273

a + curr->a2 + shift))

275

a + curr->a2 + shift))

274

shift++;

276

shift++;

275

if (!shift)

277

if (!shift)

276

continue;

278

continue;

277

curr->b2 += shift;

279

curr->b2 += shift;

278

next->b1 += shift;

280

next->b1 += shift;

279

curr->a2 += shift;

281

curr->a2 += shift;

280

next->a1 += shift;

282

next->a1 += shift;

281

}

283

}

282

284

283

for (curr = base->next; curr; curr = curr->next)

285

for (curr = base->next; curr; curr = curr->next)

284

count++;

286

count++;

285

return count;

287

return count;

286

}

288

}

287

289

288

static void freehunks(struct hunk *l)

290

static void freehunks(struct hunk *l)

289

{

291

{

290

struct hunk *n;

292

struct hunk *n;

291

for (; l; l = n) {

293

for (; l; l = n) {

292

n = l->next;

294

n = l->next;

293

free(l);

295

free(l);

294

}

296

}

295

}

297

}

296

298

297

static PyObject *blocks(PyObject *self, PyObject *args)

299

static PyObject *blocks(PyObject *self, PyObject *args)

298

{

300

{

299

PyObject *sa, *sb, *rl = NULL, *m;

301

PyObject *sa, *sb, *rl = NULL, *m;

300

struct line *a, *b;

302

struct line *a, *b;

301

struct hunk l, *h;

303

struct hunk l, *h;

302

int an, bn, count, pos = 0;

304

int an, bn, count, pos = 0;

303

305

304

if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))

306

if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))

305

return NULL;

307

return NULL;

306

308

307

an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);

309

an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);

308

bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

310

bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

309

311

310

if (!a || !b)

312

if (!a || !b)

311

goto nomem;

313

goto nomem;

312

314

313

l.next = NULL;

315

l.next = NULL;

314

count = diff(a, an, b, bn, &l);

316

count = diff(a, an, b, bn, &l);

315

if (count < 0)

317

if (count < 0)

316

goto nomem;

318

goto nomem;

317

319

318

rl = PyList_New(count);

320

rl = PyList_New(count);

319

if (!rl)

321

if (!rl)

320

goto nomem;

322

goto nomem;

321

323

322

for (h = l.next; h; h = h->next) {

324

for (h = l.next; h; h = h->next) {

323

m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);

325

m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);

324

PyList_SetItem(rl, pos, m);

326

PyList_SetItem(rl, pos, m);

325

pos++;

327

pos++;

326

}

328

}

327

329

328

nomem:

330

nomem:

329

free(a);

331

free(a);

330

free(b);

332

free(b);

331

freehunks(l.next);

333

freehunks(l.next);

332

return rl ? rl : PyErr_NoMemory();

334

return rl ? rl : PyErr_NoMemory();

333

}

335

}

334

336

335

static PyObject *bdiff(PyObject *self, PyObject *args)

337

static PyObject *bdiff(PyObject *self, PyObject *args)

336

{

338

{

337

char *sa, *sb, *rb;

339

char *sa, *sb, *rb;

338

PyObject *result = NULL;

340

PyObject *result = NULL;

339

struct line *al, *bl;

341

struct line *al, *bl;

340

struct hunk l, *h;

342

struct hunk l, *h;

341

int an, bn, ~~len~~ = 0, la, lb, count;

343

int an, bn, count;

344

Py_ssize_t len = 0, la, lb;

342

PyThreadState *_save;

345

PyThreadState *_save;

343

346

344

if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))

347

if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))

345

return NULL;

348

return NULL;

346

349

347

_save = PyEval_SaveThread();

350

_save = PyEval_SaveThread();

348

an = splitlines(sa, la, &al);

351

an = splitlines(sa, la, &al);

349

bn = splitlines(sb, lb, &bl);

352

bn = splitlines(sb, lb, &bl);

350

if (!al || !bl)

353

if (!al || !bl)

351

goto nomem;

354

goto nomem;

352

355

353

l.next = NULL;

356

l.next = NULL;

354

count = diff(al, an, bl, bn, &l);

357

count = diff(al, an, bl, bn, &l);

355

if (count < 0)

358

if (count < 0)

356

goto nomem;

359

goto nomem;

357

360

358

/* calculate length of output */

361

/* calculate length of output */

359

la = lb = 0;

362

la = lb = 0;

360

for (h = l.next; h; h = h->next) {

363

for (h = l.next; h; h = h->next) {

361

if (h->a1 != la || h->b1 != lb)

364

if (h->a1 != la || h->b1 != lb)

362

len += 12 + bl[h->b1].l - bl[lb].l;

365

len += 12 + bl[h->b1].l - bl[lb].l;

363

la = h->a2;

366

la = h->a2;

364

lb = h->b2;

367

lb = h->b2;

365

}

368

}

366

PyEval_RestoreThread(_save);

369

PyEval_RestoreThread(_save);

367

_save = NULL;

370

_save = NULL;

368

371

369

result = PyBytes_FromStringAndSize(NULL, len);

372

result = PyBytes_FromStringAndSize(NULL, len);

370

373

371

if (!result)

374

if (!result)

372

goto nomem;

375

goto nomem;

373

376

374

/* build binary patch */

377

/* build binary patch */

375

rb = PyBytes_AsString(result);

378

rb = PyBytes_AsString(result);

376

la = lb = 0;

379

la = lb = 0;

377

380

378

for (h = l.next; h; h = h->next) {

381

for (h = l.next; h; h = h->next) {

379

if (h->a1 != la || h->b1 != lb) {

382

if (h->a1 != la || h->b1 != lb) {

380

len = bl[h->b1].l - bl[lb].l;

383

len = bl[h->b1].l - bl[lb].l;

381

putbe32(al[la].l - al->l, rb);

384

putbe32(al[la].l - al->l, rb);

382

putbe32(al[h->a1].l - al->l, rb + 4);

385

putbe32(al[h->a1].l - al->l, rb + 4);

383

putbe32(len, rb + 8);

386

putbe32(len, rb + 8);

384

memcpy(rb + 12, bl[lb].l, len);

387

memcpy(rb + 12, bl[lb].l, len);

385

rb += 12 + len;

388

rb += 12 + len;

386

}

389

}

387

la = h->a2;

390

la = h->a2;

388

lb = h->b2;

391

lb = h->b2;

389

}

392

}

390

393

391

nomem:

394

nomem:

392

if (_save)

395

if (_save)

393

PyEval_RestoreThread(_save);

396

PyEval_RestoreThread(_save);

394

free(al);

397

free(al);

395

free(bl);

398

free(bl);

396

freehunks(l.next);

399

freehunks(l.next);

397

return result ? result : PyErr_NoMemory();

400

return result ? result : PyErr_NoMemory();

398

}

401

}

399

402

400

/*

403

/*

401

* If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,

404

* If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,

402

* reduce whitespace sequences to a single space and trim remaining whitespace

405

* reduce whitespace sequences to a single space and trim remaining whitespace

403

* from end of lines.

406

* from end of lines.

404

*/

407

*/

405

static PyObject *fixws(PyObject *self, PyObject *args)

408

static PyObject *fixws(PyObject *self, PyObject *args)

406

{

409

{

407

PyObject *s, *result = NULL;

410

PyObject *s, *result = NULL;

408

char allws, c;

411

char allws, c;

409

const char *r;

412

const char *r;

410

int i, rlen, wlen = 0;

413

Py_ssize_t i, rlen, wlen = 0;

411

char *w;

414

char *w;

412

415

413

if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))

416

if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))

414

return NULL;

417

return NULL;

415

r = PyBytes_AsString(s);

418

r = PyBytes_AsString(s);

416

rlen = PyBytes_Size(s);

419

rlen = PyBytes_Size(s);

417

420

418

w = (char *)malloc(rlen ? rlen : 1);

421

w = (char *)malloc(rlen ? rlen : 1);

419

if (!w)

422

if (!w)

420

goto nomem;

423

goto nomem;

421

424

422

for (i = 0; i != rlen; i++) {

425

for (i = 0; i != rlen; i++) {

423

c = r[i];

426

c = r[i];

424

if (c == ' ' || c == '\t' || c == '\r') {

427

if (c == ' ' || c == '\t' || c == '\r') {

425

if (!allws && (wlen == 0 || w[wlen - 1] != ' '))

428

if (!allws && (wlen == 0 || w[wlen - 1] != ' '))

426

w[wlen++] = ' ';

429

w[wlen++] = ' ';

427

} else if (c == '\n' && !allws

430

} else if (c == '\n' && !allws

428

&& wlen > 0 && w[wlen - 1] == ' ') {

431

&& wlen > 0 && w[wlen - 1] == ' ') {

429

w[wlen - 1] = '\n';

432

w[wlen - 1] = '\n';

430

} else {

433

} else {

431

w[wlen++] = c;

434

w[wlen++] = c;

432

}

435

}

433

}

436

}

434

437

435

result = PyBytes_FromStringAndSize(w, wlen);

438

result = PyBytes_FromStringAndSize(w, wlen);

436

439

437

nomem:

440

nomem:

438

free(w);

441

free(w);

439

return result ? result : PyErr_NoMemory();

442

return result ? result : PyErr_NoMemory();

440

}

443

}

441

444

442

445

443

static char mdiff_doc[] = "Efficient binary diff.";

446

static char mdiff_doc[] = "Efficient binary diff.";

444

447

445

static PyMethodDef methods[] = {

448

static PyMethodDef methods[] = {

446

{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},

449

{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},

447

{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},

450

{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},

448

{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},

451

{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},

449

{NULL, NULL}

452

{NULL, NULL}

450

};

453

};

451

454

452

#ifdef IS_PY3K

455

#ifdef IS_PY3K

453

static struct PyModuleDef bdiff_module = {

456

static struct PyModuleDef bdiff_module = {

454

PyModuleDef_HEAD_INIT,

457

PyModuleDef_HEAD_INIT,

455

"bdiff",

458

"bdiff",

456

mdiff_doc,

459

mdiff_doc,

457

-1,

460

-1,

458

methods

461

methods

459

};

462

};

460

463

461

PyMODINIT_FUNC PyInit_bdiff(void)

464

PyMODINIT_FUNC PyInit_bdiff(void)

462

{

465

{

463

return PyModule_Create(&bdiff_module);

466

return PyModule_Create(&bdiff_module);

464

}

467

}

465

#else

468

#else

466

PyMODINIT_FUNC initbdiff(void)

469

PyMODINIT_FUNC initbdiff(void)

467

{

470

{

468

Py_InitModule3("bdiff", methods, mdiff_doc);

471

Py_InitModule3("bdiff", methods, mdiff_doc);

469

}

472

}

470

#endif

473

#endif

471

474

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             /*
              bdiff.c - efficient binary diff extension for Mercurial
              Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
              This software may be used and distributed according to the terms of
              the GNU General Public License, incorporated herein by reference.
              Based roughly on Python difflib
             */
+            #define PY_SSIZE_T_CLEAN
             #include <Python.h>
             #include <stdlib.h>
             #include <string.h>
             #include <limits.h>
             #include "util.h"
             struct line {
-            	int hash, len, n, e;
+            	int hash, n, e;
+            	Py_ssize_t len;
             	const char *l;
             };
             struct pos {
             	int pos, len;
             };
             struct hunk;
             struct hunk {
             	int a1, a2, b1, b2;
             	struct hunk *next;
             };
-            static int splitlines(const char *a, int len, struct line **lr)
+            static int splitlines(const char *a, Py_ssize_t len, struct line **lr)
             {
             	unsigned hash;
             	int i;
             	const char *p, *b = a;
             	const char * const plast = a + len - 1;
             	struct line *l;
             	/* count the lines */
             	i = 1; /* extra line for sentinel */
             	for (p = a; p < a + len; p++)
             		if (*p == '\n' || p == plast)
             			i++;
             	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
             	if (!l)
             		return -1;
             	/* build the line array and calculate hashes */
             	hash = 0;
             	for (p = a; p < a + len; p++) {
             		/* Leonid Yuriev's hash */
             		hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
             		if (*p == '\n' || p == plast) {
             			l->hash = hash;
             			hash = 0;
             			l->len = p - b + 1;
             			l->l = b;
             			l->n = INT_MAX;
             			l++;
             			b = p + 1;
             		}
             	}
             	/* set up a sentinel */
             	l->hash = 0;
             	l->len = 0;
             	l->l = a + len;
             	return i - 1;
             }
             static inline int cmp(struct line *a, struct line *b)
             {
             	return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
             }
             static int equatelines(struct line *a, int an, struct line *b, int bn)
             {
             	int i, j, buckets = 1, t, scale;
             	struct pos *h = NULL;
             	/* build a hash table of the next highest power of 2 */
             	while (buckets < bn + 1)
             		buckets *= 2;
             	/* try to allocate a large hash table to avoid collisions */
             	for (scale = 4; scale; scale /= 2) {
             		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
             		if (h)
             			break;
             	}
             	if (!h)
             		return 0;
             	buckets = buckets * scale - 1;
             	/* clear the hash table */
             	for (i = 0; i <= buckets; i++) {
             		h[i].pos = INT_MAX;
             		h[i].len = 0;
             	}
             	/* add lines to the hash table chains */
             	for (i = bn - 1; i >= 0; i--) {
             		/* find the equivalence class */
             		for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
             		     j = (j + 1) & buckets)
             			if (!cmp(b + i, b + h[j].pos))
             				break;
             		/* add to the head of the equivalence class */
             		b[i].n = h[j].pos;
             		b[i].e = j;
             		h[j].pos = i;
             		h[j].len++; /* keep track of popularity */
             	}
             	/* compute popularity threshold */
             	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
             	/* match items in a to their equivalence class in b */
             	for (i = 0; i < an; i++) {
             		/* find the equivalence class */
             		for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
             		     j = (j + 1) & buckets)
             			if (!cmp(a + i, b + h[j].pos))
             				break;
             		a[i].e = j; /* use equivalence class for quick compare */
             		if (h[j].len <= t)
             			a[i].n = h[j].pos; /* point to head of match list */
             		else
             			a[i].n = INT_MAX; /* too popular */
             	}
             	/* discard hash tables */
             	free(h);
             	return 1;
             }
             static int longest_match(struct line *a, struct line *b, struct pos *pos,
             			 int a1, int a2, int b1, int b2, int *omi, int *omj)
             {
             	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
             	for (i = a1; i < a2; i++) {
             		/* skip things before the current block */
             		for (j = a[i].n; j < b1; j = b[j].n)
             			;
             		/* loop through all lines match a[i] in b */
             		for (; j < b2; j = b[j].n) {
             			/* does this extend an earlier match? */
             			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
             				k = pos[j - 1].len + 1;
             			else
             				k = 1;
             			pos[j].pos = i;
             			pos[j].len = k;
             			/* best match so far? */
             			if (k > mk) {
             				mi = i;
             				mj = j;
             				mk = k;
             			}
             		}
             	}
             	if (mk) {
             		mi = mi - mk + 1;
             		mj = mj - mk + 1;
             	}
             	/* expand match to include neighboring popular lines */
             	while (mi - mb > a1 && mj - mb > b1 &&
             	       a[mi - mb - 1].e == b[mj - mb - 1].e)
             		mb++;
             	while (mi + mk < a2 && mj + mk < b2 &&
             	       a[mi + mk].e == b[mj + mk].e)
             		mk++;
             	*omi = mi - mb;
             	*omj = mj - mb;
             	return mk + mb;
             }
             static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
             			    int a1, int a2, int b1, int b2, struct hunk *l)
             {
             	int i, j, k;
             	while (1) {
             		/* find the longest match in this chunk */
             		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
             		if (!k)
             			return l;
             		/* and recurse on the remaining chunks on either side */
             		l = recurse(a, b, pos, a1, i, b1, j, l);
             		if (!l)
             			return NULL;
             		l->next = (struct hunk *)malloc(sizeof(struct hunk));
             		if (!l->next)
             			return NULL;
             		l = l->next;
             		l->a1 = i;
             		l->a2 = i + k;
             		l->b1 = j;
             		l->b2 = j + k;
             		l->next = NULL;
             		/* tail-recursion didn't happen, so do equivalent iteration */
             		a1 = i + k;
             		b1 = j + k;
             	}
             }
             static int diff(struct line *a, int an, struct line *b, int bn,
             		 struct hunk *base)
             {
             	struct hunk *curr;
             	struct pos *pos;
             	int t, count = 0;
             	/* allocate and fill arrays */
             	t = equatelines(a, an, b, bn);
             	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
             	if (pos && t) {
             		/* generate the matching block list */
             		curr = recurse(a, b, pos, 0, an, 0, bn, base);
             		if (!curr)
             			return -1;
             		/* sentinel end hunk */
             		curr->next = (struct hunk *)malloc(sizeof(struct hunk));
             		if (!curr->next)
             			return -1;
             		curr = curr->next;
             		curr->a1 = curr->a2 = an;
             		curr->b1 = curr->b2 = bn;
             		curr->next = NULL;
             	}
             	free(pos);
             	/* normalize the hunk list, try to push each hunk towards the end */
             	for (curr = base->next; curr; curr = curr->next) {
             		struct hunk *next = curr->next;
             		int shift = 0;
             		if (!next)
             			break;
             		if (curr->a2 == next->a1)
             			while (curr->a2 + shift < an && curr->b2 + shift < bn
             			       && !cmp(a + curr->a2 + shift,
             				       b + curr->b2 + shift))
             				shift++;
             		else if (curr->b2 == next->b1)
             			while (curr->b2 + shift < bn && curr->a2 + shift < an
             			       && !cmp(b + curr->b2 + shift,
             				       a + curr->a2 + shift))
             				shift++;
             		if (!shift)
             			continue;
             		curr->b2 += shift;
             		next->b1 += shift;
             		curr->a2 += shift;
             		next->a1 += shift;
             	}
             	for (curr = base->next; curr; curr = curr->next)
             		count++;
             	return count;
             }
             static void freehunks(struct hunk *l)
             {
             	struct hunk *n;
             	for (; l; l = n) {
             		n = l->next;
             		free(l);
             	}
             }
             static PyObject *blocks(PyObject *self, PyObject *args)
             {
             	PyObject *sa, *sb, *rl = NULL, *m;
             	struct line *a, *b;
             	struct hunk l, *h;
             	int an, bn, count, pos = 0;
             	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
             		return NULL;
             	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
             	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
             	if (!a || !b)
             		goto nomem;
             	l.next = NULL;
             	count = diff(a, an, b, bn, &l);
             	if (count < 0)
             		goto nomem;
             	rl = PyList_New(count);
             	if (!rl)
             		goto nomem;
             	for (h = l.next; h; h = h->next) {
             		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
             		PyList_SetItem(rl, pos, m);
             		pos++;
             	}
             nomem:
             	free(a);
             	free(b);
             	freehunks(l.next);
             	return rl ? rl : PyErr_NoMemory();
             }
             static PyObject *bdiff(PyObject *self, PyObject *args)
             {
             	char *sa, *sb, *rb;
             	PyObject *result = NULL;
             	struct line *al, *bl;
             	struct hunk l, *h;
-            	int an, bn, len = 0, la, lb, count;
+            	int an, bn, count;
+            	Py_ssize_t len = 0, la, lb;
             	PyThreadState *_save;
             	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
             		return NULL;
             	_save = PyEval_SaveThread();
             	an = splitlines(sa, la, &al);
             	bn = splitlines(sb, lb, &bl);
             	if (!al || !bl)
             		goto nomem;
             	l.next = NULL;
             	count = diff(al, an, bl, bn, &l);
             	if (count < 0)
             		goto nomem;
             	/* calculate length of output */
             	la = lb = 0;
             	for (h = l.next; h; h = h->next) {
             		if (h->a1 != la || h->b1 != lb)
             			len += 12 + bl[h->b1].l - bl[lb].l;
             		la = h->a2;
             		lb = h->b2;
             	}
             	PyEval_RestoreThread(_save);
             	_save = NULL;
             	result = PyBytes_FromStringAndSize(NULL, len);
             	if (!result)
             		goto nomem;
             	/* build binary patch */
             	rb = PyBytes_AsString(result);
             	la = lb = 0;
             	for (h = l.next; h; h = h->next) {
             		if (h->a1 != la || h->b1 != lb) {
             			len = bl[h->b1].l - bl[lb].l;
             			putbe32(al[la].l - al->l, rb);
             			putbe32(al[h->a1].l - al->l, rb + 4);
             			putbe32(len, rb + 8);
             			memcpy(rb + 12, bl[lb].l, len);
             			rb += 12 + len;
             		}
             		la = h->a2;
             		lb = h->b2;
             	}
             nomem:
             	if (_save)
             		PyEval_RestoreThread(_save);
             	free(al);
             	free(bl);
             	freehunks(l.next);
             	return result ? result : PyErr_NoMemory();
             }
             /*
              * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
              * reduce whitespace sequences to a single space and trim remaining whitespace
              * from end of lines.
              */
             static PyObject *fixws(PyObject *self, PyObject *args)
             {
             	PyObject *s, *result = NULL;
             	char allws, c;
             	const char *r;
-            	int i, rlen, wlen = 0;
+            	Py_ssize_t i, rlen, wlen = 0;
             	char *w;
             	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
             		return NULL;
             	r = PyBytes_AsString(s);
             	rlen = PyBytes_Size(s);
             	w = (char *)malloc(rlen ? rlen : 1);
             	if (!w)
             		goto nomem;
             	for (i = 0; i != rlen; i++) {
             		c = r[i];
             		if (c == ' ' || c == '\t' || c == '\r') {
             			if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
             				w[wlen++] = ' ';
             		} else if (c == '\n' && !allws
             			  && wlen > 0 && w[wlen - 1] == ' ') {
             			w[wlen - 1] = '\n';
             		} else {
             			w[wlen++] = c;
             		}
             	}
             	result = PyBytes_FromStringAndSize(w, wlen);
             nomem:
             	free(w);
             	return result ? result : PyErr_NoMemory();
             }
             static char mdiff_doc[] = "Efficient binary diff.";
             static PyMethodDef methods[] = {
             	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
             	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
             	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
             	{NULL, NULL}
             };
             #ifdef IS_PY3K
             static struct PyModuleDef bdiff_module = {
             	PyModuleDef_HEAD_INIT,
             	"bdiff",
             	mdiff_doc,
             	-1,
             	methods
             };
             PyMODINIT_FUNC PyInit_bdiff(void)
             {
             	return PyModule_Create(&bdiff_module);
             }
             #else
             PyMODINIT_FUNC initbdiff(void)
             {
             	Py_InitModule3("bdiff", methods, mdiff_doc);
             }
             #endif