upstream/mercurial-mirror Commit - r29416:30789efb

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import re

10

import re

11

import struct

11

import struct

12

import zlib

12

import zlib

13

14

from .i18n import _

14

from .i18n import _

15

from . import (

15

from . import (

16

base85,

16

base85,

17

bdiff,

17

bdiff,

18

error,

18

error,

19

mpatch,

19

mpatch,

20

util,

20

util,

21

)

21

)

22

23

def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline character; only a
    final line that was not newline-terminated in ``text`` comes back
    without one. Other line-boundary characters (a carriage return, for
    instance) are left untouched inside the line. Empty ``text`` yields
    an empty list.
    '''
    lines = [l + '\n' for l in text.split('\n')]
    # split() always returns at least one element, so lines[-1] exists.
    if lines[-1] == '\n':
        # text was empty or ended with a newline: the last piece is the
        # empty string that follows that newline, not a real line.
        lines.pop()
    else:
        # text did not end with a newline: remove the one added above.
        lines[-1] = lines[-1][:-1]
    return lines

32

33

class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    '''

    # Option name -> value used when the caller does not supply one.
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
    }

    def __init__(self, **opts):
        '''Store one attribute per known option.

        Only the names listed in ``defaults`` are read from ``opts``;
        any other keyword is ignored. An option that is missing or
        explicitly None takes its default value.

        Raises error.Abort when ``context`` cannot be converted by
        int() (ValueError).
        '''
        for k in self.defaults:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''Return a new diffopts with the same option values, except for
        those overridden through ``kwargs``.'''
        opts = dict((k, getattr(self, k)) for k in self.defaults)
        opts.update(kwargs)
        return diffopts(**opts)

80

78

81

# Shared diffopts instance holding every default value; used as the
# fallback wherever a caller does not pass its own options.
defaultopts = diffopts()

82

80

83

def wsclean(opts, text, blank=True):
    '''Return ``text`` normalized according to the whitespace options.

    opts.ignorews takes precedence over opts.ignorewsamount; each one
    hands the text to bdiff.fixws (with flag 1 and 0 respectively).
    When ``blank`` is true and opts.ignoreblanklines is set, runs of
    newlines are then collapsed to a single newline and leading and
    trailing newlines are removed.
    '''
    if opts.ignorews:
        text = bdiff.fixws(text, 1)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, 0)
    if blank and opts.ignoreblanklines:
        text = re.sub('\n+', '\n', text).strip('\n')
    return text

91

89

92

def splitblock(base1, lines1, base2, lines2, opts):
    '''Split a pair of equivalent line ranges into matching and blank runs.

    Yields ([start1, end1, start2, end2], btype) tuples whose indices are
    offset by ``base1`` / ``base2``. btype is '=' for a run of non-blank
    lines consumed in lockstep from both sides and '~' for a run of blank
    lines consumed from either side.
    '''
    # The input lines matches except for interwoven blank lines. We
    # transform it into a sequence of matching blocks and blank blocks.
    # NOTE(review): the loop below relies on that precondition (both
    # sides holding the same number of non-blank lines); it is not
    # checked here.

    # 1 marks a line with content left after wsclean(), 0 a blank one.
    lines1 = [(1 if wsclean(opts, l) else 0) for l in lines1]
    lines2 = [(1 if wsclean(opts, l) else 0) for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, '='
        if (i1 >= e1 or lines1[i1] == 0
            or i2 >= e2 or lines2[i2] == 0):
            # Consume the block of blank lines
            btype = '~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
        s1 = i1
        s2 = i2

117

115

118

def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.

    A block is a sequence [a1, a2, b1, b2]: lines a1:a2 of text1 paired
    with lines b1:b2 of text2. opts defaults to the module-level
    defaultopts.

    NOTE(review): refine is accepted but never read in this body, so
    '~' blocks are yielded without further refinement.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = bdiff.blocks(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file. If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # s becomes the gap between the end of the previous match and the
        # start of the current one.
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = '!'
            if opts.ignoreblanklines:
                # Split lazily: only needed once a gap has to be compared.
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
                new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
                if old == new:
                    btype = '~'
            yield s, btype
        yield s1, '='

159

157

160

def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    '''Return a unified diff, file headers included, as one string.

    a, b     old and new file content; None selects the /dev/null header
             on that side, any other empty value is an empty file
    ad, bd   date strings placed in the '---' / '+++' header lines
    fn1, fn2 old and new file names (passed through util.pconvert)
    opts     diffopts instance controlling the output

    Returns "" when both sides are empty, when two binary contents are
    equal, or when no differing hunk is produced. Binary content (unless
    opts.text is set) produces a single 'Binary file ... has changed'
    line instead of hunks.
    '''
    def datetag(date, fn=None):
        # Text following a file name on a header line, newline included.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if fn and ' ' in fn:
            # no date is printed: a bare tab follows a name with spaces
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # old side empty or missing: every line of b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # new side empty or missing: every line of a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))
        l.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

    # A line lacking its newline can only come from the unterminated last
    # line of a file; terminate it and add the marker line after it.
    for ln, line in enumerate(l):
        if line[-1] != '\n':
            l[ln] = line + "\n\\ No newline at end of file\n"

    return "".join(l)

219

217

220

# creates a headerless unified diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    '''Generate the lines of a unified diff without the file headers.

    Yields, hunk by hunk, one '@@ ... @@' line followed by the hunk's
    context (' '), removed ('-') and added ('+') lines. Changes whose
    context overlaps are merged into a single hunk.
    '''
    def contextend(l, limit):
        # last context line after position l, clamped to limit
        return min(l + opts.context, limit)

    def contextstart(l):
        # first context line before position l, clamped to 0
        return max(l - opts.context, 0)

    # [line where the previous function-name search started, last name found]
    lastfunc = [0, '']
    def yieldhunk(hunk):
        # hunk is [astart, a2, bstart, b2, delta]; trailing context is
        # only computed here, once the hunk is known to be complete.
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                if l1[i][0].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                           bstart, blen, func)
        for x in delta:
            yield x
        # trailing context
        for x in l1[a2:aend]:
            yield ' ' + x

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != '!':
            if stype == '~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading context (or the lines separating two joined changes),
        # then the change itself
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk):
            yield x

322

320

323

def b85diff(to, tn):
    '''return a base85-encoded git binary patch turning ``to`` into ``tn``

    None on either side is treated as empty content. Returns '' when
    both sides are equal. Otherwise the result is a 'GIT binary patch'
    block holding the zlib-compressed new content as a 'literal'
    section; the old content is only used for the equality check.
    '''
    def fmtline(line):
        # One output line: a character encoding the chunk length
        # ('A'-'Z' for 1-26, 'a'-'z' for 27-52), then the encoded chunk.
        l = len(line)
        if l <= 26:
            l = chr(ord('A') + l - 1)
        else:
            l = chr(l - 26 + ord('a') - 1)
        return '%c%s\n' % (l, base85.b85encode(line, True))

    def chunk(text, csize=52):
        # successive slices of at most csize items
        l = len(text)
        i = 0
        while i < l:
            yield text[i:i + csize]
            i += csize

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    ret = []
    ret.append('GIT binary patch\n')
    ret.append('literal %s\n' % len(tn))
    for l in chunk(zlib.compress(tn)):
        ret.append(fmtline(l))
    ret.append('\n')

    return ''.join(ret)

357

355

358

def patchtext(bin):
    '''Return the concatenated payload of every fragment in ``bin``.

    ``bin`` is read as a sequence of fragments, each a 12-byte header of
    three big-endian signed 32-bit integers followed by a payload whose
    length is the third header field. The first two header fields are
    not used here. The result has the same string type as ``bin``.

    Raises struct.error when a header is truncated.
    '''
    pos = 0
    t = []
    end = len(bin)
    while pos < end:
        # only the payload length matters; unpack in place, no slice copy
        l = struct.unpack_from(">lll", bin, pos)[2]
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    # join with an empty string of the input's own type
    return bin[:0].join(t)

367

365

368

def patch(a, bin):
    '''Apply the binary delta ``bin`` to the text ``a``.

    When ``a`` is empty the result is util.buffer(bin, 12), i.e. ``bin``
    from offset 12 on, without going through mpatch. Otherwise the
    result is whatever mpatch.patches(a, [bin]) returns.
    '''
    if len(a) == 0:
        # skip over trivial delta header
        return util.buffer(bin, 12)
    return mpatch.patches(a, [bin])

373

371

374

# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return the blocks from bdiff.blocks(a, b) as a list of
    (d[0], d[2], d[1] - d[0]) tuples: start in ``a``, start in ``b``,
    and length of the block.'''
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

377

375

378

def trivialdiffheader(length):
    '''Return the 12-byte header packing (0, 0, length) as three
    big-endian signed 32-bit integers, or '' when ``length`` is zero.'''
    return struct.pack(">lll", 0, 0, length) if length else ''

380

378

381

def replacediffheader(oldlen, newlen):
    '''Return the 12-byte header packing (0, oldlen, newlen) as three
    big-endian signed 32-bit integers.'''
    return struct.pack(">lll", 0, oldlen, newlen)

383

381

384

patches = mpatch.patches

382

patches = mpatch.patches

385

patchedsize = mpatch.patchedsize

383

patchedsize = mpatch.patchedsize

386

textdiff = bdiff.bdiff

384

textdiff = bdiff.bdiff

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import re
             import struct
             import zlib
             from .i18n import _
             from . import (
                 base85,
                 bdiff,
                 error,
                 mpatch,
                 util,
             )
             def splitnewlines(text):
                 '''Split text into lines on newline characters only.

                 Every returned line keeps its terminating newline. Text left after
                 the last newline is returned as a final line without one, and
                 empty text gives an empty list. Unlike str.splitlines, no other
                 character acts as a line boundary.
                 '''
                 pieces = text.split('\n')
                 # Whatever follows the last newline (possibly nothing) has no
                 # terminator of its own and must not be given one.
                 tail = pieces.pop()
                 result = [piece + '\n' for piece in pieces]
                 if tail:
                     result.append(tail)
                 return result
             class diffopts(object):
                 '''context is the number of context lines
                 text treats all files as text
                 showfunc enables diff -p output
                 git enables the git extended patch format
                 nodates removes dates from diff headers
                 nobinary ignores binary files
                 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
                 ignorews ignores all whitespace changes in the diff
                 ignorewsamount ignores changes in the amount of whitespace
                 ignoreblanklines ignores changes whose lines are all blank
                 upgrade generates git diffs to avoid data loss
                 '''
                 # Every recognised option name, mapped to the value used when the
                 # caller does not supply one (or supplies None).
                 defaults = {
                     'context': 3,
                     'text': False,
                     'showfunc': False,
                     'git': False,
                     'nodates': False,
                     'nobinary': False,
                     'noprefix': False,
                     'ignorews': False,
                     'ignorewsamount': False,
                     'ignoreblanklines': False,
                     'upgrade': False,
                     }
-                __slots__ = defaults.keys()
                 def __init__(self, **opts):
                     # Set one attribute per known option. Keyword arguments whose
                     # names are not keys of `defaults` are never looked at.
-                    for k in self.__slots__:
+                    for k in self.defaults.keys():
                         v = opts.get(k)
                         if v is None:
                             v = self.defaults[k]
                         setattr(self, k, v)
                     # `context` may arrive as a string; it is always stored as an
                     # int, and a value int() rejects with ValueError is reported
                     # as error.Abort.
                     try:
                         self.context = int(self.context)
                     except ValueError:
                         raise error.Abort(_('diff context lines count must be '
                                            'an integer, not %r') % self.context)
                 def copy(self, **kwargs):
                     # Return a new diffopts carrying this instance's current values,
                     # with any keyword arguments taking precedence.
                     opts = dict((k, getattr(self, k)) for k in self.defaults)
                     opts.update(kwargs)
                     return diffopts(**opts)
             # Shared instance holding only the default values; other functions in
             # this module use it when the caller passes no options.
             defaultopts = diffopts()
             def wsclean(opts, text, blank=True):
                 '''Return text normalised according to the whitespace options.

                 opts.ignorews takes precedence over opts.ignorewsamount; either one
                 passes the text through bdiff.fixws (with 1 or 0 respectively as
                 the second argument). When blank is true and opts.ignoreblanklines
                 is set, runs of newlines are then collapsed to a single newline
                 and leading/trailing newlines are removed.
                 '''
                 if opts.ignorews or opts.ignorewsamount:
                     text = bdiff.fixws(text, 1 if opts.ignorews else 0)
                 if not (blank and opts.ignoreblanklines):
                     return text
                 return re.sub('\n+', '\n', text).strip('\n')
             def splitblock(base1, lines1, base2, lines2, opts):
                 '''Split two line ranges into alternating matching and blank blocks.

                 The two inputs are expected to match except for interwoven blank
                 lines. Yields ([start1, end1, start2, end2], btype) pairs whose
                 offsets are shifted by base1/base2; btype is '=' for a run of
                 non-blank lines and '~' for a run of blank lines. A line counts as
                 blank when wsclean() reduces it to an empty value.
                 '''
                 # Reduce every line to a flag: 1 for content, 0 for blank.
                 flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
                 flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
                 end1 = len(flags1)
                 end2 = len(flags2)
                 pos1 = pos2 = 0
                 while pos1 < end1 or pos2 < end2:
                     nxt1, nxt2 = pos1, pos2
                     atblank = (nxt1 >= end1 or flags1[nxt1] == 0
                                or nxt2 >= end2 or flags2[nxt2] == 0)
                     if atblank:
                         # Swallow the blank lines present on either side.
                         kind = '~'
                         while nxt1 < end1 and flags1[nxt1] == 0:
                             nxt1 += 1
                         while nxt2 < end2 and flags2[nxt2] == 0:
                             nxt2 += 1
                     else:
                         # Advance both sides together over the content lines.
                         kind = '='
                         while (nxt1 < end1 and flags1[nxt1] == 1
                                and flags2[nxt2] == 1):
                             nxt1 += 1
                             nxt2 += 1
                     yield [base1 + pos1, base1 + nxt1,
                            base2 + pos2, base2 + nxt2], kind
                     pos1, pos2 = nxt1, nxt2
             def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
                 """Yield (block, type) pairs covering both texts in order.

                 block is a four-item line entry as produced by bdiff.blocks
                 (start and end in text1, then start and end in text2). type is:

                 '=' for a block that bdiff reported as matching,
                 '!' for the gap between two matching blocks,
                 '~' for a gap whose two sides become equal once wsclean() has
                     filtered them (only checked when opts.ignoreblanklines is set).

                 lines1 and lines2 are text1 and text2 split with splitnewlines();
                 pass them if they are already available, otherwise they are
                 computed the first time they are needed. opts defaults to
                 defaultopts. refine is accepted but not read by this
                 implementation.
                 """
                 if opts is None:
                     opts = defaultopts
                 if opts.ignorews or opts.ignorewsamount:
                     text1 = wsclean(opts, text1, False)
                     text2 = wsclean(opts, text2, False)
                 matches = bdiff.blocks(text1, text2)
                 # Stand-in for "the match before the first one": it ends at line 0
                 # on both sides, so a first match that starts later leaves a gap.
                 prev = [0, 0, 0, 0]
                 for match in matches:
                     gap = [prev[1], match[0], prev[3], match[2]]
                     prev = match
                     # An empty gap on both sides means the matches are adjacent;
                     # this also drops the degenerate gaps bdiff can report around
                     # end of file.
                     if gap[0] != gap[1] or gap[2] != gap[3]:
                         kind = '!'
                         if opts.ignoreblanklines:
                             if lines1 is None:
                                 lines1 = splitnewlines(text1)
                             if lines2 is None:
                                 lines2 = splitnewlines(text2)
                             old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                             new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                             if old == new:
                                 kind = '~'
                         yield gap, kind
                     yield match, '='
             def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
                 """Return the unified diff between texts a and b as a single string.

                 a, b     old and new content; None selects a /dev/null header on
                          that side when the other side has content
                 ad, bd   date strings shown in the '---' and '+++' header lines
                 fn1, fn2 old and new file names (passed through util.pconvert)
                 opts     diffopts instance controlling the output

                 Returns "" when both texts are empty or when nothing differs. When
                 opts.text is false and either side is binary according to
                 util.binary, a one-line 'Binary file ... has changed' notice is
                 returned instead of a diff.
                 """
                 def datetag(date, fn=None):
                     # Text that terminates a '---'/'+++' header line.
                     if not (opts.git or opts.nodates):
                         return '\t%s\n' % date
                     if fn and ' ' in fn:
                         return '\t\n'
                     return '\n'
                 if not a and not b:
                     return ""
                 aprefix, bprefix = ('', '') if opts.noprefix else ('a/', 'b/')
                 epoch = util.datestr((0, 0))
                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)
                 if not opts.text and (util.binary(a) or util.binary(b)):
                     if a and b and len(a) == len(b) and a == b:
                         return ""
                     l = ['Binary file %s has changed\n' % fn1]
                 elif not a:
                     # Nothing on the old side: every line of b is an addition.
                     added = splitnewlines(b)
                     if a is None:
                         oldhdr = '--- /dev/null%s' % datetag(epoch)
                     else:
                         oldhdr = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     newhdr = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
                     rangehdr = "@@ -0,0 +1,%d @@\n" % len(added)
                     l = [oldhdr, newhdr, rangehdr]
                     l.extend("+" + line for line in added)
                 elif not b:
                     # Nothing on the new side: every line of a is a removal.
                     removed = splitnewlines(a)
                     oldhdr = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     if b is None:
                         newhdr = '+++ /dev/null%s' % datetag(epoch)
                     else:
                         newhdr = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
                     rangehdr = "@@ -1,%d +0,0 @@\n" % len(removed)
                     l = [oldhdr, newhdr, rangehdr]
                     l.extend("-" + line for line in removed)
                 else:
                     hunks = list(_unidiff(a, b, splitnewlines(a), splitnewlines(b),
                                           opts=opts))
                     if not hunks:
                         return ""
                     l = ["--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
                          "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))] + hunks
                 # Any output line that does not end in a newline gets the
                 # conventional marker appended after it.
                 nonewline = "\n\ No newline at end of file\n"
                 return "".join(line if line[-1] == '\n' else line + nonewline
                                for line in l)
             # creates a headerless unified diff
             # t1 and t2 are the text to be diffed
             # l1 and l2 are the text broken up into lines
             def _unidiff(t1, t2, l1, l2, opts=defaultopts):
                 """Generate the lines of a unified diff of t1 against t2, without
                 the '---'/'+++' file header lines.

                 Yields strings: for each hunk an '@@ -a,b +c,d @@' range line, then
                 the hunk's context (' '), removed ('-') and added ('+') lines.
                 opts.context is the number of context lines kept around a change;
                 when opts.showfunc is set the range line is followed by the start
                 of the nearest preceding line of l1 that begins with an
                 alphanumeric character.
                 """
                 def contextend(l, len):
                     # l plus the context size, but never beyond len.
                     ret = l + opts.context
                     if ret > len:
                         ret = len
                     return ret
                 def contextstart(l):
                     # l minus the context size, but never below 0.
                     ret = l - opts.context
                     if ret < 0:
                         return 0
                     return ret
                 # [index at which the last backwards search stopped being needed,
                 # last label found]. Kept in a list so that yieldhunk can update
                 # both values in place from inside the closure.
                 lastfunc = [0, '']
                 def yieldhunk(hunk):
                     # hunk is [astart, a2, bstart, b2, delta]: the old-side and
                     # new-side line ranges (0-based, end exclusive) and the
                     # already prefixed lines collected for it so far.
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     # the new side gets the same number of trailing context lines
                     # as the old side (aend - a2)
                     blen = b2 - bstart + aend - a2
                     func = ""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in xrange(astart - 1, lastpos - 1, -1):
                             if l1[i][0].isalnum():
                                 # the label is limited to 40 characters
                                 func = ' ' + l1[i].rstrip()[:40]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart
                     # zero-length hunk ranges report their start line as one less
                     # (all other ranges are converted from 0-based to 1-based)
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1
                     yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                                        bstart, blen, func)
                     for x in delta:
                         yield x
                     # trailing context, always taken from the old side
                     for x in xrange(a2, aend):
                         yield ' ' + l1[x]
                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 # hunk is the hunk currently being built; it is only emitted once a
                 # later change turns out to be too far away to join it, or at the
                 # end of the loop.
                 hunk = None
                 ignoredlines = 0
                 for s, stype in allblocks(t1, t2, opts, l1, l2):
                     a1, a2, b1, b2 = s
                     if stype != '!':
                         if stype == '~':
                             # The diff context lines are based on t1 content. When
                             # blank lines are ignored, the new lines offsets must
                             # be adjusted as if equivalent blocks ('~') had the
                             # same sizes on both sides.
                             ignoredlines += (b2 - b1) - (a2 - a1)
                         continue
                     delta = []
                     # slice before b1/b2 are shifted: the slices need real indexes
                     old = l1[a1:a2]
                     new = l2[b1:b2]
                     b1 -= ignoredlines
                     b2 -= ignoredlines
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         # keep appending to the joined hunk's own line list
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [astart, a2, bstart, b2, delta]
                     # delta is the same list object as hunk[4], so the slice
                     # assignments below extend the hunk in place: leading (or
                     # joining) context first, then removals, then additions.
                     delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
                     delta[len(delta):] = ['-' + x for x in old]
                     delta[len(delta):] = ['+' + x for x in new]
                 # emit the last pending hunk, if any change was found at all
                 if hunk:
                     for x in yieldhunk(hunk):
                         yield x
             def b85diff(to, tn):
                 '''Return a 'GIT binary patch' section carrying tn as a literal.

                 to and tn are the old and new contents; None is treated as empty
                 content. Returns '' when both are equal. Otherwise the result holds
                 the 'literal <size>' line followed by the zlib-compressed new
                 content, base85-encoded in chunks of at most 52 bytes, and a
                 closing empty line.
                 '''
                 def fmtline(line):
                     # The first character of every output line encodes the chunk
                     # length: 'A'..'Z' for 1..26 bytes, 'a' onwards for 27 and up.
                     size = len(line)
                     base = ord('A') if size <= 26 else ord('a') - 26
                     return '%c%s\n' % (chr(base + size - 1),
                                        base85.b85encode(line, True))
                 def chunk(text, csize=52):
                     # Successive slices of at most csize items of text.
                     total = len(text)
                     start = 0
                     while start < total:
                         stop = start + csize
                         yield text[start:stop]
                         start = stop
                 to = '' if to is None else to
                 tn = '' if tn is None else tn
                 if to == tn:
                     return ''
                 # TODO: deltas
                 ret = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
                 ret.extend(fmtline(piece) for piece in chunk(zlib.compress(tn)))
                 ret.append('\n')
                 return ''.join(ret)
             def patchtext(bin):
                 '''Return the concatenated data of all fragments in a binary delta.

                 bin is read as a sequence of records, each made of a 12-byte
                 header (three big-endian signed 32-bit integers) followed by as
                 many bytes of data as the third header field says. Only the data
                 is kept; the first two header fields are not used.
                 '''
                 fragments = []
                 total = len(bin)
                 offset = 0
                 while offset < total:
                     datastart = offset + 12
                     _p1, _p2, length = struct.unpack(">lll", bin[offset:datastart])
                     offset = datastart + length
                     fragments.append(bin[datastart:offset])
                 return "".join(fragments)
             def patch(a, bin):
                 '''Apply the binary delta bin to the text a and return the result.

                 For a non-empty a this delegates to mpatch.patches with bin as the
                 only delta. For an empty a the result is util.buffer(bin, 12),
                 i.e. bin without its leading 12-byte delta header.
                 '''
                 if len(a):
                     return mpatch.patches(a, [bin])
                 # skip over trivial delta header
                 return util.buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
             def get_matching_blocks(a, b):
                 '''Return the blocks from bdiff.blocks(a, b) as a list of
                 (start in a, start in b, length) triples.'''
                 triples = []
                 for blk in bdiff.blocks(a, b):
                     astart, aend, bstart = blk[0], blk[1], blk[2]
                     triples.append((astart, bstart, aend - astart))
                 return triples
             def trivialdiffheader(length):
                 '''Return the 12-byte delta header (0, 0, length) packed as three
                 big-endian signed 32-bit integers, or '' when length is zero.'''
                 if not length:
                     return ''
                 return struct.pack(">lll", 0, 0, length)
             def replacediffheader(oldlen, newlen):
                 '''Return the 12-byte delta header (0, oldlen, newlen) packed as
                 three big-endian signed 32-bit integers.'''
                 fields = (0, oldlen, newlen)
                 return struct.pack(">lll", *fields)
             # Re-export the mpatch and bdiff entry points under this module's
             # namespace so that callers can reach them as attributes of mdiff.
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff