upstream/mercurial-mirror Commit - r31269:5e7fd3a0

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import re

10

import re

11

import struct

11

import struct

12

import zlib

12

import zlib

13

14

from .i18n import _

14

from .i18n import _

15

from . import (

15

from . import (

16

base85,

16

base85,

17

bdiff,

17

bdiff,

18

error,

18

error,

19

mpatch,

19

mpatch,

20

util,

20

util,

21

)

21

)

22

23

def splitnewlines(text):

23

def splitnewlines(text):

24

'''like str.splitlines, but only split on newlines.'''

24

'''like str.splitlines, but only split on newlines.'''

25

lines = [l + '\n' for l in text.split('\n')]

25

lines = [l + '\n' for l in text.split('\n')]

26

if lines:

26

if lines:

27

if lines[-1] == '\n':

27

if lines[-1] == '\n':

28

lines.pop()

28

lines.pop()

29

else:

29

else:

30

lines[-1] = lines[-1][:-1]

30

lines[-1] = lines[-1][:-1]

31

return lines

31

return lines

32

33

class diffopts(object):

33

class diffopts(object):

34

'''context is the number of context lines

34

'''context is the number of context lines

35

text treats all files as text

35

text treats all files as text

36

showfunc enables diff -p output

36

showfunc enables diff -p output

37

git enables the git extended patch format

37

git enables the git extended patch format

38

nodates removes dates from diff headers

38

nodates removes dates from diff headers

39

nobinary ignores binary files

39

nobinary ignores binary files

40

noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)

40

noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)

41

ignorews ignores all whitespace changes in the diff

41

ignorews ignores all whitespace changes in the diff

42

ignorewsamount ignores changes in the amount of whitespace

42

ignorewsamount ignores changes in the amount of whitespace

43

ignoreblanklines ignores changes whose lines are all blank

43

ignoreblanklines ignores changes whose lines are all blank

44

upgrade generates git diffs to avoid data loss

44

upgrade generates git diffs to avoid data loss

45

'''

45

'''

46

47

defaults = {

47

defaults = {

48

'context': 3,

48

'context': 3,

49

'text': False,

49

'text': False,

50

'showfunc': False,

50

'showfunc': False,

51

'git': False,

51

'git': False,

52

'nodates': False,

52

'nodates': False,

53

'nobinary': False,

53

'nobinary': False,

54

'noprefix': False,

54

'noprefix': False,

55

'index': 0,

55

'index': 0,

56

'ignorews': False,

56

'ignorews': False,

57

'ignorewsamount': False,

57

'ignorewsamount': False,

58

'ignoreblanklines': False,

58

'ignoreblanklines': False,

59

'upgrade': False,

59

'upgrade': False,

60

'showsimilarity': False,

60

'showsimilarity': False,

61

}

61

}

62

63

def __init__(self, **opts):

63

def __init__(self, **opts):

64

for k in self.defaults.keys():

64

for k in self.defaults.keys():

65

v = opts.get(k)

65

v = opts.get(k)

66

if v is None:

66

if v is None:

67

v = self.defaults[k]

67

v = self.defaults[k]

68

setattr(self, k, v)

68

setattr(self, k, v)

69

70

try:

70

try:

71

self.context = int(self.context)

71

self.context = int(self.context)

72

except ValueError:

72

except ValueError:

73

raise error.Abort(_('diff context lines count must be '

73

raise error.Abort(_('diff context lines count must be '

74

'an integer, not %r') % self.context)

74

'an integer, not %r') % self.context)

75

76

def copy(self, **kwargs):

76

def copy(self, **kwargs):

77

opts = dict((k, getattr(self, k)) for k in self.defaults)

77

opts = dict((k, getattr(self, k)) for k in self.defaults)

78

opts.update(kwargs)

78

opts.update(kwargs)

79

return diffopts(**opts)

79

return diffopts(**opts)

80

81

defaultopts = diffopts()

81

defaultopts = diffopts()

82

83

def wsclean(opts, text, blank=True):

83

def wsclean(opts, text, blank=True):

84

if opts.ignorews:

84

if opts.ignorews:

85

text = bdiff.fixws(text, 1)

85

text = bdiff.fixws(text, 1)

86

elif opts.ignorewsamount:

86

elif opts.ignorewsamount:

87

text = bdiff.fixws(text, 0)

87

text = bdiff.fixws(text, 0)

88

if blank and opts.ignoreblanklines:

88

if blank and opts.ignoreblanklines:

89

text = re.sub('\n+', '\n', text).strip('\n')

89

text = re.sub('\n+', '\n', text).strip('\n')

90

return text

90

return text

91

92

def splitblock(base1, lines1, base2, lines2, opts):

92

def splitblock(base1, lines1, base2, lines2, opts):

93

# The input lines matches except for interwoven blank lines. We

93

# The input lines matches except for interwoven blank lines. We

94

# transform it into a sequence of matching blocks and blank blocks.

94

# transform it into a sequence of matching blocks and blank blocks.

95

lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]

95

lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]

96

lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]

96

lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]

97

s1, e1 = 0, len(lines1)

97

s1, e1 = 0, len(lines1)

98

s2, e2 = 0, len(lines2)

98

s2, e2 = 0, len(lines2)

99

while s1 < e1 or s2 < e2:

99

while s1 < e1 or s2 < e2:

100

i1, i2, btype = s1, s2, '='

100

i1, i2, btype = s1, s2, '='

101

if (i1 >= e1 or lines1[i1] == 0

101

if (i1 >= e1 or lines1[i1] == 0

102

or i2 >= e2 or lines2[i2] == 0):

102

or i2 >= e2 or lines2[i2] == 0):

103

# Consume the block of blank lines

103

# Consume the block of blank lines

104

btype = '~'

104

btype = '~'

105

while i1 < e1 and lines1[i1] == 0:

105

while i1 < e1 and lines1[i1] == 0:

106

i1 += 1

106

i1 += 1

107

while i2 < e2 and lines2[i2] == 0:

107

while i2 < e2 and lines2[i2] == 0:

108

i2 += 1

108

i2 += 1

109

else:

109

else:

110

# Consume the matching lines

110

# Consume the matching lines

111

while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:

111

while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:

112

i1 += 1

112

i1 += 1

113

i2 += 1

113

i2 += 1

114

yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype

114

yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype

115

s1 = i1

115

s1 = i1

116

s2 = i2

116

s2 = i2

117

118

def blocksinrange(blocks, rangeb):

118

def blocksinrange(blocks, rangeb):

119

"""filter `blocks` like (a1, a2, b1, b2) from items outside line range

119

"""filter `blocks` like (a1, a2, b1, b2) from items outside line range

120

`rangeb` from ``(b1, b2)`` point of view.

120

`rangeb` from ``(b1, b2)`` point of view.

121

122

Return `filteredblocks, rangea` where:

122

Return `filteredblocks, rangea` where:

123

124

* `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of

124

* `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of

125

`blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a

125

`blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a

126

block ``(b1, b2)`` being inside `rangeb` if

126

block ``(b1, b2)`` being inside `rangeb` if

127

``rangeb[0] < b2 and b1 < rangeb[1]``;

127

``rangeb[0] < b2 and b1 < rangeb[1]``;

128

* `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

128

* `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

129

"""

129

"""

130

lbb, ubb = rangeb

130

lbb, ubb = rangeb

131

lba, uba = None, None

131

lba, uba = None, None

132

filteredblocks = []

132

filteredblocks = []

133

for block in blocks:

133

for block in blocks:

134

(a1, a2, b1, b2), stype = block

134

(a1, a2, b1, b2), stype = block

135

if lbb >= b1 and ubb <= b2 and stype == '=':

135

if lbb >= b1 and ubb <= b2 and stype == '=':

136

# rangeb is within a single "=" hunk, restrict back linerange1

136

# rangeb is within a single "=" hunk, restrict back linerange1

137

# by offsetting rangeb

137

# by offsetting rangeb

138

lba = lbb - b1 + a1

138

lba = lbb - b1 + a1

139

uba = ubb - b1 + a1

139

uba = ubb - b1 + a1

140

else:

140

else:

141

if b1 <= lbb < b2:

141

if b1 <= lbb < b2:

142

if stype == '=':

142

if stype == '=':

143

lba = a2 - (b2 - lbb)

143

lba = a2 - (b2 - lbb)

144

else:

144

else:

145

lba = a1

145

lba = a1

146

if b1 < ubb <= b2:

146

if b1 < ubb <= b2:

147

if stype == '=':

147

if stype == '=':

148

uba = a1 + (ubb - b1)

148

uba = a1 + (ubb - b1)

149

else:

149

else:

150

uba = a2

150

uba = a2

151

if lbb < b2 and b1 < ubb:

151

if lbb < b2 and b1 < ubb:

152

filteredblocks.append(block)

152

filteredblocks.append(block)

153

if lba is None or uba is None or uba < lba:

153

if lba is None or uba is None or uba < lba:

154

raise error.Abort(_('line range exceeds file size'))

154

raise error.Abort(_('line range exceeds file size'))

155

return filteredblocks, (lba, uba)

155

return filteredblocks, (lba, uba)

156

157

def allblocks(text1, text2, opts=None, lines1=None, lines2=None):

157

def allblocks(text1, text2, opts=None, lines1=None, lines2=None):

158

"""Return (block, type) tuples, where block is an mdiff.blocks

158

"""Return (block, type) tuples, where block is an mdiff.blocks

159

line entry. type is '=' for blocks matching exactly one another

159

line entry. type is '=' for blocks matching exactly one another

160

(bdiff blocks), '!' for non-matching blocks and '~' for blocks

160

(bdiff blocks), '!' for non-matching blocks and '~' for blocks

161

matching only after having filtered blank lines.

161

matching only after having filtered blank lines.

162

line1 and line2 are text1 and text2 split with splitnewlines() if

162

line1 and line2 are text1 and text2 split with splitnewlines() if

163

they are already available.

163

they are already available.

164

"""

164

"""

165

if opts is None:

165

if opts is None:

166

opts = defaultopts

166

opts = defaultopts

167

if opts.ignorews or opts.ignorewsamount:

167

if opts.ignorews or opts.ignorewsamount:

168

text1 = wsclean(opts, text1, False)

168

text1 = wsclean(opts, text1, False)

169

text2 = wsclean(opts, text2, False)

169

text2 = wsclean(opts, text2, False)

170

diff = bdiff.blocks(text1, text2)

170

diff = bdiff.blocks(text1, text2)

171

for i, s1 in enumerate(diff):

171

for i, s1 in enumerate(diff):

172

# The first match is special.

172

# The first match is special.

173

# we've either found a match starting at line 0 or a match later

173

# we've either found a match starting at line 0 or a match later

174

# in the file. If it starts later, old and new below will both be

174

# in the file. If it starts later, old and new below will both be

175

# empty and we'll continue to the next match.

175

# empty and we'll continue to the next match.

176

if i > 0:

176

if i > 0:

177

s = diff[i - 1]

177

s = diff[i - 1]

178

else:

178

else:

179

s = [0, 0, 0, 0]

179

s = [0, 0, 0, 0]

180

s = [s[1], s1[0], s[3], s1[2]]

180

s = [s[1], s1[0], s[3], s1[2]]

181

182

# bdiff sometimes gives huge matches past eof, this check eats them,

182

# bdiff sometimes gives huge matches past eof, this check eats them,

183

# and deals with the special first match case described above

183

# and deals with the special first match case described above

184

if s[0] != s[1] or s[2] != s[3]:

184

if s[0] != s[1] or s[2] != s[3]:

185

type = '!'

185

type = '!'

186

if opts.ignoreblanklines:

186

if opts.ignoreblanklines:

187

if lines1 is None:

187

if lines1 is None:

188

lines1 = splitnewlines(text1)

188

lines1 = splitnewlines(text1)

189

if lines2 is None:

189

if lines2 is None:

190

lines2 = splitnewlines(text2)

190

lines2 = splitnewlines(text2)

191

old = wsclean(opts, "".join(lines1[s[0]:s[1]]))

191

old = wsclean(opts, "".join(lines1[s[0]:s[1]]))

192

new = wsclean(opts, "".join(lines2[s[2]:s[3]]))

192

new = wsclean(opts, "".join(lines2[s[2]:s[3]]))

193

if old == new:

193

if old == new:

194

type = '~'

194

type = '~'

195

yield s, type

195

yield s, type

196

yield s1, '='

196

yield s1, '='

197

198

def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):

198

def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):

199

def datetag(date, fn=None):

199

def datetag(date, fn=None):

200

if not opts.git and not opts.nodates:

200

if not opts.git and not opts.nodates:

201

return '\t%s\n' % date

201

return '\t%s\n' % date

202

if fn and ' ' in fn:

202

if fn and ' ' in fn:

203

return '\t\n'

203

return '\t\n'

204

return '\n'

204

return '\n'

205

206

if not a and not b:

206

if not a and not b:

207

return ""

207

return ""

208

209

if opts.noprefix:

209

if opts.noprefix:

210

aprefix = bprefix = ''

210

aprefix = bprefix = ''

211

else:

211

else:

212

aprefix = 'a/'

212

aprefix = 'a/'

213

bprefix = 'b/'

213

bprefix = 'b/'

214

215

epoch = util.datestr((0, 0))

215

epoch = util.datestr((0, 0))

216

217

fn1 = util.pconvert(fn1)

217

fn1 = util.pconvert(fn1)

218

fn2 = util.pconvert(fn2)

218

fn2 = util.pconvert(fn2)

219

220

if not opts.text and (util.binary(a) or util.binary(b)):

220

if not opts.text and (util.binary(a) or util.binary(b)):

221

if a and b and len(a) == len(b) and a == b:

221

if a and b and len(a) == len(b) and a == b:

222

return ""

222

return ""

223

l = ['Binary file %s has changed\n' % fn1]

223

l = ['Binary file %s has changed\n' % fn1]

224

elif not a:

224

elif not a:

225

b = splitnewlines(b)

225

b = splitnewlines(b)

226

if a is None:

226

if a is None:

227

l1 = '--- /dev/null%s' % datetag(epoch)

227

l1 = '--- /dev/null%s' % datetag(epoch)

228

else:

228

else:

229

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

229

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

230

l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))

230

l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))

231

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

231

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

232

l = [l1, l2, l3] + ["+" + e for e in b]

232

l = [l1, l2, l3] + ["+" + e for e in b]

233

elif not b:

233

elif not b:

234

a = splitnewlines(a)

234

a = splitnewlines(a)

235

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

235

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

236

if b is None:

236

if b is None:

237

l2 = '+++ /dev/null%s' % datetag(epoch)

237

l2 = '+++ /dev/null%s' % datetag(epoch)

238

else:

238

else:

239

l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))

239

l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))

240

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

240

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

241

l = [l1, l2, l3] + ["-" + e for e in a]

241

l = [l1, l2, l3] + ["-" + e for e in a]

242

else:

242

else:

243

l = ~~list~~(_unidiff(a, b, opts=opts))

243

l = sum((hlines for hrange, hlines in _unidiff(a, b, opts=opts)), [])

244

if not l:

244

if not l:

245

return ""

245

return ""

246

247

l.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))

247

l.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))

248

l.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

248

l.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

249

250

for ln in xrange(len(l)):

250

for ln in xrange(len(l)):

251

if l[ln][-1] != '\n':

251

if l[ln][-1] != '\n':

252

l[ln] += "\n\ No newline at end of file\n"

252

l[ln] += "\n\ No newline at end of file\n"

253

254

return "".join(l)

254

return "".join(l)

255

256

def _unidiff(t1, t2, opts=defaultopts):

256

def _unidiff(t1, t2, opts=defaultopts):

257

"""Yield hunks of a headerless unified diff from t1 and t2 texts.~~"""~~

257

"""Yield hunks of a headerless unified diff from t1 and t2 texts.

258

259

Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a

260

tuple (s1, l1, s2, l2) representing the range information of the hunk to

261

form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines

262

of the hunk combining said header followed by line additions and

263

deletions.

264

"""

258

l1 = splitnewlines(t1)

265

l1 = splitnewlines(t1)

259

l2 = splitnewlines(t2)

266

l2 = splitnewlines(t2)

260

def contextend(l, len):

267

def contextend(l, len):

261

ret = l + opts.context

268

ret = l + opts.context

262

if ret > len:

269

if ret > len:

263

ret = len

270

ret = len

264

return ret

271

return ret

265

272

266

def contextstart(l):

273

def contextstart(l):

267

ret = l - opts.context

274

ret = l - opts.context

268

if ret < 0:

275

if ret < 0:

269

return 0

276

return 0

270

return ret

277

return ret

271

278

272

lastfunc = [0, '']

279

lastfunc = [0, '']

273

def yieldhunk(hunk):

280

def yieldhunk(hunk):

274

(astart, a2, bstart, b2, delta) = hunk

281

(astart, a2, bstart, b2, delta) = hunk

275

aend = contextend(a2, len(l1))

282

aend = contextend(a2, len(l1))

276

alen = aend - astart

283

alen = aend - astart

277

blen = b2 - bstart + aend - a2

284

blen = b2 - bstart + aend - a2

278

285

279

func = ""

286

func = ""

280

if opts.showfunc:

287

if opts.showfunc:

281

lastpos, func = lastfunc

288

lastpos, func = lastfunc

282

# walk backwards from the start of the context up to the start of

289

# walk backwards from the start of the context up to the start of

283

# the previous hunk context until we find a line starting with an

290

# the previous hunk context until we find a line starting with an

284

# alphanumeric char.

291

# alphanumeric char.

285

for i in xrange(astart - 1, lastpos - 1, -1):

292

for i in xrange(astart - 1, lastpos - 1, -1):

286

if l1[i][0].isalnum():

293

if l1[i][0].isalnum():

287

func = ' ' + l1[i].rstrip()[:40]

294

func = ' ' + l1[i].rstrip()[:40]

288

lastfunc[1] = func

295

lastfunc[1] = func

289

break

296

break

290

# by recording this hunk's starting point as the next place to

297

# by recording this hunk's starting point as the next place to

291

# start looking for function lines, we avoid reading any line in

298

# start looking for function lines, we avoid reading any line in

292

# the file more than once.

299

# the file more than once.

293

lastfunc[0] = astart

300

lastfunc[0] = astart

294

301

295

# zero-length hunk ranges report their start line as one less

302

# zero-length hunk ranges report their start line as one less

296

if alen:

303

if alen:

297

astart += 1

304

astart += 1

298

if blen:

305

if blen:

299

bstart += 1

306

bstart += 1

300

307

301

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,

308

hunkrange = astart, alen, bstart, blen

302

bstart, blen, func)

309

hunklines = (

303

for x in delta:

310

["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]

304

~~yield~~ x

311

+ delta

305

for x in xrange(a2, aend):

312

+ [' ' + l1[x] for x in xrange(a2, aend)]

306

yield ' ' + l1[x]

313

)

314

yield hunkrange, hunklines

307

315

308

# bdiff.blocks gives us the matching sequences in the files. The loop

316

# bdiff.blocks gives us the matching sequences in the files. The loop

309

# below finds the spaces between those matching sequences and translates

317

# below finds the spaces between those matching sequences and translates

310

# them into diff output.

318

# them into diff output.

311

#

319

#

312

hunk = None

320

hunk = None

313

ignoredlines = 0

321

ignoredlines = 0

314

for s, stype in allblocks(t1, t2, opts, l1, l2):

322

for s, stype in allblocks(t1, t2, opts, l1, l2):

315

a1, a2, b1, b2 = s

323

a1, a2, b1, b2 = s

316

if stype != '!':

324

if stype != '!':

317

if stype == '~':

325

if stype == '~':

318

# The diff context lines are based on t1 content. When

326

# The diff context lines are based on t1 content. When

319

# blank lines are ignored, the new lines offsets must

327

# blank lines are ignored, the new lines offsets must

320

# be adjusted as if equivalent blocks ('~') had the

328

# be adjusted as if equivalent blocks ('~') had the

321

# same sizes on both sides.

329

# same sizes on both sides.

322

ignoredlines += (b2 - b1) - (a2 - a1)

330

ignoredlines += (b2 - b1) - (a2 - a1)

323

continue

331

continue

324

delta = []

332

delta = []

325

old = l1[a1:a2]

333

old = l1[a1:a2]

326

new = l2[b1:b2]

334

new = l2[b1:b2]

327

335

328

b1 -= ignoredlines

336

b1 -= ignoredlines

329

b2 -= ignoredlines

337

b2 -= ignoredlines

330

astart = contextstart(a1)

338

astart = contextstart(a1)

331

bstart = contextstart(b1)

339

bstart = contextstart(b1)

332

prev = None

340

prev = None

333

if hunk:

341

if hunk:

334

# join with the previous hunk if it falls inside the context

342

# join with the previous hunk if it falls inside the context

335

if astart < hunk[1] + opts.context + 1:

343

if astart < hunk[1] + opts.context + 1:

336

prev = hunk

344

prev = hunk

337

astart = hunk[1]

345

astart = hunk[1]

338

bstart = hunk[3]

346

bstart = hunk[3]

339

else:

347

else:

340

for x in yieldhunk(hunk):

348

for x in yieldhunk(hunk):

341

yield x

349

yield x

342

if prev:

350

if prev:

343

# we've joined the previous hunk, record the new ending points.

351

# we've joined the previous hunk, record the new ending points.

344

hunk[1] = a2

352

hunk[1] = a2

345

hunk[3] = b2

353

hunk[3] = b2

346

delta = hunk[4]

354

delta = hunk[4]

347

else:

355

else:

348

# create a new hunk

356

# create a new hunk

349

hunk = [astart, a2, bstart, b2, delta]

357

hunk = [astart, a2, bstart, b2, delta]

350

358

351

delta[len(delta):] = [' ' + x for x in l1[astart:a1]]

359

delta[len(delta):] = [' ' + x for x in l1[astart:a1]]

352

delta[len(delta):] = ['-' + x for x in old]

360

delta[len(delta):] = ['-' + x for x in old]

353

delta[len(delta):] = ['+' + x for x in new]

361

delta[len(delta):] = ['+' + x for x in new]

354

362

355

if hunk:

363

if hunk:

356

for x in yieldhunk(hunk):

364

for x in yieldhunk(hunk):

357

yield x

365

yield x

358

366

359

def b85diff(to, tn):

367

def b85diff(to, tn):

360

'''print base85-encoded binary diff'''

368

'''print base85-encoded binary diff'''

361

def fmtline(line):

369

def fmtline(line):

362

l = len(line)

370

l = len(line)

363

if l <= 26:

371

if l <= 26:

364

l = chr(ord('A') + l - 1)

372

l = chr(ord('A') + l - 1)

365

else:

373

else:

366

l = chr(l - 26 + ord('a') - 1)

374

l = chr(l - 26 + ord('a') - 1)

367

return '%c%s\n' % (l, base85.b85encode(line, True))

375

return '%c%s\n' % (l, base85.b85encode(line, True))

368

376

369

def chunk(text, csize=52):

377

def chunk(text, csize=52):

370

l = len(text)

378

l = len(text)

371

i = 0

379

i = 0

372

while i < l:

380

while i < l:

373

yield text[i:i + csize]

381

yield text[i:i + csize]

374

i += csize

382

i += csize

375

383

376

if to is None:

384

if to is None:

377

to = ''

385

to = ''

378

if tn is None:

386

if tn is None:

379

tn = ''

387

tn = ''

380

388

381

if to == tn:

389

if to == tn:

382

return ''

390

return ''

383

391

384

# TODO: deltas

392

# TODO: deltas

385

ret = []

393

ret = []

386

ret.append('GIT binary patch\n')

394

ret.append('GIT binary patch\n')

387

ret.append('literal %s\n' % len(tn))

395

ret.append('literal %s\n' % len(tn))

388

for l in chunk(zlib.compress(tn)):

396

for l in chunk(zlib.compress(tn)):

389

ret.append(fmtline(l))

397

ret.append(fmtline(l))

390

ret.append('\n')

398

ret.append('\n')

391

399

392

return ''.join(ret)

400

return ''.join(ret)

393

401

394

def patchtext(bin):

402

def patchtext(bin):

395

pos = 0

403

pos = 0

396

t = []

404

t = []

397

while pos < len(bin):

405

while pos < len(bin):

398

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

406

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

399

pos += 12

407

pos += 12

400

t.append(bin[pos:pos + l])

408

t.append(bin[pos:pos + l])

401

pos += l

409

pos += l

402

return "".join(t)

410

return "".join(t)

403

411

404

def patch(a, bin):

412

def patch(a, bin):

405

if len(a) == 0:

413

if len(a) == 0:

406

# skip over trivial delta header

414

# skip over trivial delta header

407

return util.buffer(bin, 12)

415

return util.buffer(bin, 12)

408

return mpatch.patches(a, [bin])

416

return mpatch.patches(a, [bin])

409

417

410

# similar to difflib.SequenceMatcher.get_matching_blocks

418

# similar to difflib.SequenceMatcher.get_matching_blocks

411

def get_matching_blocks(a, b):

419

def get_matching_blocks(a, b):

412

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

420

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

413

421

414

def trivialdiffheader(length):

422

def trivialdiffheader(length):

415

return struct.pack(">lll", 0, 0, length) if length else ''

423

return struct.pack(">lll", 0, 0, length) if length else ''

416

424

417

def replacediffheader(oldlen, newlen):

425

def replacediffheader(oldlen, newlen):

418

return struct.pack(">lll", 0, oldlen, newlen)

426

return struct.pack(">lll", 0, oldlen, newlen)

419

427

420

patches = mpatch.patches

428

patches = mpatch.patches

421

patchedsize = mpatch.patchedsize

429

patchedsize = mpatch.patchedsize

422

textdiff = bdiff.bdiff

430

textdiff = bdiff.bdiff

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import re
             import struct
             import zlib
             from .i18n import _
             from . import (
                 base85,
                 bdiff,
                 error,
                 mpatch,
                 util,
             )
             def splitnewlines(text):
                 '''Split text into lines on newline characters only.

                 Unlike str.splitlines, no other line-boundary character acts as a
                 separator. Every returned line keeps its trailing newline; a final
                 line that has none is returned as-is, and empty text gives [].
                 '''
                 pieces = text.split('\n')
                 # Whatever follows the last newline is either empty (the text ended
                 # with a newline, or was empty) or an unterminated final line.
                 tail = pieces.pop()
                 result = [piece + '\n' for piece in pieces]
                 if tail:
                     result.append(tail)
                 return result
             class diffopts(object):
                 '''Settings that control how diffs are generated.

                 context is the number of context lines
                 text treats all files as text
                 showfunc enables diff -p output
                 git enables the git extended patch format
                 nodates removes dates from diff headers
                 nobinary ignores binary files
                 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
                 ignorews ignores all whitespace changes in the diff
                 ignorewsamount ignores changes in the amount of whitespace
                 ignoreblanklines ignores changes whose lines are all blank
                 upgrade generates git diffs to avoid data loss

                 Any option that is omitted or passed as None takes its value from
                 `defaults` (which additionally lists 'index' and 'showsimilarity').
                 Keyword arguments not present in `defaults` are ignored.
                 '''

                 defaults = {
                     'context': 3,
                     'text': False,
                     'showfunc': False,
                     'git': False,
                     'nodates': False,
                     'nobinary': False,
                     'noprefix': False,
                     'index': 0,
                     'ignorews': False,
                     'ignorewsamount': False,
                     'ignoreblanklines': False,
                     'upgrade': False,
                     'showsimilarity': False,
                     }

                 def __init__(self, **opts):
                     for name, fallback in self.defaults.items():
                         given = opts.get(name)
                         setattr(self, name, fallback if given is None else given)

                     # context may arrive as a string; normalize it to an integer.
                     try:
                         self.context = int(self.context)
                     except ValueError:
                         raise error.Abort(_('diff context lines count must be '
                                             'an integer, not %r') % self.context)

                 def copy(self, **kwargs):
                     '''Return a new diffopts with the given options overridden.'''
                     merged = {}
                     for name in self.defaults:
                         merged[name] = getattr(self, name)
                     merged.update(kwargs)
                     return diffopts(**merged)
             # Shared diffopts instance carrying only the default values; used by the
             # diff routines in this module when the caller supplies no options.
             defaultopts = diffopts()
             def wsclean(opts, text, blank=True):
                 """Return `text` normalized according to the whitespace options.

                 When opts.ignorews is set the text goes through bdiff.fixws(text, 1);
                 otherwise, when opts.ignorewsamount is set, through
                 bdiff.fixws(text, 0). If `blank` is true and opts.ignoreblanklines is
                 set, runs of newlines are then collapsed to a single newline and
                 leading/trailing newlines are removed.
                 """
                 if opts.ignorews or opts.ignorewsamount:
                     # ignorews takes precedence over ignorewsamount.
                     text = bdiff.fixws(text, 1 if opts.ignorews else 0)
                 if not (blank and opts.ignoreblanklines):
                     return text
                 return re.sub('\n+', '\n', text).strip('\n')
             def splitblock(base1, lines1, base2, lines2, opts):
                 """Split two equivalent line runs into matching and blank sub-blocks.

                 The input lines match except for interwoven blank lines. Yields
                 ``([a1, a2, b1, b2], btype)`` pairs, offset by `base1`/`base2`, where
                 btype is '=' for a run of matching lines and '~' for a run of lines
                 that wsclean() reduces to nothing.
                 """
                 # 1 marks a line with content left after wsclean(), 0 a blank one.
                 flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
                 flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
                 end1 = len(flags1)
                 end2 = len(flags2)
                 pos1 = pos2 = 0
                 while pos1 < end1 or pos2 < end2:
                     nxt1, nxt2 = pos1, pos2
                     atblank = (nxt1 >= end1 or flags1[nxt1] == 0
                                or nxt2 >= end2 or flags2[nxt2] == 0)
                     if atblank:
                         # Consume the block of blank lines on both sides
                         kind = '~'
                         while nxt1 < end1 and flags1[nxt1] == 0:
                             nxt1 += 1
                         while nxt2 < end2 and flags2[nxt2] == 0:
                             nxt2 += 1
                     else:
                         # Consume the matching lines in lockstep
                         kind = '='
                         while (nxt1 < end1 and flags1[nxt1] == 1
                                and flags2[nxt2] == 1):
                             nxt1 += 1
                             nxt2 += 1
                     yield [base1 + pos1, base1 + nxt1,
                            base2 + pos2, base2 + nxt2], kind
                     pos1, pos2 = nxt1, nxt2
             def blocksinrange(blocks, rangeb):
                 """Keep the `blocks` that overlap line range `rangeb` on the "b" side.

                 `blocks` is an iterable of ``((a1, a2, b1, b2), stype)`` items and
                 `rangeb` a ``(lower, upper)`` line range expressed in "b" coordinates.

                 Return ``(filteredblocks, rangea)`` where:

                 * `filteredblocks` lists the items of `blocks` whose ``(b1, b2)`` span
                   satisfies ``rangeb[0] < b2 and b1 < rangeb[1]``;
                 * `rangea` is the ``(lower, upper)`` line range translated to the
                   ``(a1, a2)`` side of `blocks`.

                 Abort with 'line range exceeds file size' when either bound of
                 `rangea` could not be determined or the bounds are inverted.
                 """
                 lowb, highb = rangeb
                 lowa = higha = None
                 kept = []
                 for entry in blocks:
                     (a1, a2, b1, b2), stype = entry
                     matching = stype == '='
                     if matching and b1 <= lowb and highb <= b2:
                         # rangeb lies within a single "=" hunk: both bounds map to
                         # the "a" side by the same offset.
                         lowa = a1 + (lowb - b1)
                         higha = a1 + (highb - b1)
                     else:
                         # A bound falling in a non-matching block snaps to that
                         # block's edge on the "a" side.
                         if b1 <= lowb < b2:
                             lowa = a2 - (b2 - lowb) if matching else a1
                         if b1 < highb <= b2:
                             higha = a1 + (highb - b1) if matching else a2
                     if lowb < b2 and b1 < highb:
                         kept.append(entry)
                 if lowa is None or higha is None or higha < lowa:
                     raise error.Abort(_('line range exceeds file size'))
                 return kept, (lowa, higha)
             def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
                 """Yield (block, type) tuples describing how text1 maps onto text2.

                 Each block is an entry in the format of bdiff.blocks. type is '='
                 for blocks matching exactly one another (bdiff blocks), '!' for
                 non-matching blocks and '~' for blocks matching only after having
                 filtered blank lines.

                 lines1 and lines2 are text1 and text2 split with splitnewlines(),
                 to be passed if they are already available.
                 """
                 if opts is None:
                     opts = defaultopts
                 if opts.ignorews or opts.ignorewsamount:
                     text1 = wsclean(opts, text1, False)
                     text2 = wsclean(opts, text2, False)
                 # Pretend an empty match sits at the very start so that the first
                 # real match is handled like every other one: the gap before it is
                 # either empty (match at line 0) or the leading differing region.
                 prev = [0, 0, 0, 0]
                 for match in bdiff.blocks(text1, text2):
                     gap = [prev[1], match[0], prev[3], match[2]]
                     prev = match
                     # bdiff sometimes gives huge matches past eof, this check eats
                     # them, and skips empty gaps between adjacent matches
                     if gap[0] != gap[1] or gap[2] != gap[3]:
                         kind = '!'
                         if opts.ignoreblanklines:
                             if lines1 is None:
                                 lines1 = splitnewlines(text1)
                             if lines2 is None:
                                 lines2 = splitnewlines(text2)
                             old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                             new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                             if old == new:
                                 kind = '~'
                         yield gap, kind
                     yield match, '='
             def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
                 """Return the unified diff of texts `a` and `b` as a single string.

                 `ad`/`bd` are the dates and `fn1`/`fn2` the file names used in the
                 '---'/'+++' header lines. A text that is None is reported as
                 /dev/null. Unless opts.text is set, content that util.binary()
                 flags only produces a 'Binary file ... has changed' line.

                 An empty string is returned when both texts are empty or when no
                 difference is found.
                 """
                 def datetag(date, fn=None):
                     # Header line suffix: the date, unless git/nodates suppress it;
                     # a bare tab is kept after file names containing a space.
                     if not opts.git and not opts.nodates:
                         return '\t%s\n' % date
                     if fn and ' ' in fn:
                         return '\t\n'
                     return '\n'

                 if not a and not b:
                     return ""

                 if opts.noprefix:
                     aprefix = bprefix = ''
                 else:
                     aprefix = 'a/'
                     bprefix = 'b/'

                 epoch = util.datestr((0, 0))

                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)

                 if not opts.text and (util.binary(a) or util.binary(b)):
                     if a and b and len(a) == len(b) and a == b:
                         return ""
                     lines = ['Binary file %s has changed\n' % fn1]
                 elif not a:
                     # Everything in b is an addition.
                     b = splitnewlines(b)
                     if a is None:
                         l1 = '--- /dev/null%s' % datetag(epoch)
                     else:
                         l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
                     l3 = "@@ -0,0 +1,%d @@\n" % len(b)
                     lines = [l1, l2, l3] + ["+" + e for e in b]
                 elif not b:
                     # Everything in a is a removal.
                     a = splitnewlines(a)
                     l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     if b is None:
                         l2 = '+++ /dev/null%s' % datetag(epoch)
                     else:
                         l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
                     l3 = "@@ -1,%d +0,0 @@\n" % len(a)
                     lines = [l1, l2, l3] + ["-" + e for e in a]
                 else:
                     # _unidiff yields (hunkrange, hunklines) pairs; only the lines
                     # are needed here. extend() keeps this linear in the output
                     # size, unlike summing lists.
                     lines = []
                     for hrange, hlines in _unidiff(a, b, opts=opts):
                         lines.extend(hlines)
                     if not lines:
                         return ""

                     lines.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))
                     lines.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

                 # Only the last line of a text can lack its newline; flag it the way
                 # diff does.
                 for idx, line in enumerate(lines):
                     if not line.endswith('\n'):
                         lines[idx] = line + "\n\\ No newline at end of file\n"

                 return "".join(lines)
             def _unidiff(t1, t2, opts=defaultopts):
                 """Yield hunks of a headerless unified diff from t1 and t2 texts.

                 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
                 tuple (s1, l1, s2, l2) representing the range information of the hunk to
                 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
                 of the hunk combining said header followed by line additions and
                 deletions.
                 """
                 l1 = splitnewlines(t1)
                 l2 = splitnewlines(t2)

                 def contextend(l, total):
                     # last context line after l, clamped to the number of lines
                     return min(l + opts.context, total)

                 def contextstart(l):
                     # first context line before l, clamped to the start of the text
                     return max(l - opts.context, 0)

                 # [position where the last function-line search started, last
                 # function line found]; a list so that yieldhunk can update it.
                 lastfunc = [0, '']

                 def yieldhunk(hunk):
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     blen = b2 - bstart + aend - a2

                     func = ""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in xrange(astart - 1, lastpos - 1, -1):
                             if l1[i][0].isalnum():
                                 func = ' ' + l1[i].rstrip()[:40]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart

                     # zero-length hunk ranges report their start line as one less
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1

                     hunkrange = astart, alen, bstart, blen
                     hunklines = (
                         ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
                         + delta
                         # trailing context lines after the last change
                         + [' ' + x for x in l1[a2:aend]]
                     )
                     yield hunkrange, hunklines

                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 hunk = None
                 ignoredlines = 0
                 for s, stype in allblocks(t1, t2, opts, l1, l2):
                     a1, a2, b1, b2 = s
                     if stype != '!':
                         if stype == '~':
                             # The diff context lines are based on t1 content. When
                             # blank lines are ignored, the new lines offsets must
                             # be adjusted as if equivalent blocks ('~') had the
                             # same sizes on both sides.
                             ignoredlines += (b2 - b1) - (a2 - a1)
                         continue
                     delta = []
                     # slice before shifting b1/b2, which are then only used for the
                     # reported line numbers
                     old = l1[a1:a2]
                     new = l2[b1:b2]

                     b1 -= ignoredlines
                     b2 -= ignoredlines
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [astart, a2, bstart, b2, delta]

                     # delta is the list stored in the hunk, so extend it in place
                     delta.extend(' ' + x for x in l1[astart:a1])
                     delta.extend('-' + x for x in old)
                     delta.extend('+' + x for x in new)

                 if hunk:
                     for x in yieldhunk(hunk):
                         yield x
             def b85diff(to, tn):
                 '''Return a base85-encoded git binary patch that recreates `tn`.

                 None is treated as empty content for either side. An empty string
                 is returned when old and new content are equal. Otherwise the result
                 is a 'GIT binary patch' / 'literal <size>' header followed by the
                 zlib-compressed new content, one encoded line per chunk of at most
                 52 bytes, and a terminating blank line.
                 '''
                 def fmtline(line):
                     # The leading character encodes the chunk length: 'A'..'Z' for
                     # 1..26 bytes, 'a'.. for 27 and up.
                     size = len(line)
                     if size > 26:
                         marker = chr(size - 26 + ord('a') - 1)
                     else:
                         marker = chr(ord('A') + size - 1)
                     return '%c%s\n' % (marker, base85.b85encode(line, True))

                 def chunk(text, csize=52):
                     start, total = 0, len(text)
                     while start < total:
                         stop = start + csize
                         yield text[start:stop]
                         start = stop

                 to = '' if to is None else to
                 tn = '' if tn is None else tn
                 if to == tn:
                     return ''

                 # TODO: deltas
                 header = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
                 encoded = [fmtline(piece) for piece in chunk(zlib.compress(tn))]
                 return ''.join(header + encoded + ['\n'])
             def patchtext(bin):
                 """Return the concatenated data of every fragment in delta `bin`.

                 `bin` is read as a sequence of fragments, each made of a 12-byte
                 header (three big-endian 32-bit integers) followed by as many data
                 bytes as the third header field says. Only the data is kept.
                 """
                 fragments = []
                 offset = 0
                 end = len(bin)
                 while offset < end:
                     datastart = offset + 12
                     # the first two header fields are not needed here
                     length = struct.unpack(">lll", bin[offset:datastart])[2]
                     offset = datastart + length
                     fragments.append(bin[datastart:offset])
                 return "".join(fragments)
             def patch(a, bin):
                 """Apply the single delta `bin` to text `a` and return the result.

                 With an empty `a` the result is simply `bin` without its first 12
                 bytes, returned as a util.buffer; otherwise the work is delegated
                 to mpatch.patches.
                 """
                 if len(a) != 0:
                     return mpatch.patches(a, [bin])
                 # skip over trivial delta header
                 return util.buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
             def get_matching_blocks(a, b):
                 """Return bdiff's matching blocks as (start1, start2, length) tuples."""
                 triples = []
                 for block in bdiff.blocks(a, b):
                     start1, end1, start2 = block[0], block[1], block[2]
                     triples.append((start1, start2, end1 - start1))
                 return triples
             def trivialdiffheader(length):
                 """Return the 12-byte header packing (0, 0, length), or '' if length is 0.

                 The fields are packed as three big-endian 32-bit integers.
                 """
                 if not length:
                     return ''
                 return struct.pack(">lll", 0, 0, length)
             def replacediffheader(oldlen, newlen):
                 """Return the 12-byte header packing (0, oldlen, newlen).

                 The fields are packed as three big-endian 32-bit integers.
                 """
                 fields = (0, oldlen, newlen)
                 return struct.pack(">lll", *fields)
             # Module-level aliases exposing the mpatch and bdiff entry points under
             # this module's namespace.
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff