upstream/mercurial-mirror Commit - r31267:881ed6a4

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import re

10

import re

11

import struct

11

import struct

12

import zlib

12

import zlib

13

14

from .i18n import _

14

from .i18n import _

15

from . import (

15

from . import (

16

base85,

16

base85,

17

bdiff,

17

bdiff,

18

error,

18

error,

19

mpatch,

19

mpatch,

20

util,

20

util,

21

)

21

)

22

23

def splitnewlines(text):

23

def splitnewlines(text):

24

'''like str.splitlines, but only split on newlines.'''

24

'''like str.splitlines, but only split on newlines.'''

25

lines = [l + '\n' for l in text.split('\n')]

25

lines = [l + '\n' for l in text.split('\n')]

26

if lines:

26

if lines:

27

if lines[-1] == '\n':

27

if lines[-1] == '\n':

28

lines.pop()

28

lines.pop()

29

else:

29

else:

30

lines[-1] = lines[-1][:-1]

30

lines[-1] = lines[-1][:-1]

31

return lines

31

return lines

32

33

class diffopts(object):

33

class diffopts(object):

34

'''context is the number of context lines

34

'''context is the number of context lines

35

text treats all files as text

35

text treats all files as text

36

showfunc enables diff -p output

36

showfunc enables diff -p output

37

git enables the git extended patch format

37

git enables the git extended patch format

38

nodates removes dates from diff headers

38

nodates removes dates from diff headers

39

nobinary ignores binary files

39

nobinary ignores binary files

40

noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)

40

noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)

41

ignorews ignores all whitespace changes in the diff

41

ignorews ignores all whitespace changes in the diff

42

ignorewsamount ignores changes in the amount of whitespace

42

ignorewsamount ignores changes in the amount of whitespace

43

ignoreblanklines ignores changes whose lines are all blank

43

ignoreblanklines ignores changes whose lines are all blank

44

upgrade generates git diffs to avoid data loss

44

upgrade generates git diffs to avoid data loss

45

'''

45

'''

46

47

defaults = {

47

defaults = {

48

'context': 3,

48

'context': 3,

49

'text': False,

49

'text': False,

50

'showfunc': False,

50

'showfunc': False,

51

'git': False,

51

'git': False,

52

'nodates': False,

52

'nodates': False,

53

'nobinary': False,

53

'nobinary': False,

54

'noprefix': False,

54

'noprefix': False,

55

'index': 0,

55

'index': 0,

56

'ignorews': False,

56

'ignorews': False,

57

'ignorewsamount': False,

57

'ignorewsamount': False,

58

'ignoreblanklines': False,

58

'ignoreblanklines': False,

59

'upgrade': False,

59

'upgrade': False,

60

'showsimilarity': False,

60

'showsimilarity': False,

61

}

61

}

62

63

def __init__(self, **opts):

63

def __init__(self, **opts):

64

for k in self.defaults.keys():

64

for k in self.defaults.keys():

65

v = opts.get(k)

65

v = opts.get(k)

66

if v is None:

66

if v is None:

67

v = self.defaults[k]

67

v = self.defaults[k]

68

setattr(self, k, v)

68

setattr(self, k, v)

69

70

try:

70

try:

71

self.context = int(self.context)

71

self.context = int(self.context)

72

except ValueError:

72

except ValueError:

73

raise error.Abort(_('diff context lines count must be '

73

raise error.Abort(_('diff context lines count must be '

74

'an integer, not %r') % self.context)

74

'an integer, not %r') % self.context)

75

76

def copy(self, **kwargs):

76

def copy(self, **kwargs):

77

opts = dict((k, getattr(self, k)) for k in self.defaults)

77

opts = dict((k, getattr(self, k)) for k in self.defaults)

78

opts.update(kwargs)

78

opts.update(kwargs)

79

return diffopts(**opts)

79

return diffopts(**opts)

80

81

defaultopts = diffopts()

81

defaultopts = diffopts()

82

83

def wsclean(opts, text, blank=True):

83

def wsclean(opts, text, blank=True):

84

if opts.ignorews:

84

if opts.ignorews:

85

text = bdiff.fixws(text, 1)

85

text = bdiff.fixws(text, 1)

86

elif opts.ignorewsamount:

86

elif opts.ignorewsamount:

87

text = bdiff.fixws(text, 0)

87

text = bdiff.fixws(text, 0)

88

if blank and opts.ignoreblanklines:

88

if blank and opts.ignoreblanklines:

89

text = re.sub('\n+', '\n', text).strip('\n')

89

text = re.sub('\n+', '\n', text).strip('\n')

90

return text

90

return text

91

92

def splitblock(base1, lines1, base2, lines2, opts):

92

def splitblock(base1, lines1, base2, lines2, opts):

93

# The input lines matches except for interwoven blank lines. We

93

# The input lines matches except for interwoven blank lines. We

94

# transform it into a sequence of matching blocks and blank blocks.

94

# transform it into a sequence of matching blocks and blank blocks.

95

lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]

95

lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]

96

lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]

96

lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]

97

s1, e1 = 0, len(lines1)

97

s1, e1 = 0, len(lines1)

98

s2, e2 = 0, len(lines2)

98

s2, e2 = 0, len(lines2)

99

while s1 < e1 or s2 < e2:

99

while s1 < e1 or s2 < e2:

100

i1, i2, btype = s1, s2, '='

100

i1, i2, btype = s1, s2, '='

101

if (i1 >= e1 or lines1[i1] == 0

101

if (i1 >= e1 or lines1[i1] == 0

102

or i2 >= e2 or lines2[i2] == 0):

102

or i2 >= e2 or lines2[i2] == 0):

103

# Consume the block of blank lines

103

# Consume the block of blank lines

104

btype = '~'

104

btype = '~'

105

while i1 < e1 and lines1[i1] == 0:

105

while i1 < e1 and lines1[i1] == 0:

106

i1 += 1

106

i1 += 1

107

while i2 < e2 and lines2[i2] == 0:

107

while i2 < e2 and lines2[i2] == 0:

108

i2 += 1

108

i2 += 1

109

else:

109

else:

110

# Consume the matching lines

110

# Consume the matching lines

111

while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:

111

while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:

112

i1 += 1

112

i1 += 1

113

i2 += 1

113

i2 += 1

114

yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype

114

yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype

115

s1 = i1

115

s1 = i1

116

s2 = i2

116

s2 = i2

117

118

def blocksinrange(blocks, rangeb):

118

def blocksinrange(blocks, rangeb):

119

"""filter `blocks` like (a1, a2, b1, b2) from items outside line range

119

"""filter `blocks` like (a1, a2, b1, b2) from items outside line range

120

`rangeb` from ``(b1, b2)`` point of view.

120

`rangeb` from ``(b1, b2)`` point of view.

121

122

Return `filteredblocks, rangea` where:

122

Return `filteredblocks, rangea` where:

123

124

* `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of

124

* `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of

125

`blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a

125

`blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a

126

block ``(b1, b2)`` being inside `rangeb` if

126

block ``(b1, b2)`` being inside `rangeb` if

127

``rangeb[0] < b2 and b1 < rangeb[1]``;

127

``rangeb[0] < b2 and b1 < rangeb[1]``;

128

* `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

128

* `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

129

"""

129

"""

130

lbb, ubb = rangeb

130

lbb, ubb = rangeb

131

lba, uba = None, None

131

lba, uba = None, None

132

filteredblocks = []

132

filteredblocks = []

133

for block in blocks:

133

for block in blocks:

134

(a1, a2, b1, b2), stype = block

134

(a1, a2, b1, b2), stype = block

135

if lbb >= b1 and ubb <= b2 and stype == '=':

135

if lbb >= b1 and ubb <= b2 and stype == '=':

136

# rangeb is within a single "=" hunk, restrict back linerange1

136

# rangeb is within a single "=" hunk, restrict back linerange1

137

# by offsetting rangeb

137

# by offsetting rangeb

138

lba = lbb - b1 + a1

138

lba = lbb - b1 + a1

139

uba = ubb - b1 + a1

139

uba = ubb - b1 + a1

140

else:

140

else:

141

if b1 <= lbb < b2:

141

if b1 <= lbb < b2:

142

if stype == '=':

142

if stype == '=':

143

lba = a2 - (b2 - lbb)

143

lba = a2 - (b2 - lbb)

144

else:

144

else:

145

lba = a1

145

lba = a1

146

if b1 < ubb <= b2:

146

if b1 < ubb <= b2:

147

if stype == '=':

147

if stype == '=':

148

uba = a1 + (ubb - b1)

148

uba = a1 + (ubb - b1)

149

else:

149

else:

150

uba = a2

150

uba = a2

151

if lbb < b2 and b1 < ubb:

151

if lbb < b2 and b1 < ubb:

152

filteredblocks.append(block)

152

filteredblocks.append(block)

153

if lba is None or uba is None or uba < lba:

153

if lba is None or uba is None or uba < lba:

154

raise error.Abort(_('line range exceeds file size'))

154

raise error.Abort(_('line range exceeds file size'))

155

return filteredblocks, (lba, uba)

155

return filteredblocks, (lba, uba)

156

157

def allblocks(text1, text2, opts=None, lines1=None, lines2=None):

157

def allblocks(text1, text2, opts=None, lines1=None, lines2=None):

158

"""Return (block, type) tuples, where block is an mdiff.blocks

158

"""Return (block, type) tuples, where block is an mdiff.blocks

159

line entry. type is '=' for blocks matching exactly one another

159

line entry. type is '=' for blocks matching exactly one another

160

(bdiff blocks), '!' for non-matching blocks and '~' for blocks

160

(bdiff blocks), '!' for non-matching blocks and '~' for blocks

161

matching only after having filtered blank lines.

161

matching only after having filtered blank lines.

162

line1 and line2 are text1 and text2 split with splitnewlines() if

162

line1 and line2 are text1 and text2 split with splitnewlines() if

163

they are already available.

163

they are already available.

164

"""

164

"""

165

if opts is None:

165

if opts is None:

166

opts = defaultopts

166

opts = defaultopts

167

if opts.ignorews or opts.ignorewsamount:

167

if opts.ignorews or opts.ignorewsamount:

168

text1 = wsclean(opts, text1, False)

168

text1 = wsclean(opts, text1, False)

169

text2 = wsclean(opts, text2, False)

169

text2 = wsclean(opts, text2, False)

170

diff = bdiff.blocks(text1, text2)

170

diff = bdiff.blocks(text1, text2)

171

for i, s1 in enumerate(diff):

171

for i, s1 in enumerate(diff):

172

# The first match is special.

172

# The first match is special.

173

# we've either found a match starting at line 0 or a match later

173

# we've either found a match starting at line 0 or a match later

174

# in the file. If it starts later, old and new below will both be

174

# in the file. If it starts later, old and new below will both be

175

# empty and we'll continue to the next match.

175

# empty and we'll continue to the next match.

176

if i > 0:

176

if i > 0:

177

s = diff[i - 1]

177

s = diff[i - 1]

178

else:

178

else:

179

s = [0, 0, 0, 0]

179

s = [0, 0, 0, 0]

180

s = [s[1], s1[0], s[3], s1[2]]

180

s = [s[1], s1[0], s[3], s1[2]]

181

182

# bdiff sometimes gives huge matches past eof, this check eats them,

182

# bdiff sometimes gives huge matches past eof, this check eats them,

183

# and deals with the special first match case described above

183

# and deals with the special first match case described above

184

if s[0] != s[1] or s[2] != s[3]:

184

if s[0] != s[1] or s[2] != s[3]:

185

type = '!'

185

type = '!'

186

if opts.ignoreblanklines:

186

if opts.ignoreblanklines:

187

if lines1 is None:

187

if lines1 is None:

188

lines1 = splitnewlines(text1)

188

lines1 = splitnewlines(text1)

189

if lines2 is None:

189

if lines2 is None:

190

lines2 = splitnewlines(text2)

190

lines2 = splitnewlines(text2)

191

old = wsclean(opts, "".join(lines1[s[0]:s[1]]))

191

old = wsclean(opts, "".join(lines1[s[0]:s[1]]))

192

new = wsclean(opts, "".join(lines2[s[2]:s[3]]))

192

new = wsclean(opts, "".join(lines2[s[2]:s[3]]))

193

if old == new:

193

if old == new:

194

type = '~'

194

type = '~'

195

yield s, type

195

yield s, type

196

yield s1, '='

196

yield s1, '='

197

198

def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):

198

def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):

199

def datetag(date, fn=None):

199

def datetag(date, fn=None):

200

if not opts.git and not opts.nodates:

200

if not opts.git and not opts.nodates:

201

return '\t%s\n' % date

201

return '\t%s\n' % date

202

if fn and ' ' in fn:

202

if fn and ' ' in fn:

203

return '\t\n'

203

return '\t\n'

204

return '\n'

204

return '\n'

205

206

if not a and not b:

206

if not a and not b:

207

return ""

207

return ""

208

209

if opts.noprefix:

209

if opts.noprefix:

210

aprefix = bprefix = ''

210

aprefix = bprefix = ''

211

else:

211

else:

212

aprefix = 'a/'

212

aprefix = 'a/'

213

bprefix = 'b/'

213

bprefix = 'b/'

214

215

epoch = util.datestr((0, 0))

215

epoch = util.datestr((0, 0))

216

217

fn1 = util.pconvert(fn1)

217

fn1 = util.pconvert(fn1)

218

fn2 = util.pconvert(fn2)

218

fn2 = util.pconvert(fn2)

219

220

if not opts.text and (util.binary(a) or util.binary(b)):

220

if not opts.text and (util.binary(a) or util.binary(b)):

221

if a and b and len(a) == len(b) and a == b:

221

if a and b and len(a) == len(b) and a == b:

222

return ""

222

return ""

223

l = ['Binary file %s has changed\n' % fn1]

223

l = ['Binary file %s has changed\n' % fn1]

224

elif not a:

224

elif not a:

225

b = splitnewlines(b)

225

b = splitnewlines(b)

226

if a is None:

226

if a is None:

227

l1 = '--- /dev/null%s' % datetag(epoch)

227

l1 = '--- /dev/null%s' % datetag(epoch)

228

else:

228

else:

229

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

229

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

230

l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))

230

l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))

231

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

231

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

232

l = [l1, l2, l3] + ["+" + e for e in b]

232

l = [l1, l2, l3] + ["+" + e for e in b]

233

elif not b:

233

elif not b:

234

a = splitnewlines(a)

234

a = splitnewlines(a)

235

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

235

l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

236

if b is None:

236

if b is None:

237

l2 = '+++ /dev/null%s' % datetag(epoch)

237

l2 = '+++ /dev/null%s' % datetag(epoch)

238

else:

238

else:

239

l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))

239

l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))

240

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

240

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

241

l = [l1, l2, l3] + ["-" + e for e in a]

241

l = [l1, l2, l3] + ["-" + e for e in a]

242

else:

242

else:

243

al = splitnewlines(a)

243

l = list(_unidiff(a, b, opts=opts))

244

bl = splitnewlines(b)

245

l = list(_unidiff(a, b, al, bl, opts=opts))

246

if not l:

244

if not l:

247

return ""

245

return ""

248

246

249

l.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))

247

l.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))

250

l.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

248

l.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))

251

249

252

for ln in xrange(len(l)):

250

for ln in xrange(len(l)):

253

if l[ln][-1] != '\n':

251

if l[ln][-1] != '\n':

254

l[ln] += "\n\ No newline at end of file\n"

252

l[ln] += "\n\ No newline at end of file\n"

255

253

256

return "".join(l)

254

return "".join(l)

257

255

258

# creates a headerless unified diff

256

# creates a headerless unified diff

259

# t1 and t2 are the text to be diffed

257

# t1 and t2 are the text to be diffed

260

# l1 and l2 are the text broken up into lines

258

def _unidiff(t1, t2, opts=defaultopts):

261

def _unidiff(t1, t2, l1, l2, opts=defaultopts):

259

l1 = splitnewlines(t1)

260

l2 = splitnewlines(t2)

262

def contextend(l, len):

261

def contextend(l, len):

263

ret = l + opts.context

262

ret = l + opts.context

264

if ret > len:

263

if ret > len:

265

ret = len

264

ret = len

266

return ret

265

return ret

267

266

268

def contextstart(l):

267

def contextstart(l):

269

ret = l - opts.context

268

ret = l - opts.context

270

if ret < 0:

269

if ret < 0:

271

return 0

270

return 0

272

return ret

271

return ret

273

272

274

lastfunc = [0, '']

273

lastfunc = [0, '']

275

def yieldhunk(hunk):

274

def yieldhunk(hunk):

276

(astart, a2, bstart, b2, delta) = hunk

275

(astart, a2, bstart, b2, delta) = hunk

277

aend = contextend(a2, len(l1))

276

aend = contextend(a2, len(l1))

278

alen = aend - astart

277

alen = aend - astart

279

blen = b2 - bstart + aend - a2

278

blen = b2 - bstart + aend - a2

280

279

281

func = ""

280

func = ""

282

if opts.showfunc:

281

if opts.showfunc:

283

lastpos, func = lastfunc

282

lastpos, func = lastfunc

284

# walk backwards from the start of the context up to the start of

283

# walk backwards from the start of the context up to the start of

285

# the previous hunk context until we find a line starting with an

284

# the previous hunk context until we find a line starting with an

286

# alphanumeric char.

285

# alphanumeric char.

287

for i in xrange(astart - 1, lastpos - 1, -1):

286

for i in xrange(astart - 1, lastpos - 1, -1):

288

if l1[i][0].isalnum():

287

if l1[i][0].isalnum():

289

func = ' ' + l1[i].rstrip()[:40]

288

func = ' ' + l1[i].rstrip()[:40]

290

lastfunc[1] = func

289

lastfunc[1] = func

291

break

290

break

292

# by recording this hunk's starting point as the next place to

291

# by recording this hunk's starting point as the next place to

293

# start looking for function lines, we avoid reading any line in

292

# start looking for function lines, we avoid reading any line in

294

# the file more than once.

293

# the file more than once.

295

lastfunc[0] = astart

294

lastfunc[0] = astart

296

295

297

# zero-length hunk ranges report their start line as one less

296

# zero-length hunk ranges report their start line as one less

298

if alen:

297

if alen:

299

astart += 1

298

astart += 1

300

if blen:

299

if blen:

301

bstart += 1

300

bstart += 1

302

301

303

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,

302

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,

304

bstart, blen, func)

303

bstart, blen, func)

305

for x in delta:

304

for x in delta:

306

yield x

305

yield x

307

for x in xrange(a2, aend):

306

for x in xrange(a2, aend):

308

yield ' ' + l1[x]

307

yield ' ' + l1[x]

309

308

310

# bdiff.blocks gives us the matching sequences in the files. The loop

309

# bdiff.blocks gives us the matching sequences in the files. The loop

311

# below finds the spaces between those matching sequences and translates

310

# below finds the spaces between those matching sequences and translates

312

# them into diff output.

311

# them into diff output.

313

#

312

#

314

hunk = None

313

hunk = None

315

ignoredlines = 0

314

ignoredlines = 0

316

for s, stype in allblocks(t1, t2, opts, l1, l2):

315

for s, stype in allblocks(t1, t2, opts, l1, l2):

317

a1, a2, b1, b2 = s

316

a1, a2, b1, b2 = s

318

if stype != '!':

317

if stype != '!':

319

if stype == '~':

318

if stype == '~':

320

# The diff context lines are based on t1 content. When

319

# The diff context lines are based on t1 content. When

321

# blank lines are ignored, the new lines offsets must

320

# blank lines are ignored, the new lines offsets must

322

# be adjusted as if equivalent blocks ('~') had the

321

# be adjusted as if equivalent blocks ('~') had the

323

# same sizes on both sides.

322

# same sizes on both sides.

324

ignoredlines += (b2 - b1) - (a2 - a1)

323

ignoredlines += (b2 - b1) - (a2 - a1)

325

continue

324

continue

326

delta = []

325

delta = []

327

old = l1[a1:a2]

326

old = l1[a1:a2]

328

new = l2[b1:b2]

327

new = l2[b1:b2]

329

328

330

b1 -= ignoredlines

329

b1 -= ignoredlines

331

b2 -= ignoredlines

330

b2 -= ignoredlines

332

astart = contextstart(a1)

331

astart = contextstart(a1)

333

bstart = contextstart(b1)

332

bstart = contextstart(b1)

334

prev = None

333

prev = None

335

if hunk:

334

if hunk:

336

# join with the previous hunk if it falls inside the context

335

# join with the previous hunk if it falls inside the context

337

if astart < hunk[1] + opts.context + 1:

336

if astart < hunk[1] + opts.context + 1:

338

prev = hunk

337

prev = hunk

339

astart = hunk[1]

338

astart = hunk[1]

340

bstart = hunk[3]

339

bstart = hunk[3]

341

else:

340

else:

342

for x in yieldhunk(hunk):

341

for x in yieldhunk(hunk):

343

yield x

342

yield x

344

if prev:

343

if prev:

345

# we've joined the previous hunk, record the new ending points.

344

# we've joined the previous hunk, record the new ending points.

346

hunk[1] = a2

345

hunk[1] = a2

347

hunk[3] = b2

346

hunk[3] = b2

348

delta = hunk[4]

347

delta = hunk[4]

349

else:

348

else:

350

# create a new hunk

349

# create a new hunk

351

hunk = [astart, a2, bstart, b2, delta]

350

hunk = [astart, a2, bstart, b2, delta]

352

351

353

delta[len(delta):] = [' ' + x for x in l1[astart:a1]]

352

delta[len(delta):] = [' ' + x for x in l1[astart:a1]]

354

delta[len(delta):] = ['-' + x for x in old]

353

delta[len(delta):] = ['-' + x for x in old]

355

delta[len(delta):] = ['+' + x for x in new]

354

delta[len(delta):] = ['+' + x for x in new]

356

355

357

if hunk:

356

if hunk:

358

for x in yieldhunk(hunk):

357

for x in yieldhunk(hunk):

359

yield x

358

yield x

360

359

361

def b85diff(to, tn):

360

def b85diff(to, tn):

362

'''print base85-encoded binary diff'''

361

'''print base85-encoded binary diff'''

363

def fmtline(line):

362

def fmtline(line):

364

l = len(line)

363

l = len(line)

365

if l <= 26:

364

if l <= 26:

366

l = chr(ord('A') + l - 1)

365

l = chr(ord('A') + l - 1)

367

else:

366

else:

368

l = chr(l - 26 + ord('a') - 1)

367

l = chr(l - 26 + ord('a') - 1)

369

return '%c%s\n' % (l, base85.b85encode(line, True))

368

return '%c%s\n' % (l, base85.b85encode(line, True))

370

369

371

def chunk(text, csize=52):

370

def chunk(text, csize=52):

372

l = len(text)

371

l = len(text)

373

i = 0

372

i = 0

374

while i < l:

373

while i < l:

375

yield text[i:i + csize]

374

yield text[i:i + csize]

376

i += csize

375

i += csize

377

376

378

if to is None:

377

if to is None:

379

to = ''

378

to = ''

380

if tn is None:

379

if tn is None:

381

tn = ''

380

tn = ''

382

381

383

if to == tn:

382

if to == tn:

384

return ''

383

return ''

385

384

386

# TODO: deltas

385

# TODO: deltas

387

ret = []

386

ret = []

388

ret.append('GIT binary patch\n')

387

ret.append('GIT binary patch\n')

389

ret.append('literal %s\n' % len(tn))

388

ret.append('literal %s\n' % len(tn))

390

for l in chunk(zlib.compress(tn)):

389

for l in chunk(zlib.compress(tn)):

391

ret.append(fmtline(l))

390

ret.append(fmtline(l))

392

ret.append('\n')

391

ret.append('\n')

393

392

394

return ''.join(ret)

393

return ''.join(ret)

395

394

396

def patchtext(bin):

395

def patchtext(bin):

397

pos = 0

396

pos = 0

398

t = []

397

t = []

399

while pos < len(bin):

398

while pos < len(bin):

400

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

399

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

401

pos += 12

400

pos += 12

402

t.append(bin[pos:pos + l])

401

t.append(bin[pos:pos + l])

403

pos += l

402

pos += l

404

return "".join(t)

403

return "".join(t)

405

404

406

def patch(a, bin):

405

def patch(a, bin):

407

if len(a) == 0:

406

if len(a) == 0:

408

# skip over trivial delta header

407

# skip over trivial delta header

409

return util.buffer(bin, 12)

408

return util.buffer(bin, 12)

410

return mpatch.patches(a, [bin])

409

return mpatch.patches(a, [bin])

411

410

412

# similar to difflib.SequenceMatcher.get_matching_blocks

411

# similar to difflib.SequenceMatcher.get_matching_blocks

413

def get_matching_blocks(a, b):

412

def get_matching_blocks(a, b):

414

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

413

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

415

414

416

def trivialdiffheader(length):

415

def trivialdiffheader(length):

417

return struct.pack(">lll", 0, 0, length) if length else ''

416

return struct.pack(">lll", 0, 0, length) if length else ''

418

417

419

def replacediffheader(oldlen, newlen):

418

def replacediffheader(oldlen, newlen):

420

return struct.pack(">lll", 0, oldlen, newlen)

419

return struct.pack(">lll", 0, oldlen, newlen)

421

420

422

patches = mpatch.patches

421

patches = mpatch.patches

423

patchedsize = mpatch.patchedsize

422

patchedsize = mpatch.patchedsize

424

textdiff = bdiff.bdiff

423

textdiff = bdiff.bdiff

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import re
             import struct
             import zlib
             from .i18n import _
             from . import (
                 base85,
                 bdiff,
                 error,
                 mpatch,
                 util,
             )
             def splitnewlines(text):
                 '''Break text into lines, splitting on newline characters only.
                 Unlike str.splitlines, no other line boundary is recognised. Every
                 returned line keeps its trailing newline; a final fragment without
                 one is returned unchanged, and empty text gives an empty list.
                 '''
                 pieces = text.split('\n')
                 # whatever follows the last newline: empty when text ends with one
                 tail = pieces.pop()
                 result = [piece + '\n' for piece in pieces]
                 if tail:
                     result.append(tail)
                 return result
             class diffopts(object):
                 '''Bundle of the options that control diff generation.
                 context is the number of context lines
                 text treats all files as text
                 showfunc enables diff -p output
                 git enables the git extended patch format
                 nodates removes dates from diff headers
                 nobinary ignores binary files
                 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
                 ignorews ignores all whitespace changes in the diff
                 ignorewsamount ignores changes in the amount of whitespace
                 ignoreblanklines ignores changes whose lines are all blank
                 upgrade generates git diffs to avoid data loss
                 The 'index' and 'showsimilarity' options are accepted as well.
                 '''
                 # value used for each option that is not supplied (or supplied as None)
                 defaults = {
                     'context': 3,
                     'text': False,
                     'showfunc': False,
                     'git': False,
                     'nodates': False,
                     'nobinary': False,
                     'noprefix': False,
                     'index': 0,
                     'ignorews': False,
                     'ignorewsamount': False,
                     'ignoreblanklines': False,
                     'upgrade': False,
                     'showsimilarity': False,
                     }
                 def __init__(self, **opts):
                     '''Set one attribute per known option.
                     Keyword arguments that are not keys of `defaults` are ignored, and
                     a value of None selects the default. `context` is converted with
                     int(); error.Abort is raised when that fails with ValueError.
                     '''
                     for name, fallback in self.defaults.items():
                         given = opts.get(name)
                         setattr(self, name, fallback if given is None else given)
                     try:
                         self.context = int(self.context)
                     except ValueError:
                         raise error.Abort(_('diff context lines count must be '
                                            'an integer, not %r') % self.context)
                 def copy(self, **kwargs):
                     '''Return a new diffopts with the same values, overridden by kwargs.'''
                     opts = {}
                     for name in self.defaults:
                         opts[name] = getattr(self, name)
                     opts.update(kwargs)
                     return diffopts(**opts)
             # shared diffopts instance in which every option has its default value;
             # used as the fallback `opts` by the diff functions in this module
             defaultopts = diffopts()
             def wsclean(opts, text, blank=True):
                 '''Return text normalised according to the whitespace options in opts.
                 With opts.ignorews, or failing that opts.ignorewsamount, the text is
                 passed through bdiff.fixws (second argument 1 and 0 respectively).
                 When blank is true and opts.ignoreblanklines is set, runs of newlines
                 are then collapsed to one and leading/trailing newlines are dropped.
                 '''
                 if opts.ignorews or opts.ignorewsamount:
                     text = bdiff.fixws(text, 1 if opts.ignorews else 0)
                 if not (blank and opts.ignoreblanklines):
                     return text
                 return re.sub('\n+', '\n', text).strip('\n')
             def splitblock(base1, lines1, base2, lines2, opts):
                 '''Split two line ranges into alternating matching and blank blocks.
                 The two inputs are expected to be equal apart from interleaved blank
                 lines. Yields ([a1, a2, b1, b2], btype) pairs whose bounds are offset
                 by base1 and base2; btype is '=' for a run of non-blank lines taken
                 from both sides and '~' for a run of blank lines.
                 '''
                 # 1 marks a line that still has content after wsclean(), 0 a blank one
                 flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
                 flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
                 end1, end2 = len(flags1), len(flags2)
                 start1 = start2 = 0
                 while start1 < end1 or start2 < end2:
                     pos1, pos2 = start1, start2
                     if (pos1 < end1 and flags1[pos1]
                         and pos2 < end2 and flags2[pos2]):
                         # both sides sit on a non-blank line: advance them in lockstep
                         btype = '='
                         while pos1 < end1 and flags1[pos1] == 1 and flags2[pos2] == 1:
                             pos1 += 1
                             pos2 += 1
                     else:
                         # at least one side is blank or exhausted: skip the blank
                         # lines of each side independently
                         btype = '~'
                         while pos1 < end1 and flags1[pos1] == 0:
                             pos1 += 1
                         while pos2 < end2 and flags2[pos2] == 0:
                             pos2 += 1
                     yield [base1 + start1, base1 + pos1,
                            base2 + start2, base2 + pos2], btype
                     start1, start2 = pos1, pos2
             def blocksinrange(blocks, rangeb):
                 """Keep the `blocks` that intersect line range `rangeb` on the "b" side.
                 `blocks` is an iterable of ``((a1, a2, b1, b2), stype)`` items and
                 `rangeb` a ``(lower, upper)`` pair of line numbers on the "b" side.
                 Return `filteredblocks, rangea` where:
                 * `filteredblocks` is the list of items of `blocks` whose ``(b1, b2)``
                   part overlaps `rangeb`, i.e. ``rangeb[0] < b2 and b1 < rangeb[1]``;
                 * `rangea` is the matching line range expressed with respect to the
                   ``(a1, a2)`` parts of `blocks`.
                 Raise error.Abort when either bound of `rangea` cannot be determined
                 or when the upper bound ends up below the lower one.
                 """
                 lbb, ubb = rangeb
                 lba = uba = None
                 filteredblocks = []
                 for block in blocks:
                     (a1, a2, b1, b2), stype = block
                     matching = stype == '='
                     if matching and b1 <= lbb and ubb <= b2:
                         # rangeb lies entirely inside one "=" block: both bounds map
                         # to the "a" side by the same offset
                         lba = lbb - b1 + a1
                         uba = ubb - b1 + a1
                     else:
                         # a bound falling inside a non-matching block snaps to that
                         # block's edge on the "a" side
                         if b1 <= lbb < b2:
                             lba = (a2 - (b2 - lbb)) if matching else a1
                         if b1 < ubb <= b2:
                             uba = (a1 + (ubb - b1)) if matching else a2
                     if b1 < ubb and lbb < b2:
                         filteredblocks.append(block)
                 if lba is None or uba is None or uba < lba:
                     raise error.Abort(_('line range exceeds file size'))
                 return filteredblocks, (lba, uba)
             def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
                 """Generate (block, type) pairs describing how text1 maps onto text2.
                 block is a line entry shaped like those of bdiff.blocks and type is:
                 * '=' for a block reported as matching by bdiff,
                 * '!' for a block that does not match,
                 * '~' for a block that matches only once blank lines are filtered
                   out (produced only when opts.ignoreblanklines is set).
                 lines1 and lines2 are text1 and text2 split with splitnewlines() if
                 they are already available; otherwise they are computed on demand.
                 """
                 if opts is None:
                     opts = defaultopts
                 if opts.ignorews or opts.ignorewsamount:
                     text1 = wsclean(opts, text1, False)
                     text2 = wsclean(opts, text2, False)
                 # The region between the end of one match and the start of the next is
                 # a candidate changed block. An all-zero entry stands in for the
                 # "match" preceding the first real one.
                 previous = [0, 0, 0, 0]
                 for match in bdiff.blocks(text1, text2):
                     gap = [previous[1], match[0], previous[3], match[2]]
                     previous = match
                     # An empty gap is not reported. This covers a first match that
                     # starts at line 0 and the huge matches past eof that bdiff
                     # sometimes returns.
                     if gap[0] != gap[1] or gap[2] != gap[3]:
                         kind = '!'
                         if opts.ignoreblanklines:
                             if lines1 is None:
                                 lines1 = splitnewlines(text1)
                             if lines2 is None:
                                 lines2 = splitnewlines(text2)
                             old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                             new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                             if old == new:
                                 kind = '~'
                         yield gap, kind
                     yield match, '='
             def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
                 '''Return the unified diff between texts a and b as a single string.
                 a, b are the old and new contents; None yields a /dev/null header
                 ad, bd are the dates written after the file names, unless opts.git
                     or opts.nodates is set
                 fn1, fn2 are the old and new file names; they are normalised with
                     util.pconvert and prefixed with 'a/' and 'b/' unless
                     opts.noprefix is set
                 opts is the diffopts instance driving the output
                 An empty string is returned when both texts are empty or when no
                 difference is found. Unless opts.text is set, content detected as
                 binary by util.binary only produces a "Binary file ... has changed"
                 line. Any output line lacking a trailing newline is followed by the
                 "\\ No newline at end of file" marker.
                 '''
                 def datetag(date, fn=None):
                     if not opts.git and not opts.nodates:
                         return '\t%s\n' % date
                     if fn and ' ' in fn:
                         return '\t\n'
                     return '\n'
                 if not a and not b:
                     return ""
                 if opts.noprefix:
                     aprefix = bprefix = ''
                 else:
                     aprefix = 'a/'
                     bprefix = 'b/'
                 epoch = util.datestr((0, 0))
                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)
                 if not opts.text and (util.binary(a) or util.binary(b)):
                     if a and b and len(a) == len(b) and a == b:
                         return ""
                     l = ['Binary file %s has changed\n' % fn1]
                 elif not a:
                     # everything in b is new: a single all-additions hunk
                     b = splitnewlines(b)
                     if a is None:
                         l1 = '--- /dev/null%s' % datetag(epoch)
                     else:
                         l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
                     l3 = "@@ -0,0 +1,%d @@\n" % len(b)
                     l = [l1, l2, l3] + ["+" + e for e in b]
                 elif not b:
                     # everything in a is gone: a single all-removals hunk
                     a = splitnewlines(a)
                     l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     if b is None:
                         l2 = '+++ /dev/null%s' % datetag(epoch)
                     else:
                         l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
                     l3 = "@@ -1,%d +0,0 @@\n" % len(a)
                     l = [l1, l2, l3] + ["-" + e for e in a]
                 else:
                     # _unidiff splits both texts into lines itself
                     l = list(_unidiff(a, b, opts=opts))
                     if not l:
                         return ""
                     l.insert(0, "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)))
                     l.insert(1, "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)))
                 for ln, line in enumerate(l):
                     if line[-1] != '\n':
                         l[ln] = line + "\n\\ No newline at end of file\n"
                 return "".join(l)
             # creates a headerless unified diff
             # t1 and t2 are the text to be diffed
             def _unidiff(t1, t2, opts=defaultopts):
                 '''Yield the lines of a unified diff of t1 and t2, without file headers.
                 For every hunk this yields the "@@ -a,b +c,d @@" line followed by the
                 hunk body, whose lines are prefixed with ' ', '-' or '+'. Nothing is
                 yielded when allblocks() reports no differing ('!') block.
                 '''
                 l1 = splitnewlines(t1)
                 l2 = splitnewlines(t2)
                 def contextend(l, limit):
                     # last context line after l, clamped to the end of the file
                     ret = l + opts.context
                     if ret > limit:
                         ret = limit
                     return ret
                 def contextstart(l):
                     # first context line before l, clamped to the start of the file
                     ret = l - opts.context
                     if ret < 0:
                         return 0
                     return ret
                 # [position to resume the search from, last function line found]
                 lastfunc = [0, '']
                 def yieldhunk(hunk):
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     blen = b2 - bstart + aend - a2
                     func = ""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in xrange(astart - 1, lastpos - 1, -1):
                             if l1[i][0].isalnum():
                                 func = ' ' + l1[i].rstrip()[:40]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart
                     # zero-length hunk ranges report their start line as one less
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1
                     yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                                        bstart, blen, func)
                     for x in delta:
                         yield x
                     for x in xrange(a2, aend):
                         yield ' ' + l1[x]
                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 hunk = None
                 ignoredlines = 0
                 for s, stype in allblocks(t1, t2, opts, l1, l2):
                     a1, a2, b1, b2 = s
                     if stype != '!':
                         if stype == '~':
                             # The diff context lines are based on t1 content. When
                             # blank lines are ignored, the new lines offsets must
                             # be adjusted as if equivalent blocks ('~') had the
                             # same sizes on both sides.
                             ignoredlines += (b2 - b1) - (a2 - a1)
                         continue
                     delta = []
                     old = l1[a1:a2]
                     new = l2[b1:b2]
                     b1 -= ignoredlines
                     b2 -= ignoredlines
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [astart, a2, bstart, b2, delta]
                     delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
                     delta[len(delta):] = ['-' + x for x in old]
                     delta[len(delta):] = ['+' + x for x in new]
                 if hunk:
                     for x in yieldhunk(hunk):
                         yield x
             def b85diff(to, tn):
                 '''Return a "GIT binary patch" literal section for new content tn.
                 to and tn are the old and new contents; None is treated as empty. An
                 empty string is returned when both are equal. Otherwise tn is
                 zlib-compressed and emitted in base85-encoded lines of at most 52
                 bytes, each preceded by a character encoding its length.
                 '''
                 def fmtline(line):
                     # lengths 1..26 map to 'A'..'Z', larger ones continue from 'a'
                     size = len(line)
                     if size > 26:
                         prefix = chr(size - 26 + ord('a') - 1)
                     else:
                         prefix = chr(ord('A') + size - 1)
                     return '%c%s\n' % (prefix, base85.b85encode(line, True))
                 def chunk(text, csize=52):
                     start, total = 0, len(text)
                     while start < total:
                         yield text[start:start + csize]
                         start += csize
                 to = '' if to is None else to
                 tn = '' if tn is None else tn
                 if to == tn:
                     return ''
                 # TODO: deltas
                 pieces = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
                 pieces.extend(fmtline(part) for part in chunk(zlib.compress(tn)))
                 pieces.append('\n')
                 return ''.join(pieces)
             def patchtext(bin):
                 '''Return the concatenated data carried by every fragment of delta bin.
                 bin is read as a sequence of fragments, each made of a 12-byte header
                 (three big-endian signed 32-bit integers) followed by as many data
                 bytes as the third header field says. Only the data is returned.
                 struct.error is raised if a header is truncated.
                 '''
                 pos = 0
                 end = len(bin)
                 t = []
                 while pos < end:
                     # only the data length is needed; the first two fields are unused
                     _p1, _p2, l = struct.unpack(">lll", bin[pos:pos + 12])
                     pos += 12
                     t.append(bin[pos:pos + l])
                     pos += l
                 # join with a bytes literal: same as "" on Python 2, and required on
                 # Python 3 where the slices are bytes
                 return b"".join(t)
             def patch(a, bin):
                 '''Apply the binary delta bin to the text a and return the result.
                 When a is empty nothing needs patching: the result is a util.buffer
                 over bin starting at offset 12, i.e. past the trivial delta header.
                 '''
                 if len(a):
                     return mpatch.patches(a, [bin])
                 # skip over trivial delta header
                 return util.buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
             def get_matching_blocks(a, b):
                 '''Return bdiff.blocks(a, b) as (start in a, start in b, length) triples.'''
                 triples = []
                 for blk in bdiff.blocks(a, b):
                     astart, aend, bstart = blk[0], blk[1], blk[2]
                     triples.append((astart, bstart, aend - astart))
                 return triples
             def trivialdiffheader(length):
                 '''Return the 12-byte delta header (0, 0, length), or nothing for 0.
                 The header is three big-endian signed 32-bit integers. When length
                 is zero (or otherwise false) an empty byte string is returned, so both
                 outcomes have the same type.
                 '''
                 # b'' is identical to '' on Python 2 and matches struct.pack's bytes
                 # result on Python 3
                 return struct.pack(">lll", 0, 0, length) if length else b''
             def replacediffheader(oldlen, newlen):
                 '''Return the 12-byte delta header (0, oldlen, newlen).
                 The three values are packed as big-endian signed 32-bit integers.
                 '''
                 fields = (0, oldlen, newlen)
                 return struct.pack(">lll", *fields)
             # module-level aliases exposing the mpatch and bdiff entry points under
             # this module's namespace
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff