upstream/mercurial-mirror Commit - r31272:e41946f3

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import re

10

import re

11

import struct

11

import struct

12

import zlib

12

import zlib

13

14

from .i18n import _

14

from .i18n import _

15

from . import (

15

from . import (

16

base85,

16

base85,

17

bdiff,

17

bdiff,

18

error,

18

error,

19

mpatch,

19

mpatch,

20

util,

20

util,

21

)

21

)

22

23

def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline. A final line that is
    not newline-terminated is returned without one, and empty text gives
    an empty list.
    '''
    pieces = text.split('\n')
    # whatever follows the last newline; empty when text ends with one
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines

32

33

class diffopts(object):
    '''Container for the options controlling diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    Options that are not passed, or passed as None, take their value
    from `defaults`. Keyword arguments that are not keys of `defaults`
    are ignored.
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'nobinary': False,
        'noprefix': False,
        'index': 0,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        'showsimilarity': False,
    }

    def __init__(self, **opts):
        # set one attribute per known option, falling back to the default
        for name, fallback in self.defaults.items():
            value = opts.get(name)
            setattr(self, name, fallback if value is None else value)

        # context may arrive as a string; normalize it to an int
        try:
            self.context = int(self.context)
        except ValueError:
            raise error.Abort(_('diff context lines count must be '
                                'an integer, not %r') % self.context)

    def copy(self, **kwargs):
        '''return a new diffopts with the same option values as this
        one, overridden by the given keyword arguments'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)

# shared instance holding the default value of every option
defaultopts = diffopts()

82

83

def wsclean(opts, text, blank=True):
    '''return text normalized according to the whitespace options of opts

    When opts.ignorews is set the text goes through bdiff.fixws(text, 1);
    otherwise, when opts.ignorewsamount is set, through
    bdiff.fixws(text, 0). If blank is true and opts.ignoreblanklines is
    set, runs of newlines are then collapsed into one and leading and
    trailing newlines are stripped.
    '''
    if opts.ignorews:
        fixmode = 1
    elif opts.ignorewsamount:
        fixmode = 0
    else:
        fixmode = None
    if fixmode is not None:
        text = bdiff.fixws(text, fixmode)
    if not (blank and opts.ignoreblanklines):
        return text
    return re.sub('\n+', '\n', text).strip('\n')

91

92

def splitblock(base1, lines1, base2, lines2, opts):
    '''yield ([a1, a2, b1, b2], btype) items splitting two line lists

    btype is '=' for a run of lines that are non-blank on both sides and
    '~' for a run of blank lines. Offsets are relative to lines1 and
    lines2, shifted by base1 and base2 respectively.
    '''
    # The input lines match except for interwoven blank lines. We
    # transform them into a sequence of matching blocks and blank blocks.
    blank1 = [not wsclean(opts, l) for l in lines1]
    blank2 = [not wsclean(opts, l) for l in lines2]
    end1, end2 = len(blank1), len(blank2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        pos1, pos2 = start1, start2
        if (pos1 < end1 and not blank1[pos1]
            and pos2 < end2 and not blank2[pos2]):
            # Consume the matching lines
            btype = '='
            while pos1 < end1 and not blank1[pos1] and not blank2[pos2]:
                pos1 += 1
                pos2 += 1
        else:
            # Consume the block of blank lines
            btype = '~'
            while pos1 < end1 and blank1[pos1]:
                pos1 += 1
            while pos2 < end2 and blank2[pos2]:
                pos2 += 1
        yield [base1 + start1, base1 + pos1,
               base2 + start2, base2 + pos2], btype
        start1, start2 = pos1, pos2

117

118

def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

    Abort when either bound of `rangea` could not be determined or when
    the computed upper bound is below the lower bound.
    """
    lowerb, upperb = rangeb
    lowera = uppera = None
    kept = []
    for entry in blocks:
        (a1, a2, b1, b2), stype = entry
        matching = stype == '='
        if matching and b1 <= lowerb and upperb <= b2:
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            shift = a1 - b1
            lowera = lowerb + shift
            uppera = upperb + shift
        else:
            if b1 <= lowerb < b2:
                # lower bound falls in this block: map it exactly for a
                # "=" block, otherwise take the block's start on the a side
                lowera = a2 - (b2 - lowerb) if matching else a1
            if b1 < upperb <= b2:
                # same for the upper bound, using the block's end
                uppera = a1 + (upperb - b1) if matching else a2
        if lowerb < b2 and b1 < upperb:
            kept.append(entry)
    if lowera is None or uppera is None or uppera < lowera:
        raise error.Abort(_('line range exceeds file size'))
    return kept, (lowera, uppera)

156

157

def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    # end offsets of the previous match; the first gap starts at line 0
    # of both texts
    prevend1 = prevend2 = 0
    for match in bdiff.blocks(text1, text2):
        # the gap between the previous match and this one. For the first
        # match it is empty when the match starts at line 0.
        gap = [prevend1, match[0], prevend2, match[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield match, '='
        prevend1, prevend2 = match[1], match[3]

197

198

def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
    """Return a unified diff as a (headers, hunkstext) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunkstext` is a string
    containing diff hunks. Otherwise, both `headers` and `hunkstext` are
    empty.

    `a` and `b` are the old and new texts; when one of them is None the
    corresponding header names /dev/null. `ad` and `bd` are the dates shown
    in the headers (omitted when opts.git or opts.nodates is set), `fn1`
    and `fn2` the file names. When either text is binary and opts.text is
    not set, `headers` is empty and `hunkstext` is a one-line "Binary
    file ... has changed" notice.
    """
    def datetag(date, fn=None):
        if not opts.git and not opts.nodates:
            return '\t%s' % date
        # without a date, a file name containing a space is still followed
        # by a tab
        if fn and ' ' in fn:
            return '\t'
        return ''

    sentinel = [], ""
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = ''
    else:
        aprefix = 'a/'
        bprefix = 'b/'

    epoch = util.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    def checknonewline(lines):
        # flag a line that lacks a terminating newline (the last one of
        # a text) with the conventional marker
        for text in lines:
            if text[-1] != '\n':
                text += "\n\\ No newline at end of file\n"
            yield text

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        body = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        body = ["@@ -0,0 +1,%d @@\n" % len(b)] + ["+" + e for e in b]
    elif not b:
        # everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        body = ["@@ -1,%d +0,0 @@\n" % len(a)] + ["-" + e for e in a]
    else:
        # flatten the hunks in a single pass; sum(lists, []) would copy
        # the accumulated list once per hunk
        body = [line
                for hrange, hlines in _unidiff(a, b, opts=opts)
                for line in hlines]
        if not body:
            return sentinel

        headerlines = [
            "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, "".join(checknonewline(body))

266

268

267

def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(l, limit):
        # last line of trailing context, clamped to the end of the text
        return min(l + opts.context, limit)

    def contextstart(l):
        # first line of leading context, clamped to the start of the text
        return max(l - opts.context, 0)

    # [position where the next function-line search stops, last function
    # line found]; shared between successive yieldhunk() calls
    lastfunc = [0, '']

    def yieldhunk(hunk):
        astart, a2, bstart, b2, delta = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                candidate = l1[i]
                if candidate[0].isalnum():
                    func = ' ' + candidate.rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        header = "@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))
        trailing = [' ' + line for line in l1[a2:aend]]
        yield hunkrange, [header] + delta + trailing

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype == '~':
            # The diff context lines are based on t1 content. When
            # blank lines are ignored, the new lines offsets must
            # be adjusted as if equivalent blocks ('~') had the
            # same sizes on both sides.
            ignoredlines += (b2 - b1) - (a2 - a1)
        if stype != '!':
            continue

        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        joined = False
        if hunk is not None:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                joined = True
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if joined:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk is not None:
        for x in yieldhunk(hunk):
            yield x

377

379

378

def b85diff(to, tn):
    '''return a base85-encoded "GIT binary patch" carrying the new text

    to and tn are the old and new contents; None is treated as empty.
    An empty string is returned when both are equal. Otherwise the patch
    is a 'literal' section holding the zlib-compressed new text; the old
    text is only used for the equality check.
    '''
    def fmtline(line):
        # the first char encodes the chunk length: 'A'-'Z' for 1 to 26,
        # lowercase letters from 'a' for 27 and up
        size = len(line)
        if size <= 26:
            base = ord('A')
        else:
            base = ord('a') - 26
        return '%c%s\n' % (chr(base + size - 1),
                           base85.b85encode(line, True))

    if to is None:
        to = ''
    if tn is None:
        tn = ''

    if to == tn:
        return ''

    # TODO: deltas
    compressed = zlib.compress(tn)
    csize = 52
    ret = ['GIT binary patch\n', 'literal %s\n' % len(tn)]
    ret.extend([fmtline(compressed[i:i + csize])
                for i in xrange(0, len(compressed), csize)])
    ret.append('\n')

    return ''.join(ret)

412

414

413

def patchtext(bin):
    '''return the concatenation of the data carried by every fragment
    of the binary patch bin

    Each fragment starts with a 12-byte header of three big-endian
    signed 32-bit integers (">lll"). Only the third one, the length of
    the data following the header, is used here.
    '''
    total = len(bin)
    offset = 0
    fragments = []
    while offset < total:
        datastart = offset + 12
        length = struct.unpack(">lll", bin[offset:datastart])[2]
        offset = datastart + length
        fragments.append(bin[datastart:offset])
    return "".join(fragments)

422

424

423

def patch(a, bin):
    '''return the result of applying the binary patch bin to text a

    For a non-empty a this is mpatch.patches(a, [bin]). For an empty a
    the result is util.buffer(bin, 12), i.e. bin past its first 12
    bytes.
    '''
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return util.buffer(bin, 12)

428

430

429

# similar to difflib.SequenceMatcher.get_matching_blocks

431

# similar to difflib.SequenceMatcher.get_matching_blocks

430

def get_matching_blocks(a, b):
    '''return a list of (astart, bstart, length) triples, one per block
    returned by bdiff.blocks(a, b)'''
    triples = []
    for block in bdiff.blocks(a, b):
        astart, aend, bstart = block[0], block[1], block[2]
        triples.append((astart, bstart, aend - astart))
    return triples

432

434

433

def trivialdiffheader(length):
    '''return the packed (0, 0, length) patch header, or an empty string
    when length is zero'''
    if not length:
        return ''
    return struct.pack(">lll", 0, 0, length)

435

437

436

def replacediffheader(oldlen, newlen):
    '''return the packed (0, oldlen, newlen) patch header'''
    fields = (0, oldlen, newlen)
    return struct.pack(">lll", *fields)

438

440

439

patches = mpatch.patches

441

patches = mpatch.patches

440

patchedsize = mpatch.patchedsize

442

patchedsize = mpatch.patchedsize

441

textdiff = bdiff.bdiff

443

textdiff = bdiff.bdiff

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import re
             import struct
             import zlib
             from .i18n import _
             from . import (
                 base85,
                 bdiff,
                 error,
                 mpatch,
                 util,
             )
             def splitnewlines(text):
                 '''like str.splitlines, but only split on newlines.

                 Each returned line keeps its terminating newline. Text after the
                 last newline is returned without one, and no empty trailing entry
                 is produced when the text ends with a newline (or is empty).
                 '''
                 pieces = text.split('\n')
                 # whatever follows the last newline (possibly nothing at all)
                 remainder = pieces.pop()
                 result = [piece + '\n' for piece in pieces]
                 if remainder:
                     result.append(remainder)
                 return result
             class diffopts(object):
                 '''Container for the options controlling diff generation.

                 context          number of context lines
                 text             treat all files as text
                 showfunc         enable diff -p output
                 git              enable the git extended patch format
                 nodates          remove dates from diff headers
                 nobinary         ignore binary files
                 noprefix         disable the 'a/' and 'b/' prefixes (ignored in
                                  plain mode)
                 ignorews         ignore all whitespace changes in the diff
                 ignorewsamount   ignore changes in the amount of whitespace
                 ignoreblanklines ignore changes whose lines are all blank
                 upgrade          generate git diffs to avoid data loss

                 Every key of `defaults` becomes an instance attribute.
                 '''

                 defaults = {
                     'context': 3,
                     'text': False,
                     'showfunc': False,
                     'git': False,
                     'nodates': False,
                     'nobinary': False,
                     'noprefix': False,
                     'index': 0,
                     'ignorews': False,
                     'ignorewsamount': False,
                     'ignoreblanklines': False,
                     'upgrade': False,
                     'showsimilarity': False,
                 }

                 def __init__(self, **opts):
                     # a missing option, or one explicitly passed as None, falls
                     # back to its default; unknown keyword arguments are ignored
                     for name, fallback in self.defaults.items():
                         value = opts.get(name)
                         setattr(self, name, fallback if value is None else value)
                     try:
                         self.context = int(self.context)
                     except ValueError:
                         raise error.Abort(_('diff context lines count must be '
                                             'an integer, not %r') % self.context)

                 def copy(self, **kwargs):
                     '''return a new diffopts with `kwargs` overriding our values'''
                     merged = dict((name, getattr(self, name))
                                   for name in self.defaults)
                     merged.update(kwargs)
                     return diffopts(**merged)
             # shared diffopts instance holding only the class defaults; used as the
             # fallback `opts` value by the diff functions in this module
             defaultopts = diffopts()
             def wsclean(opts, text, blank=True):
                 '''normalize `text` according to the whitespace options in `opts`

                 With opts.ignorews the text goes through bdiff.fixws(text, 1);
                 otherwise, with opts.ignorewsamount, through bdiff.fixws(text, 0).
                 When `blank` is true and opts.ignoreblanklines is set, runs of
                 newlines are collapsed to one and leading/trailing newlines are
                 stripped. Returns the resulting text.
                 '''
                 fixmode = None
                 if opts.ignorews:
                     fixmode = 1
                 elif opts.ignorewsamount:
                     fixmode = 0
                 if fixmode is not None:
                     text = bdiff.fixws(text, fixmode)
                 if blank and opts.ignoreblanklines:
                     squeezed = re.sub('\n+', '\n', text)
                     text = squeezed.strip('\n')
                 return text
             def splitblock(base1, lines1, base2, lines2, opts):
                 '''split a block that matches modulo blank lines into sub-blocks

                 The input lines match except for interwoven blank lines. Yields
                 ([a1, a2, b1, b2], btype) pairs where btype is '=' for a run of
                 lines that are non-blank on both sides and '~' for a run of blank
                 lines; offsets are shifted by `base1` / `base2`.
                 '''
                 # True for every line that still has content after wsclean()
                 keep1 = [bool(wsclean(opts, line)) for line in lines1]
                 keep2 = [bool(wsclean(opts, line)) for line in lines2]
                 end1 = len(keep1)
                 end2 = len(keep2)
                 pos1 = pos2 = 0
                 while pos1 < end1 or pos2 < end2:
                     nxt1, nxt2 = pos1, pos2
                     if (nxt1 < end1 and keep1[nxt1]
                         and nxt2 < end2 and keep2[nxt2]):
                         # both sides start with content: consume matching lines
                         kind = '='
                         while nxt1 < end1 and keep1[nxt1] and keep2[nxt2]:
                             nxt1 += 1
                             nxt2 += 1
                     else:
                         # consume the blank lines on each side independently
                         kind = '~'
                         while nxt1 < end1 and not keep1[nxt1]:
                             nxt1 += 1
                         while nxt2 < end2 and not keep2[nxt2]:
                             nxt2 += 1
                     yield [base1 + pos1, base1 + nxt1,
                            base2 + pos2, base2 + nxt2], kind
                     pos1, pos2 = nxt1, nxt2
             def blocksinrange(blocks, rangeb):
                 """Keep the `blocks` overlapping line range `rangeb` on the "b" side.

                 `blocks` is an iterable of ``((a1, a2, b1, b2), stype)`` items and
                 `rangeb` a ``(lower, upper)`` pair expressed in "b" line numbers.

                 Return `filteredblocks, rangea` where:

                 * `filteredblocks` lists the items of `blocks` whose ``(b1, b2)``
                   part is inside `rangeb`, i.e. for which
                   ``rangeb[0] < b2 and b1 < rangeb[1]``;
                 * `rangea` is the matching line range w.r.t. the ``(a1, a2)`` parts
                   of `blocks`.

                 Abort when either bound of `rangea` could not be determined or when
                 the bounds come out reversed.
                 """
                 lowb, highb = rangeb
                 lowa = higha = None
                 kept = []
                 for entry in blocks:
                     (a1, a2, b1, b2), kind = entry
                     matching = kind == '='
                     if matching and b1 <= lowb and highb <= b2:
                         # rangeb lies within a single "=" block: translate both
                         # bounds by the offset between the two sides
                         lowa = a1 + (lowb - b1)
                         higha = a1 + (highb - b1)
                     else:
                         # outside "=" blocks a bound snaps to the block boundary
                         if b1 <= lowb < b2:
                             lowa = a2 - (b2 - lowb) if matching else a1
                         if b1 < highb <= b2:
                             higha = a1 + (highb - b1) if matching else a2
                     if lowb < b2 and b1 < highb:
                         kept.append(entry)
                 if lowa is None or higha is None or higha < lowa:
                     raise error.Abort(_('line range exceeds file size'))
                 return kept, (lowa, higha)
             def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
                 """Yield (block, type) tuples covering text1 and text2.

                 `block` is an mdiff.blocks line entry. `type` is '=' for blocks
                 matching exactly one another (bdiff blocks), '!' for non-matching
                 blocks and '~' for blocks matching only after having filtered
                 blank lines.

                 lines1 and lines2 are text1 and text2 split with splitnewlines(),
                 if the caller already has them; they are computed on demand
                 otherwise.
                 """
                 if opts is None:
                     opts = defaultopts
                 if opts.ignorews or opts.ignorewsamount:
                     text1 = wsclean(opts, text1, False)
                     text2 = wsclean(opts, text2, False)
                 # Before the first match there is no previous block; a zeroed
                 # entry makes the leading gap start at line 0 on both sides.
                 previous = [0, 0, 0, 0]
                 for match in bdiff.blocks(text1, text2):
                     gap = [previous[1], match[0], previous[3], match[2]]
                     previous = match
                     # bdiff sometimes gives huge matches past eof; an empty gap
                     # (also the case when the first match starts at line 0) is
                     # simply not reported
                     if gap[0] != gap[1] or gap[2] != gap[3]:
                         kind = '!'
                         if opts.ignoreblanklines:
                             if lines1 is None:
                                 lines1 = splitnewlines(text1)
                             if lines2 is None:
                                 lines2 = splitnewlines(text2)
                             before = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                             after = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                             if before == after:
                                 kind = '~'
                         yield gap, kind
                     yield match, '='
             def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts):
                 """Return a unified diff as a (headers, hunkstext) tuple.

                 If the diff is not null, `headers` is a list with unified diff header
                 lines "--- <original>" and "+++ <new>" and `hunkstext` is a string
                 containing diff hunks. Otherwise, both `headers` and `hunkstext` are
                 empty.

                 `a` and `b` are the old and new contents (None selects a /dev/null
                 header for that side), `ad` and `bd` the dates shown in the headers,
                 `fn1` and `fn2` the file names, and `opts` a diffopts instance.
                 When `opts.text` is unset and either side is binary, `headers` is
                 empty and `hunkstext` is a single "Binary file ... has changed"
                 line.
                 """
                 def datetag(date, fn=None):
                     # dates are only emitted for plain (non-git) diffs with dates
                     # enabled; otherwise a bare tab follows names holding a space
                     if not opts.git and not opts.nodates:
                         return '\t%s' % date
                     if fn and ' ' in fn:
                         return '\t'
                     return ''
                 sentinel = [], ""
                 if not a and not b:
                     return sentinel
                 if opts.noprefix:
                     aprefix = bprefix = ''
                 else:
                     aprefix = 'a/'
                     bprefix = 'b/'
                 epoch = util.datestr((0, 0))
                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)
                 def checknonewline(lines):
                     # flag every output line lacking a terminating newline
                     for text in lines:
                         if text[-1] != '\n':
                             text += "\n\\ No newline at end of file\n"
                         yield text
                 if not opts.text and (util.binary(a) or util.binary(b)):
                     if a and b and len(a) == len(b) and a == b:
                         return sentinel
                     headerlines = []
                     l = ['Binary file %s has changed\n' % fn1]
                 elif not a:
                     # file added (or previously empty): everything is a '+' line
                     b = splitnewlines(b)
                     if a is None:
                         l1 = '--- /dev/null%s' % datetag(epoch)
                     else:
                         l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     l2 = "+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
                     headerlines = [l1, l2]
                     l = ["@@ -0,0 +1,%d @@\n" % len(b)] + ["+" + e for e in b]
                 elif not b:
                     # file removed (or emptied): everything is a '-' line
                     a = splitnewlines(a)
                     l1 = "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     if b is None:
                         l2 = '+++ /dev/null%s' % datetag(epoch)
                     else:
                         l2 = "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
                     headerlines = [l1, l2]
                     l = ["@@ -1,%d +0,0 @@\n" % len(a)] + ["-" + e for e in a]
                 else:
                     # flatten the lines of every hunk into a single list
                     l = [line
                          for hrange, hlines in _unidiff(a, b, opts=opts)
                          for line in hlines]
                     if not l:
                         return sentinel
                     headerlines = [
                         "--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
                         "+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
                     ]
                 return headerlines, "".join(checknonewline(l))
             def _unidiff(t1, t2, opts=defaultopts):
                 """Yield hunks of a headerless unified diff from t1 and t2 texts.
                 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
                 tuple (s1, l1, s2, l2) representing the range information of the hunk to
                 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
                 of the hunk combining said header followed by line additions and
                 deletions.
                 `opts` provides `context` (number of context lines) and `showfunc`,
                 and is also handed to allblocks().
                 """
                 l1 = splitnewlines(t1)
                 l2 = splitnewlines(t2)
                 # end of the trailing context after line `l`, clamped to `len`
                 def contextend(l, len):
                     ret = l + opts.context
                     if ret > len:
                         ret = len
                     return ret
                 # start of the leading context before line `l`, clamped to 0
                 def contextstart(l):
                     ret = l - opts.context
                     if ret < 0:
                         return 0
                     return ret
                 # [line at which the backwards function search stops,
                 #  last function text found]; a list so yieldhunk can update it
                 lastfunc = [0, '']
                 def yieldhunk(hunk):
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     # the trailing context (aend - a2 lines) counts on both sides
                     blen = b2 - bstart + aend - a2
                     func = ""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in xrange(astart - 1, lastpos - 1, -1):
                             if l1[i][0].isalnum():
                                 func = ' ' + l1[i].rstrip()[:40]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart
                     # zero-length hunk ranges report their start line as one less
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1
                     hunkrange = astart, alen, bstart, blen
                     hunklines = (
                         ["@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
                         + delta
                         + [' ' + l1[x] for x in xrange(a2, aend)]
                     )
                     yield hunkrange, hunklines
                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 # `hunk` is the pending [astart, a2, bstart, b2, delta] entry; it is
                 # emitted once a later change turns out not to join it, or at the end.
                 hunk = None
                 ignoredlines = 0
                 for s, stype in allblocks(t1, t2, opts, l1, l2):
                     a1, a2, b1, b2 = s
                     # only '!' blocks produce output lines
                     if stype != '!':
                         if stype == '~':
                             # The diff context lines are based on t1 content. When
                             # blank lines are ignored, the new lines offsets must
                             # be adjusted as if equivalent blocks ('~') had the
                             # same sizes on both sides.
                             ignoredlines += (b2 - b1) - (a2 - a1)
                         continue
                     delta = []
                     # slice before shifting b1/b2: l2 is indexed by real offsets
                     old = l1[a1:a2]
                     new = l2[b1:b2]
                     b1 -= ignoredlines
                     b2 -= ignoredlines
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         # keep appending to the joined hunk's own line list
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [astart, a2, bstart, b2, delta]
                     # leading context, then removed lines, then added lines
                     delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
                     delta[len(delta):] = ['-' + x for x in old]
                     delta[len(delta):] = ['+' + x for x in new]
                 # flush the last pending hunk
                 if hunk:
                     for x in yieldhunk(hunk):
                         yield x
             def b85diff(to, tn):
                 '''return a base85-encoded git binary patch turning `to` into `tn`

                 None is treated as empty content on either side. The result is the
                 empty string when both sides are equal; otherwise it is a
                 "GIT binary patch" section holding the zlib-compressed new content
                 as a "literal" (the old content is not used for deltas yet).
                 '''
                 def fmtline(line):
                     # each output line starts with a character encoding the number
                     # of raw bytes it carries: 'A'.. for 1-26, 'a'.. for 27 and up
                     size = len(line)
                     if size > 26:
                         prefix = chr(size - 26 + ord('a') - 1)
                     else:
                         prefix = chr(ord('A') + size - 1)
                     return '%c%s\n' % (prefix, base85.b85encode(line, True))
                 old = '' if to is None else to
                 new = '' if tn is None else tn
                 if old == new:
                     return ''
                 # TODO: deltas
                 pieces = ['GIT binary patch\n', 'literal %s\n' % len(new)]
                 packed = zlib.compress(new)
                 # encode the compressed data 52 bytes at a time
                 csize = 52
                 offset = 0
                 while offset < len(packed):
                     pieces.append(fmtline(packed[offset:offset + csize]))
                     offset += csize
                 pieces.append('\n')
                 return ''.join(pieces)
             def patchtext(bin):
                 '''return the concatenated data of every fragment in patch `bin`

                 `bin` is read as a sequence of records, each made of a 12-byte
                 header (three big-endian signed 32-bit integers) followed by as
                 many data bytes as the third integer says. The first two integers
                 are not used here (presumably the replaced range -- TODO confirm).
                 '''
                 hdrsize = 12
                 fragments = []
                 offset = 0
                 while offset < len(bin):
                     datastart = offset + hdrsize
                     start, end, length = struct.unpack(">lll", bin[offset:datastart])
                     offset = datastart + length
                     fragments.append(bin[datastart:offset])
                 return "".join(fragments)
             def patch(a, bin):
                 '''apply the binary patch `bin` to `a` and return the result

                 When `a` is empty the result is a buffer over `bin` starting past
                 its first 12 bytes; otherwise the work is delegated to
                 mpatch.patches().
                 '''
                 if len(a):
                     return mpatch.patches(a, [bin])
                 # skip over trivial delta header
                 return util.buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
             def get_matching_blocks(a, b):
                 '''return bdiff.blocks(a, b) as (astart, bstart, length) triples'''
                 triples = []
                 for block in bdiff.blocks(a, b):
                     astart, aend, bstart = block[0], block[1], block[2]
                     triples.append((astart, bstart, aend - astart))
                 return triples
             def trivialdiffheader(length):
                 '''return the packed (0, 0, length) patch header, or '' if length is 0'''
                 if not length:
                     return ''
                 return struct.pack(">lll", 0, 0, length)
             def replacediffheader(oldlen, newlen):
                 '''return the packed (0, oldlen, newlen) patch header'''
                 fields = (0, oldlen, newlen)
                 return struct.pack(">lll", *fields)
             # module-level aliases exposing mpatch and bdiff functions under mdiff
             # names
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff