upstream/mercurial-mirror Commit - r50792:a78dfb1a

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

9

import re

9

import re

10

import struct

10

import struct

11

import zlib

11

import zlib

12

13

from .i18n import _

13

from .i18n import _

14

from .pycompat import (

14

from .pycompat import (

15

getattr,

15

getattr,

16

setattr,

16

setattr,

17

)

17

)

18

from . import (

18

from . import (

19

diffhelper,

19

diffhelper,

20

encoding,

20

encoding,

21

error,

21

error,

22

policy,

22

policy,

23

pycompat,

23

pycompat,

24

util,

24

util,

25

)

25

)

26

from .utils import dateutil

26

from .utils import dateutil

27

28

# Implementation modules are resolved through the module policy.
bdiff = policy.importmod('bdiff')
mpatch = policy.importmod('mpatch')

# Re-export the diffing primitives provided by bdiff ...
blocks = bdiff.blocks
fixws = bdiff.fixws
# ... and the patching primitives provided by mpatch.
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
splitnewlines = bdiff.splitnewlines

37

38

39

# TODO: this looks like it could be an attrs, which might help pytype

39

# TODO: this looks like it could be an attrs, which might help pytype

40

class diffopts:
    """Container for the options that drive diff generation.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignorewseol ignores whitespace found right before a line ending
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss
    xdiff selects bdiff.xdiffblocks as block function when it is available
    """

    _HAS_DYNAMIC_ATTRIBUTES = True

    # Every recognised option together with its default value.
    defaults = {
        b'context': 3,
        b'text': False,
        b'showfunc': False,
        b'git': False,
        b'nodates': False,
        b'nobinary': False,
        b'noprefix': False,
        b'index': 0,
        b'ignorews': False,
        b'ignorewsamount': False,
        b'ignorewseol': False,
        b'ignoreblanklines': False,
        b'upgrade': False,
        b'showsimilarity': False,
        b'worddiff': False,
        b'xdiff': False,
    }

    def __init__(self, **opts):
        """Set one attribute per entry of ``defaults``.

        A keyword that is absent or None takes its default value; keywords
        not listed in ``defaults`` are not looked at.  ``context`` is
        converted with int() and error.InputError is raised when that
        conversion fails with ValueError.
        """
        given = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.items():
            value = given.get(name)
            setattr(self, name, fallback if value is None else value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.InputError(
                _(b'diff context lines count must be an integer, not %r')
                % pycompat.bytestr(self.context)
            )

    def copy(self, **kwargs):
        """Return a new diffopts with the current values, overridden by
        ``kwargs``."""
        current = pycompat.strkwargs(
            {name: getattr(self, name) for name in self.defaults}
        )
        current.update(kwargs)
        return diffopts(**current)

    def __bytes__(self):
        """Render every option as ``name: value``, comma separated."""
        rendered = [
            b"%s: %r" % (name, getattr(self, name)) for name in self.defaults
        ]
        return b", ".join(rendered)

    __str__ = encoding.strmethod(__bytes__)

103

97

104

98

# Shared instance carrying only default values; the diff functions in this
# module fall back to it when the caller does not supply options.
defaultopts = diffopts()

99

106

100

107

101

def wsclean(opts, text, blank=True):
    """Return ``text`` normalized according to the whitespace options.

    ``opts.ignorews`` takes precedence over ``opts.ignorewsamount``; both
    delegate to bdiff.fixws() (called with 1 and 0 respectively).  When
    ``blank`` is true and ``opts.ignoreblanklines`` is set, runs of newlines
    are collapsed and leading/trailing newlines are stripped.  Finally,
    ``opts.ignorewseol`` removes spaces, tabs, CRs and form feeds that
    directly precede a newline.
    """
    if opts.ignorews or opts.ignorewsamount:
        text = bdiff.fixws(text, 1 if opts.ignorews else 0)

    collapse_blank = blank and opts.ignoreblanklines
    if collapse_blank:
        squeezed = re.sub(b'\n+', b'\n', text)
        text = squeezed.strip(b'\n')

    if not opts.ignorewseol:
        return text
    return re.sub(br'[ \t\r\f]+\n', br'\n', text)

111

118

112

119

113

def splitblock(base1, lines1, base2, lines2, opts):
    """Split two line lists into alternating matching and blank blocks.

    The inputs are expected to match except for interwoven blank lines.
    Yields ``([a1, a2, b1, b2], btype)`` items, offset by ``base1`` and
    ``base2``, where ``btype`` is ``b'='`` for a run of lines with content
    on both sides and ``b'~'`` for a run of blank lines.
    """
    # True marks a line that still has content after wsclean(), False a
    # line that is blank under the current options.
    solid1 = [bool(wsclean(opts, line)) for line in lines1]
    solid2 = [bool(wsclean(opts, line)) for line in lines2]
    end1 = len(solid1)
    end2 = len(solid2)
    start1 = start2 = 0
    while start1 < end1 or start2 < end2:
        cur1, cur2 = start1, start2
        at_blank = (
            cur1 >= end1
            or not solid1[cur1]
            or cur2 >= end2
            or not solid2[cur2]
        )
        if at_blank:
            # Swallow the blank lines found on either side.
            kind = b'~'
            while cur1 < end1 and not solid1[cur1]:
                cur1 += 1
            while cur2 < end2 and not solid2[cur2]:
                cur2 += 1
        else:
            # Advance both sides in lockstep over lines with content.
            kind = b'='
            while cur1 < end1 and solid1[cur1] and solid2[cur2]:
                cur1 += 1
                cur2 += 1
        yield [base1 + start1, base1 + cur1, base2 + start2, base2 + cur2], kind
        start1, start2 = cur1, cur2

137

144

138

145

139

def hunkinrange(hunk, linerange):
    """Tell whether `hunk`, given as (start, length), overlaps `linerange`,
    given as (lowerbound, upperbound).

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    first, count = hunk
    low, high = linerange
    # The hunk covers [first, first + count); it overlaps unless it ends at
    # or before `low`, or starts at or after `high`.
    if low >= first + count:
        return False
    return first < high

163

170

164

171

165

def blocksinrange(blocks, rangeb):
    """Keep the `blocks` overlapping line range `rangeb` on their "b" side.

    `blocks` items look like ``((a1, a2, b1, b2), stype)``.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is the list of items of `blocks` that are inside
      `rangeb` from the ``(b1, b2)`` point of view; a block ``(b1, b2)``
      being inside `rangeb` if ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. the ``(a1, a2)`` parts of `blocks`.

    Raise error.InputError when either bound of `rangea` could not be
    determined or when the bounds come out inverted.
    """
    lowb, highb = rangeb
    lowa = higha = None
    kept = []
    for item in blocks:
        (a1, a2, b1, b2), stype = item
        matching = stype == b'='
        if matching and lowb >= b1 and highb <= b2:
            # rangeb lies within a single "=" block: translate both bounds
            # by the offset between the two sides.
            shift = a1 - b1
            lowa = lowb + shift
            higha = highb + shift
        else:
            if b1 <= lowb < b2:
                lowa = a2 - (b2 - lowb) if matching else a1
            if b1 < highb <= b2:
                higha = a1 + (highb - b1) if matching else a2
        # same overlap test as hunkinrange((b1, b2 - b1), rangeb)
        if lowb < b2 and b1 < highb:
            kept.append(item)
    if lowa is None or higha is None or higha < lowa:
        raise error.InputError(_(b'line range exceeds file size'))
    return kept, (lowa, higha)

203

210

204

211

205

def chooseblocksfunc(opts=None):
    """Return the function used to compute matching blocks.

    bdiff.xdiffblocks is returned only when ``opts`` is given, has ``xdiff``
    set and the bdiff module provides ``xdiffblocks``; otherwise
    bdiff.blocks is returned.
    """
    use_xdiff = (
        opts is not None
        and opts.xdiff
        and util.safehasattr(bdiff, b'xdiffblocks')
    )
    if use_xdiff:
        return bdiff.xdiffblocks
    return bdiff.blocks

214

221

215

222

216

def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Yield (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    lines1 and lines2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    opts = defaultopts if opts is None else opts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    matches = chooseblocksfunc(opts)(text1, text2)

    # The first match is special: it either starts at line 0 or later in
    # the file. Seeding the "previous" match with zeros makes the gap
    # before it empty in the former case.
    previous = [0, 0, 0, 0]
    for match in matches:
        # the region between the end of the previous match and this one
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if gap[0] != gap[1] or gap[2] != gap[3]:
            kind = b'!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                before = wsclean(opts, b"".join(lines1[gap[0] : gap[1]]))
                after = wsclean(opts, b"".join(lines2[gap[2] : gap[3]]))
                if before == after:
                    kind = b'~'
            yield gap, kind
        yield match, b'='

256

263

257

264

258

def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    Set binary=True if either a or b should be taken as a binary file.
    """

    def datetag(date, fn=None):
        # Dates only appear when neither git mode nor nodates is active.
        if not (opts.git or opts.nodates):
            return b'\t%s' % date
        # Otherwise a bare tab follows file names that contain a space.
        return b'\t' if fn and b' ' in fn else b''

    nothing = [], ()
    if not (a or b):
        return nothing

    aprefix, bprefix = (b'', b'') if opts.noprefix else (b'a/', b'b/')

    epoch = dateutil.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return nothing
        return [], ((None, [b'Binary file %s has changed\n' % fn1]),)

    if not a:
        # Nothing on the old side: every line of b is an addition.
        lacks_eol = not b.endswith(b'\n')
        added = splitnewlines(b)
        if a is None:
            fromline = b'--- /dev/null%s' % datetag(epoch)
        else:
            fromline = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        toline = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        count = len(added)
        body = [b"@@ -0,0 +1,%d @@\n" % count]
        body += [b"+" + line for line in added]
        if lacks_eol:
            body[-1] += b'\n'
            body.append(diffhelper.MISSING_NEWLINE_MARKER)
        return [fromline, toline], (((0, 0, 1, count), body),)

    if not b:
        # Nothing on the new side: every line of a is a removal.
        lacks_eol = not a.endswith(b'\n')
        removed = splitnewlines(a)
        fromline = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            toline = b'+++ /dev/null%s' % datetag(epoch)
        else:
            toline = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        count = len(removed)
        body = [b"@@ -1,%d +0,0 @@\n" % count]
        body += [b"-" + line for line in removed]
        if lacks_eol:
            body[-1] += b'\n'
            body.append(diffhelper.MISSING_NEWLINE_MARKER)
        return [fromline, toline], (((1, count, 0, 0), body),)

    hunks = _unidiff(a, b, opts=opts)
    # _unidiff() first yields a bool telling whether any hunk follows.
    if not next(hunks):
        return nothing

    headerlines = [
        b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
        b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
    ]
    return headerlines, hunks

339

346

340

347

341

def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: True when at least one hunk follows,
    False when there is none.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(pos, limit):
        # end of the trailing context, clamped to the number of lines
        return min(pos + opts.context, limit)

    def contextstart(pos):
        # start of the leading context, clamped to the first line
        return max(pos - opts.context, 0)

    # State of the showfunc lookup: where the next backwards search must
    # stop, and the last function line that was found.
    funcpos = 0
    functext = b''

    def yieldhunk(hunk):
        nonlocal funcpos, functext
        astart, a2, bstart, b2, delta = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = b""
        if opts.showfunc:
            func = functext
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for idx in range(astart - 1, funcpos - 1, -1):
                candidate = l1[idx]
                if not candidate[0:1].isalnum():
                    continue
                func = b' ' + candidate.rstrip()
                # split long function name if ASCII. otherwise we have no
                # idea where the multi-byte boundary is, so just leave it.
                if encoding.isasciistr(func):
                    func = func[:41]
                functext = func
                break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            funcpos = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
        hunklines += delta
        hunklines += [b' ' + l1[x] for x in range(a2, aend)]

        def addmarker(prefixes):
            # Put the missing-newline marker after the last hunk line that
            # starts with one of `prefixes`; report whether that line is a
            # context (shared) line.
            for idx in reversed(range(len(hunklines))):
                if hunklines[idx].startswith(prefixes):
                    shared = hunklines[idx].startswith(b' ')
                    hunklines[idx] += b'\n'
                    hunklines.insert(idx + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    return shared
            return False

        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
            skip = addmarker((b'-', b' '))
        if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
            addmarker(b'+')
        yield hunkrange, hunklines

    has_hunks = False

    def flush(pending):
        # Emit the leading True once, right before the very first hunk.
        nonlocal has_hunks
        if not has_hunks:
            has_hunks = True
            yield True
        yield from yieldhunk(pending)

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    for (a1, a2, b1, b2), stype in allblocks(t1, t2, opts, l1, l2):
        if stype == b'~':
            # The diff context lines are based on t1 content. When
            # blank lines are ignored, the new lines offsets must
            # be adjusted as if equivalent blocks ('~') had the
            # same sizes on both sides.
            ignoredlines += (b2 - b1) - (a2 - a1)
        if stype != b'!':
            continue

        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)

        # join with the previous hunk if it falls inside the context
        joined = hunk is not None and astart < hunk[1] + opts.context + 1
        if joined:
            # context restarts where the previous hunk ended; then record
            # the new ending points and keep appending to the same delta.
            astart = hunk[1]
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            if hunk is not None:
                yield from flush(hunk)
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend([b' ' + x for x in l1[astart:a1]])
        delta.extend([b'-' + x for x in old])
        delta.extend([b'+' + x for x in new])

    if hunk is not None:
        yield from flush(hunk)
    elif not has_hunks:
        yield False

488

495

489

496

490

def b85diff(to, tn):
    """Return a base85-encoded git binary patch turning `to` into `tn`.

    None is treated as empty content on either side.  An empty bytes value
    is returned when both sides are equal; otherwise the patch carries the
    zlib-compressed new content as a "literal" section.
    """

    def fmtline(piece):
        # The first byte encodes the piece length: 'A'.. for lengths up to
        # 26, 'a'.. for longer ones.
        size = len(piece)
        if size <= 26:
            marker = pycompat.bytechr(ord(b'A') + size - 1)
        else:
            marker = pycompat.bytechr(size - 26 + ord(b'a') - 1)
        return b'%c%s\n' % (marker, util.b85encode(piece, True))

    def chunk(text, csize=52):
        for offset in range(0, len(text), csize):
            yield text[offset : offset + csize]

    old = b'' if to is None else to
    new = b'' if tn is None else tn

    if old == new:
        return b''

    # TODO: deltas
    parts = [b'GIT binary patch\n', b'literal %d\n' % len(new)]
    parts.extend(fmtline(piece) for piece in chunk(zlib.compress(new)))
    parts.append(b'\n')

    return b''.join(parts)

525

532

526

533

527

def patchtext(bin):

534

def patchtext(bin):

528

pos = 0

535

pos = 0

529

t = []

536

t = []

530

while pos < len(bin):

537

while pos < len(bin):

531

p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])

538

p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])

532

pos += 12

539

pos += 12

533

t.append(bin[pos : pos + l])

540

t.append(bin[pos : pos + l])

534

pos += l

541

pos += l

535

return b"".join(t)

542

return b"".join(t)

536

543

537

544

538

def patch(a, bin):

545

def patch(a, bin):

539

if len(a) == 0:

546

if len(a) == 0:

540

# skip over trivial delta header

547

# skip over trivial delta header

541

return util.buffer(bin, 12)

548

return util.buffer(bin, 12)

542

return mpatch.patches(a, [bin])

549

return mpatch.patches(a, [bin])

543

550

544

551

545

# similar to difflib.SequenceMatcher.get_matching_blocks

552

# similar to difflib.SequenceMatcher.get_matching_blocks

546

def get_matching_blocks(a, b):

553

def get_matching_blocks(a, b):

547

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

554

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

548

555

549

556

550

def trivialdiffheader(length):

557

def trivialdiffheader(length):

551

return struct.pack(b">lll", 0, 0, length) if length else b''

558

return struct.pack(b">lll", 0, 0, length) if length else b''

552

559

553

560

554

def replacediffheader(oldlen, newlen):

561

def replacediffheader(oldlen, newlen):

555

return struct.pack(b">lll", 0, oldlen, newlen)

562

return struct.pack(b">lll", 0, oldlen, newlen)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import re
             import struct
             import zlib
             from .i18n import _
             from .pycompat import (
                 getattr,
                 setattr,
             )
             from . import (
                 diffhelper,
                 encoding,
                 error,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import dateutil
             # Load the bdiff and mpatch implementations through policy.importmod.
             # NOTE(review): presumably this selects between alternative
             # implementations of each module -- confirm against policy.importmod.
             bdiff = policy.importmod('bdiff')
             mpatch = policy.importmod('mpatch')
             # Re-export selected bdiff/mpatch callables under module-level names.
             blocks = bdiff.blocks
             fixws = bdiff.fixws
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff
             splitnewlines = bdiff.splitnewlines
             # TODO: this looks like it could be an attrs, which might help pytype
             class diffopts:
                 """Container for the options controlling diff generation.

                 context is the number of context lines
                 text treats all files as text
                 showfunc enables diff -p output
                 git enables the git extended patch format
                 nodates removes dates from diff headers
                 nobinary ignores binary files
                 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
                 ignorews ignores all whitespace changes in the diff
                 ignorewsamount ignores changes in the amount of whitespace
                 ignorewseol ignores whitespace changes at the end of lines
                 ignoreblanklines ignores changes whose lines are all blank
                 upgrade generates git diffs to avoid data loss
                 xdiff selects bdiff.xdiffblocks for matching when it is available

                 Every key of `defaults` becomes an instance attribute; a keyword
                 argument that is missing or None falls back to the default value.
                 """

                 _HAS_DYNAMIC_ATTRIBUTES = True

                 defaults = {
                     b'context': 3,
                     b'text': False,
                     b'showfunc': False,
                     b'git': False,
                     b'nodates': False,
                     b'nobinary': False,
                     b'noprefix': False,
                     b'index': 0,
                     b'ignorews': False,
                     b'ignorewsamount': False,
                     b'ignorewseol': False,
                     b'ignoreblanklines': False,
                     b'upgrade': False,
                     b'showsimilarity': False,
                     b'worddiff': False,
                     b'xdiff': False,
                 }

                 def __init__(self, **opts):
                     """Set one attribute per known option, then normalize `context`.

                     Raises error.InputError when `context` cannot be converted to
                     an integer.
                     """
                     opts = pycompat.byteskwargs(opts)
                     for k, default in self.defaults.items():
                         v = opts.get(k)
                         if v is None:
                             v = default
                         setattr(self, k, v)

                     try:
                         self.context = int(self.context)
                     except ValueError:
                         raise error.InputError(
                             _(b'diff context lines count must be an integer, not %r')
                             % pycompat.bytestr(self.context)
                         )

                 def copy(self, **kwargs):
                     """Return a new diffopts with `kwargs` overriding current values."""
                     opts = {k: getattr(self, k) for k in self.defaults}
                     opts = pycompat.strkwargs(opts)
                     opts.update(kwargs)
                     return diffopts(**opts)

                 def __bytes__(self):
                     """Render all options as a b"name: value, ..." byte string."""
                     return b", ".join(
                         b"%s: %r" % (k, getattr(self, k)) for k in self.defaults
                     )

                 __str__ = encoding.strmethod(__bytes__)
             # Shared diffopts instance holding only default values; used in this
             # module as the fallback when a caller supplies no options.
             defaultopts = diffopts()
             def wsclean(opts, text, blank=True):
                 """Return `text` normalized according to the whitespace options.

                 `opts.ignorews` takes precedence over `opts.ignorewsamount`; both
                 delegate to bdiff.fixws. When `blank` is true and
                 `opts.ignoreblanklines` is set, runs of newlines are collapsed and
                 leading/trailing newlines removed. `opts.ignorewseol` strips
                 blanks (space, tab, CR, FF) that precede a newline.
                 """
                 if opts.ignorews or opts.ignorewsamount:
                     # second argument: 1 for ignorews, 0 for ignorewsamount only
                     text = bdiff.fixws(text, 1 if opts.ignorews else 0)

                 if blank and opts.ignoreblanklines:
                     collapsed = re.sub(b'\n+', b'\n', text)
                     text = collapsed.strip(b'\n')

                 if not opts.ignorewseol:
                     return text
                 return re.sub(br'[ \t\r\f]+\n', br'\n', text)
             def splitblock(base1, lines1, base2, lines2, opts):
                 """Split a block into matching ('=') and blank ('~') sub-blocks.

                 The two line lists are expected to match except for interwoven
                 blank lines. Yields ([a1, a2, b1, b2], btype) items whose indices
                 are offset by `base1` / `base2`.
                 """
                 # 1 marks a line with content left after wsclean(), 0 a blank one.
                 flags1 = [1 if wsclean(opts, line) else 0 for line in lines1]
                 flags2 = [1 if wsclean(opts, line) else 0 for line in lines2]
                 end1 = len(flags1)
                 end2 = len(flags2)
                 start1 = start2 = 0
                 while start1 < end1 or start2 < end2:
                     cur1, cur2 = start1, start2
                     onblank = (
                         cur1 >= end1
                         or flags1[cur1] == 0
                         or cur2 >= end2
                         or flags2[cur2] == 0
                     )
                     if onblank:
                         # swallow the run of blank lines on each side
                         btype = b'~'
                         while cur1 < end1 and flags1[cur1] == 0:
                             cur1 += 1
                         while cur2 < end2 and flags2[cur2] == 0:
                             cur2 += 1
                     else:
                         # advance both sides in lockstep over non-blank lines
                         btype = b'='
                         while cur1 < end1 and flags1[cur1] == 1 and flags2[cur2] == 1:
                             cur1 += 1
                             cur2 += 1
                     yield [
                         base1 + start1,
                         base1 + cur1,
                         base2 + start2,
                         base2 + cur2,
                     ], btype
                     start1, start2 = cur1, cur2
             def hunkinrange(hunk, linerange):
                 """Tell whether `hunk`, given as (start, length), overlaps
                 `linerange`, given as (lowerbound, upperbound).

                 >>> hunkinrange((5, 10), (2, 7))
                 True
                 >>> hunkinrange((5, 10), (6, 12))
                 True
                 >>> hunkinrange((5, 10), (13, 17))
                 True
                 >>> hunkinrange((5, 10), (3, 17))
                 True
                 >>> hunkinrange((5, 10), (1, 3))
                 False
                 >>> hunkinrange((5, 10), (18, 20))
                 False
                 >>> hunkinrange((5, 10), (1, 5))
                 False
                 >>> hunkinrange((5, 10), (15, 27))
                 False
                 """
                 hunkstart, hunklen = hunk
                 low, high = linerange
                 if hunkstart >= high:
                     # the hunk begins at or after the end of the range
                     return False
                 return low < hunkstart + hunklen
             def blocksinrange(blocks, rangeb):
                 """Keep the `blocks` that intersect line range `rangeb` on the "b" side.

                 `blocks` holds ``((a1, a2, b1, b2), stype)`` items. Returns
                 ``(filteredblocks, rangea)`` where:

                 * `filteredblocks` lists the items whose ``(b1, b2)`` part satisfies
                   ``rangeb[0] < b2 and b1 < rangeb[1]``;
                 * `rangea` is `rangeb` translated to the ``(a1, a2)`` side.

                 Raises error.InputError when either bound of `rangea` could not be
                 determined or the resulting range is inverted.
                 """
                 lowb, highb = rangeb
                 lowa = higha = None
                 kept = []
                 for block in blocks:
                     (a1, a2, b1, b2), stype = block
                     matching = stype == b'='
                     if matching and b1 <= lowb and highb <= b2:
                         # the whole range sits inside one "=" block: shift it over
                         lowa = lowb - b1 + a1
                         higha = highb - b1 + a1
                     else:
                         if b1 <= lowb < b2:
                             lowa = a2 - (b2 - lowb) if matching else a1
                         if b1 < highb <= b2:
                             higha = a1 + (highb - b1) if matching else a2
                     if hunkinrange((b1, b2 - b1), rangeb):
                         kept.append(block)
                 if lowa is None or higha is None or higha < lowa:
                     raise error.InputError(_(b'line range exceeds file size'))
                 return kept, (lowa, higha)
             def chooseblocksfunc(opts=None):
                 """Return the block-matching function to use for `opts`.

                 bdiff.xdiffblocks is returned only when `opts` is given, its `xdiff`
                 flag is set and bdiff provides `xdiffblocks`; bdiff.blocks otherwise.
                 """
                 usexdiff = (
                     opts is not None
                     and opts.xdiff
                     and util.safehasattr(bdiff, b'xdiffblocks')
                 )
                 if usexdiff:
                     return bdiff.xdiffblocks
                 return bdiff.blocks
             def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
                 """Yield (block, type) pairs covering both texts.

                 `block` is an mdiff.blocks-style line entry. `type` is '=' for
                 blocks matching exactly (as reported by the matcher), '!' for
                 non-matching blocks and '~' for blocks that match only once blank
                 lines have been filtered out.

                 `lines1` and `lines2` may carry `text1` and `text2` already split
                 with splitnewlines(); they are computed on demand otherwise.
                 """
                 if opts is None:
                     opts = defaultopts
                 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
                     text1 = wsclean(opts, text1, False)
                     text2 = wsclean(opts, text2, False)
                 matches = chooseblocksfunc(opts)(text1, text2)
                 # Before the first match, the "previous match" is an empty one at
                 # the origin; if the first match starts at line 0 the gap is empty
                 # and gets skipped by the test below.
                 previous = [0, 0, 0, 0]
                 for match in matches:
                     gap = [previous[1], match[0], previous[3], match[2]]
                     previous = match
                     # bdiff sometimes gives huge matches past eof; this check eats
                     # them, along with the empty leading gap described above
                     if gap[0] != gap[1] or gap[2] != gap[3]:
                         btype = b'!'
                         if opts.ignoreblanklines:
                             if lines1 is None:
                                 lines1 = splitnewlines(text1)
                             if lines2 is None:
                                 lines2 = splitnewlines(text2)
                             old = wsclean(opts, b"".join(lines1[gap[0] : gap[1]]))
                             new = wsclean(opts, b"".join(lines2[gap[2] : gap[3]]))
                             if old == new:
                                 btype = b'~'
                         yield gap, btype
                     yield match, b'='
             def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
                 """Return a unified diff as a (headers, hunks) tuple.

                 If the diff is not null, `headers` is a list with unified diff header
                 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
                 (hunkrange, hunklines) coming from _unidiff().
                 Otherwise, `headers` and `hunks` are empty.

                 Set binary=True if either a or b should be taken as a binary file.

                 `a` and `b` are the old and new contents, `ad` and `bd` the dates
                 appended to the matching header line, and `fn1` and `fn2` the file
                 names shown there. A side that is None (rather than merely empty)
                 gets a "/dev/null" header. When one side is empty, or in the
                 binary case, `hunks` is a one-element tuple instead of a generator;
                 in the binary case `headers` is empty and the hunk range is None.
                 """

                 # Build the suffix that follows a file name in a header line: the
                 # tab-separated date, unless git or nodates is set, in which case
                 # only a bare tab is emitted for names containing a space.
                 def datetag(date, fn=None):
                     if not opts.git and not opts.nodates:
                         return b'\t%s' % date
                     if fn and b' ' in fn:
                         return b'\t'
                     return b''

                 # value returned whenever there is nothing to report
                 sentinel = [], ()
                 if not a and not b:
                     return sentinel
                 if opts.noprefix:
                     aprefix = bprefix = b''
                 else:
                     aprefix = b'a/'
                     bprefix = b'b/'
                 epoch = dateutil.datestr((0, 0))
                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)
                 if binary:
                     # identical binary contents produce no diff at all
                     if a and b and len(a) == len(b) and a == b:
                         return sentinel
                     headerlines = []
                     hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
                 elif not a:
                     # old side empty or None: every line of b is an addition
                     without_newline = not b.endswith(b'\n')
                     b = splitnewlines(b)
                     if a is None:
                         l1 = b'--- /dev/null%s' % datetag(epoch)
                     else:
                         l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
                     headerlines = [l1, l2]
                     size = len(b)
                     hunkrange = (0, 0, 1, size)
                     hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
                     if without_newline:
                         # terminate the last line, then flag the missing newline
                         hunklines[-1] += b'\n'
                         hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
                     hunks = ((hunkrange, hunklines),)
                 elif not b:
                     # new side empty or None: every line of a is a removal
                     without_newline = not a.endswith(b'\n')
                     a = splitnewlines(a)
                     l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     if b is None:
                         l2 = b'+++ /dev/null%s' % datetag(epoch)
                     else:
                         l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
                     headerlines = [l1, l2]
                     size = len(a)
                     hunkrange = (1, size, 0, 0)
                     hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
                     if without_newline:
                         # terminate the last line, then flag the missing newline
                         hunklines[-1] += b'\n'
                         hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
                     hunks = ((hunkrange, hunklines),)
                 else:
                     hunks = _unidiff(a, b, opts=opts)
                     # _unidiff() first yields a bool telling whether any hunk
                     # follows; consume it here so callers only see hunks
                     if not next(hunks):
                         return sentinel
                     headerlines = [
                         b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
                         b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
                     ]
                 return headerlines, hunks
             def _unidiff(t1, t2, opts=defaultopts):
                 """Yield hunks of a headerless unified diff from t1 and t2 texts.

                 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
                 tuple (s1, l1, s2, l2) representing the range information of the hunk to
                 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
                 of the hunk combining said header followed by line additions and
                 deletions.

                 The hunks are prefixed with a bool: the very first item yielded is
                 True when at least one hunk follows and False when there is none.
                 """
                 l1 = splitnewlines(t1)
                 l2 = splitnewlines(t2)

                 # last line index of the trailing context, clamped to `len`
                 # (note: the parameter shadows the builtin inside this helper only)
                 def contextend(l, len):
                     ret = l + opts.context
                     if ret > len:
                         ret = len
                     return ret

                 # first line index of the leading context, clamped to 0
                 def contextstart(l):
                     ret = l - opts.context
                     if ret < 0:
                         return 0
                     return ret

                 # [position where the next function-line search stops, last
                 # function line found]; mutated by yieldhunk() across hunks
                 lastfunc = [0, b'']

                 # Turn an accumulated hunk [astart, a2, bstart, b2, delta] into
                 # its final (hunkrange, hunklines) form, adding trailing context.
                 def yieldhunk(hunk):
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     # the trailing context (aend - a2 lines) is shared by both sides
                     blen = b2 - bstart + aend - a2
                     func = b""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in range(astart - 1, lastpos - 1, -1):
                             if l1[i][0:1].isalnum():
                                 func = b' ' + l1[i].rstrip()
                                 # split long function name if ASCII. otherwise we have no
                                 # idea where the multi-byte boundary is, so just leave it.
                                 if encoding.isasciistr(func):
                                     func = func[:41]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart
                     # zero-length hunk ranges report their start line as one less
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1
                     hunkrange = astart, alen, bstart, blen
                     hunklines = (
                         [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
                         + delta
                         + [b' ' + l1[x] for x in range(a2, aend)]
                     )
                     # If either file ends without a newline and the last line of
                     # that file is part of a hunk, a marker is printed. If the
                     # last line of both files is identical and neither ends in
                     # a newline, print only one marker. That's the only case in
                     # which the hunk can end in a shared line without a newline.
                     skip = False
                     if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
                         # find the last line belonging to the old side
                         for i in range(len(hunklines) - 1, -1, -1):
                             if hunklines[i].startswith((b'-', b' ')):
                                 if hunklines[i].startswith(b' '):
                                     skip = True
                                 hunklines[i] += b'\n'
                                 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                                 break
                     if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
                         # find the last line added on the new side
                         for i in range(len(hunklines) - 1, -1, -1):
                             if hunklines[i].startswith(b'+'):
                                 hunklines[i] += b'\n'
                                 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                                 break
                     yield hunkrange, hunklines

                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 hunk = None
                 ignoredlines = 0
                 has_hunks = False
                 for s, stype in allblocks(t1, t2, opts, l1, l2):
                     a1, a2, b1, b2 = s
                     # only '!' blocks produce output; '=' and '~' are skipped
                     if stype != b'!':
                         if stype == b'~':
                             # The diff context lines are based on t1 content. When
                             # blank lines are ignored, the new lines offsets must
                             # be adjusted as if equivalent blocks ('~') had the
                             # same sizes on both sides.
                             ignoredlines += (b2 - b1) - (a2 - a1)
                         continue
                     delta = []
                     old = l1[a1:a2]
                     new = l2[b1:b2]
                     b1 -= ignoredlines
                     b2 -= ignoredlines
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             # the pending hunk is complete; announce that hunks
                             # exist (once) before emitting the first one
                             if not has_hunks:
                                 has_hunks = True
                                 yield True
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [astart, a2, bstart, b2, delta]
                     # leading context (or the lines between two joined changes),
                     # then the removed lines, then the added lines
                     delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
                     delta[len(delta) :] = [b'-' + x for x in old]
                     delta[len(delta) :] = [b'+' + x for x in new]
                 # flush the last pending hunk, or report that there was none
                 if hunk:
                     if not has_hunks:
                         has_hunks = True
                         yield True
                     for x in yieldhunk(hunk):
                         yield x
                 elif not has_hunks:
                     yield False
             def b85diff(to, tn):
                 """Return a base85-encoded 'GIT binary patch' for the new content.

                 `to` and `tn` are the old and new contents; None counts as empty.
                 Returns b'' when both are equal. Otherwise the result is a
                 "literal" patch: `tn` is zlib-compressed and emitted in chunks of at
                 most 52 bytes, each line starting with a character encoding the
                 chunk length (A-Z for 1-26 bytes, a-z for 27-52).
                 """

                 def fmtline(line):
                     size = len(line)
                     if size <= 26:
                         code = ord(b'A') + size - 1
                     else:
                         code = size - 26 + ord(b'a') - 1
                     prefix = pycompat.bytechr(code)
                     return b'%c%s\n' % (prefix, util.b85encode(line, True))

                 def chunk(text, csize=52):
                     for offset in range(0, len(text), csize):
                         yield text[offset : offset + csize]

                 old = b'' if to is None else to
                 new = b'' if tn is None else tn
                 if old == new:
                     return b''

                 # TODO: deltas
                 pieces = [b'GIT binary patch\n', b'literal %d\n' % len(new)]
                 pieces.extend(fmtline(part) for part in chunk(zlib.compress(new)))
                 pieces.append(b'\n')
                 return b''.join(pieces)
             def patchtext(bin):
                 """Concatenate the data payloads of every fragment in delta `bin`.

                 Each fragment is a 12-byte header of three big-endian signed 32-bit
                 integers followed by a payload whose length is the third integer.
                 """
                 hdrsize = 12
                 total = len(bin)
                 payloads = []
                 offset = 0
                 while offset < total:
                     datastart = offset + hdrsize
                     _p1, _p2, length = struct.unpack(b">lll", bin[offset:datastart])
                     payloads.append(bin[datastart : datastart + length])
                     offset = datastart + length
                 return b"".join(payloads)
             def patch(a, bin):
                 """Apply delta `bin` to text `a` and return the result.

                 For an empty `a` the result is `bin` with its first 12 bytes (the
                 trivial delta header) skipped, returned through util.buffer.
                 """
                 if len(a):
                     return mpatch.patches(a, [bin])
                 # skip over trivial delta header
                 return util.buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
             def get_matching_blocks(a, b):
                 """Return bdiff.blocks(a, b) as (start_a, start_b, length) triples."""
                 triples = []
                 for d in bdiff.blocks(a, b):
                     a1, a2, b1 = d[0], d[1], d[2]
                     triples.append((a1, b1, a2 - a1))
                 return triples
             def trivialdiffheader(length):
                 """Return the 12-byte header (0, 0, length), or b'' if `length` is falsy."""
                 if not length:
                     return b''
                 return struct.pack(b">lll", 0, 0, length)
             def replacediffheader(oldlen, newlen):
                 """Return the 12-byte header packing (0, oldlen, newlen) big-endian."""
                 fields = (0, oldlen, newlen)
                 return struct.pack(b">lll", *fields)