upstream/mercurial-mirror Commit - r52829:c6899b33

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import annotations

8

from __future__ import annotations

9

10

import re

10

import re

11

import struct

11

import struct

12

import typing

12

import zlib

13

import zlib

13

14

15

from typing import (

16

Iterable,

17

Iterator,

18

List,

19

Optional,

20

Sequence,

21

Tuple,

22

Union,

23

cast,

24

)

25

14

from .i18n import _

26

from .i18n import _

15

from . import (

27

from . import (

16

diffhelper,

28

diffhelper,

17

encoding,

29

encoding,

18

error,

30

error,

19

policy,

31

policy,

20

pycompat,

32

pycompat,

21

util,

33

util,

22

)

34

)

23

from .interfaces import (

35

from .interfaces import (

24

modules as intmod,

36

modules as intmod,

25

)

37

)

26

38

27

from .utils import dateutil

39

from .utils import dateutil

28

40

29

# Backend modules are resolved through policy.importmod() rather than
# imported directly.
bdiff: intmod.BDiff = policy.importmod('bdiff')
mpatch = policy.importmod('mpatch')

# Module-level aliases for the backend primitives.
blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
splitnewlines = bdiff.splitnewlines

38

50

51

# These aliases only exist for the type checker; at runtime they are
# referenced solely from annotations and from the string passed to cast().
if typing.TYPE_CHECKING:
    HunkLines = List[bytes]
    """Lines of a hunk- a header, followed by line additions and deletions."""

    HunkRange = Tuple[int, int, int, int]
    """HunkRange represents the range information of a hunk.

    The tuple (s1, l1, s2, l2) forms the header '@@ -s1,l1 +s2,l2 @@'."""

    Range = Tuple[int, int]
    """A (lowerbound, upperbound) range tuple."""

    TypedBlock = Tuple[intmod.BDiffBlock, bytes]
    """A bdiff block with its type."""

65

39

66

40

# TODO: this looks like it could be an attrs, which might help pytype
class diffopts:
    """Options controlling how a diff is computed and rendered.

    context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    nobinary ignores binary files
    noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank
    upgrade generates git diffs to avoid data loss

    Every key of `defaults` becomes an instance attribute of the same name.
    """

    _HAS_DYNAMIC_ATTRIBUTES = True

    # Option name (bytes) -> value used when the option is absent or None.
    defaults = {
        b'context': 3,
        b'text': False,
        b'showfunc': False,
        b'git': False,
        b'nodates': False,
        b'nobinary': False,
        b'noprefix': False,
        b'index': 0,
        b'ignorews': False,
        b'ignorewsamount': False,
        b'ignorewseol': False,
        b'ignoreblanklines': False,
        b'upgrade': False,
        b'showsimilarity': False,
        b'worddiff': False,
        b'xdiff': False,
    }

    def __init__(self, **opts):
        """Set one attribute per known option.

        Keyword arguments that are not keys of `defaults` are ignored; an
        option passed as None falls back to its default.

        Raises error.InputError when `context` cannot be converted by int()
        (only ValueError is translated).
        """
        opts = pycompat.byteskwargs(opts)
        for key, default in self.defaults.items():
            value = opts.get(key)
            if value is None:
                value = default
            setattr(self, pycompat.sysstr(key), value)

        try:
            self.context = int(self.context)
        except ValueError:
            raise error.InputError(
                _(b'diff context lines count must be an integer, not %r')
                % pycompat.bytestr(self.context)
            )

    def copy(self, **kwargs):
        """Return a new diffopts with the current values, overridden by
        `kwargs`."""
        opts = {k: getattr(self, pycompat.sysstr(k)) for k in self.defaults}
        opts = pycompat.strkwargs(opts)
        opts.update(kwargs)
        return diffopts(**opts)

    def __bytes__(self):
        """Return the options as b"name: value" pairs joined by b", "."""
        return b", ".join(
            b"%s: %r" % (k, getattr(self, pycompat.sysstr(k)))
            for k in self.defaults
        )

    __str__ = encoding.strmethod(__bytes__)

105

132

106

133

107

# Shared instance carrying only the default option values; used as the
# fallback `opts` by the diff functions in this module.
defaultopts = diffopts()

108

135

109

136

110

def wsclean(opts: diffopts, text: bytes, blank: bool = True) -> bytes:
    """Return `text` normalized according to the whitespace options in `opts`.

    * `ignorews` / `ignorewsamount`: the text is passed through
      bdiff.fixws() (second argument True for `ignorews`, False for
      `ignorewsamount`); `ignorews` takes precedence when both are set.
    * `ignoreblanklines` (only when `blank` is true): runs of newlines are
      collapsed to one and leading/trailing newlines are stripped.
    * `ignorewseol`: spaces, tabs, CRs and form feeds directly before a
      newline are removed.
    """
    if opts.ignorews:
        text = bdiff.fixws(text, True)
    elif opts.ignorewsamount:
        text = bdiff.fixws(text, False)
    if blank and opts.ignoreblanklines:
        text = re.sub(b'\n+', b'\n', text).strip(b'\n')
    if opts.ignorewseol:
        text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
    return text

120

147

121

148

122

def splitblock(
    base1: int,
    lines1: Iterable[bytes],
    base2: int,
    lines2: Iterable[bytes],
    opts: diffopts,
) -> Iterable[TypedBlock]:
    """Split two line ranges into alternating matching and blank blocks.

    Yields ``((a1, a2, b1, b2), btype)`` tuples, the indexes being offset by
    `base1` / `base2`; `btype` is b'=' for a run of non-blank lines consumed
    in lockstep on both sides and b'~' for a run of blank lines.

    The inputs are expected to match except for interwoven blank lines. This
    is not validated: when the non-blank lines do not pair up, the scan can
    index past the end of `lines2` or stop making progress.
    """
    # The input lines matches except for interwoven blank lines. We
    # transform it into a sequence of matching blocks and blank blocks.
    # Each line is reduced to a flag: 1 if something is left after
    # wsclean(), 0 if the line is blank.
    lines1 = [1 if wsclean(opts, l) else 0 for l in lines1]
    lines2 = [1 if wsclean(opts, l) else 0 for l in lines2]
    s1, e1 = 0, len(lines1)
    s2, e2 = 0, len(lines2)
    while s1 < e1 or s2 < e2:
        i1, i2, btype = s1, s2, b'='
        if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
            # Consume the block of blank lines
            btype = b'~'
            while i1 < e1 and lines1[i1] == 0:
                i1 += 1
            while i2 < e2 and lines2[i2] == 0:
                i2 += 1
        else:
            # Consume the matching lines
            while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                i1 += 1
                i2 += 1
        yield (base1 + s1, base1 + i1, base2 + s2, base2 + i2), btype
        s1 = i1
        s2 = i2

146

179

147

180

148

def hunkinrange(hunk: Tuple[int, int], linerange: Range) -> bool:
    """Return True if `hunk` defined as (start, length) is in `linerange`
    defined as (lowerbound, upperbound).

    The hunk covers ``[start, start + length)`` and the test is a strict
    overlap with ``[lowerbound, upperbound)``: ranges that merely touch do
    not count.

    >>> hunkinrange((5, 10), (2, 7))
    True
    >>> hunkinrange((5, 10), (6, 12))
    True
    >>> hunkinrange((5, 10), (13, 17))
    True
    >>> hunkinrange((5, 10), (3, 17))
    True
    >>> hunkinrange((5, 10), (1, 3))
    False
    >>> hunkinrange((5, 10), (18, 20))
    False
    >>> hunkinrange((5, 10), (1, 5))
    False
    >>> hunkinrange((5, 10), (15, 27))
    False
    """
    start, length = hunk
    lowerbound, upperbound = linerange
    return lowerbound < start + length and start < upperbound

172

205

173

206

174

def blocksinrange(
    blocks: Iterable[TypedBlock], rangeb: Range
) -> Tuple[List[TypedBlock], Range]:
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.

    Raises error.InputError when no block supplies a lower or an upper bound
    for `rangea`, or when the computed bounds are inverted.
    """
    lbb, ubb = rangeb
    lba, uba = None, None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == b'=':
            # rangeb is within a single "=" hunk, restrict back linerange1
            # by offsetting rangeb
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            if b1 <= lbb < b2:
                # the lower bound falls inside this block: map it exactly
                # for an "=" block, otherwise snap to the block start
                if stype == b'=':
                    lba = a2 - (b2 - lbb)
                else:
                    lba = a1
            if b1 < ubb <= b2:
                # same for the upper bound, snapping to the block end
                if stype == b'=':
                    uba = a1 + (ubb - b1)
                else:
                    uba = a2
            if hunkinrange((b1, (b2 - b1)), rangeb):
                filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.InputError(_(b'line range exceeds file size'))
    return filteredblocks, (lba, uba)

212

247

213

248

214

def chooseblocksfunc(opts: Optional[diffopts] = None) -> intmod.BDiffBlocksFnc:
    """Return the function used to compute matching blocks.

    bdiff.xdiffblocks is returned only when `opts` is given, `opts.xdiff`
    is set and the loaded bdiff module provides a truthy `xdiffblocks`
    attribute; otherwise bdiff.blocks is returned.
    """
    if (
        opts is not None
        and opts.xdiff
        and getattr(bdiff, 'xdiffblocks', None)
    ):
        return bdiff.xdiffblocks
    return bdiff.blocks

223

258

224

259

225

def allblocks(
    text1: bytes,
    text2: bytes,
    opts: Optional[diffopts] = None,
    lines1: Optional[Sequence[bytes]] = None,
    lines2: Optional[Sequence[bytes]] = None,
) -> Iterable[TypedBlock]:
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    line1 and line2 are text1 and text2 split with splitnewlines() if
    they are already available.

    When `opts` is None, the module-level `defaultopts` is used.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = chooseblocksfunc(opts)(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        prev = diff[i - 1] if i > 0 else (0, 0, 0, 0)
        # the gap between the end of the previous match and the start of
        # this one
        s = (prev[1], s1[0], prev[3], s1[2])

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            btype = b'!'
            if opts.ignoreblanklines:
                # split lazily: only needed once a non-empty gap is found
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
                new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
                if old == new:
                    btype = b'~'
            yield s, btype
        yield s1, b'='

265

306

266

307

267

def unidiff(
    a: bytes,
    ad: bytes,
    b: bytes,
    bd: bytes,
    fn1: bytes,
    fn2: bytes,
    binary: bool,
    opts: diffopts = defaultopts,
) -> Tuple[List[bytes], Iterable[Tuple[Optional[HunkRange], HunkLines]]]:
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    `a` / `b` are the old and new contents, `ad` / `bd` the date strings
    appended to the header lines and `fn1` / `fn2` the file names (passed
    through util.pconvert()). A side that is None is shown as /dev/null.

    Set binary=True if either a or b should be taken as a binary file.
    """

    def datetag(date: bytes, fn: Optional[bytes] = None):
        # Suffix of a header line: the date unless git/nodates is in effect,
        # otherwise a bare tab when the file name contains a space.
        if not opts.git and not opts.nodates:
            return b'\t%s' % date
        if fn and b' ' in fn:
            return b'\t'
        return b''

    def oldheader():
        return b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))

    def newheader():
        return b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))

    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    epoch = dateutil.datestr((0, 0))

    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
    elif not a:
        # old side empty or missing: a single hunk adding every line of b
        without_newline = not b.endswith(b'\n')
        b = splitnewlines(b)
        if a is None:
            l1 = b'--- /dev/null%s' % datetag(epoch)
        else:
            l1 = oldheader()
        l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = ((hunkrange, hunklines),)
    elif not b:
        # new side empty or missing: a single hunk removing every line of a
        without_newline = not a.endswith(b'\n')
        a = splitnewlines(a)
        l1 = oldheader()
        if b is None:
            l2 = b'+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = newheader()
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = ((hunkrange, hunklines),)
    else:
        hunks = _unidiff(a, b, opts=opts)
        # _unidiff() first yields a bool telling whether any hunk follows
        if not next(hunks):
            return sentinel

        headerlines = [oldheader(), newheader()]

    # The possible bool is consumed from the iterator above in the `next()`
    # call.
    return headerlines, cast(
        "Iterable[Tuple[Optional[HunkRange], HunkLines]]", hunks
    )

348

402

349

403

350

def _unidiff(
    t1: bytes, t2: bytes, opts: diffopts = defaultopts
) -> Iterator[Union[bool, Tuple[HunkRange, HunkLines]]]:
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: True is yielded once before the first
    hunk, and a lone False is yielded when there is no hunk at all.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    def contextend(l, length):
        # end of the trailing context, clamped to the number of lines
        ret = l + opts.context
        if ret > length:
            ret = length
        return ret

    def contextstart(l):
        # start of the leading context, clamped to the first line
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # [line from which the next function-line search may start,
    #  last function text found]; a list so yieldhunk() can update it
    lastfunc = [0, b'']

    def yieldhunk(
        hunk: Tuple[int, int, int, int, List[bytes]]
    ) -> Iterable[Tuple[HunkRange, HunkLines]]:
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = b""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in range(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = b' ' + l1[i].rstrip()
                    # split long function name if ASCII. otherwise we have no
                    # idea where the multi-byte boundary is, so just leave it.
                    if encoding.isasciistr(func):
                        func = func[:41]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        hunklines = (
            [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [b' ' + l1[x] for x in range(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
            for i in range(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith((b'-', b' ')):
                    if hunklines[i].startswith(b' '):
                        skip = True
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
            for i in range(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith(b'+'):
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != b'!':
            if stype == b'~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                if not has_hunks:
                    has_hunks = True
                    yield True
                yield from yieldhunk(hunk)
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk = (hunk[0], a2, hunk[2], b2, hunk[4])
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = (astart, a2, bstart, b2, delta)

        # `delta` is the list stored in `hunk`, so it is extended in place
        delta.extend([b' ' + x for x in l1[astart:a1]])
        delta.extend([b'-' + x for x in old])
        delta.extend([b'+' + x for x in new])

    if hunk:
        if not has_hunks:
            has_hunks = True
            yield True
        yield from yieldhunk(hunk)
    elif not has_hunks:
        yield False

496

554

497

555

498

def b85diff(to, tn):

556

def b85diff(to: Optional[bytes], tn: Optional[bytes]) -> bytes:

499

'''print base85-encoded binary diff'''

557

'''print base85-encoded binary diff'''

500

558

501

def fmtline(line):

559

def fmtline(line):

502

l = len(line)

560

l = len(line)

503

if l <= 26:

561

if l <= 26:

504

l = pycompat.bytechr(ord(b'A') + l - 1)

562

l = pycompat.bytechr(ord(b'A') + l - 1)

505

else:

563

else:

506

l = pycompat.bytechr(l - 26 + ord(b'a') - 1)

564

l = pycompat.bytechr(l - 26 + ord(b'a') - 1)

507

return b'%c%s\n' % (l, util.b85encode(line, True))

565

return b'%c%s\n' % (l, util.b85encode(line, True))

508

566

509

def chunk(text, csize=52):

567

def chunk(text, csize=52):

510

l = len(text)

568

l = len(text)

511

i = 0

569

i = 0

512

while i < l:

570

while i < l:

513

yield text[i : i + csize]

571

yield text[i : i + csize]

514

i += csize

572

i += csize

515

573

516

if to is None:

574

if to is None:

517

to = b''

575

to = b''

518

if tn is None:

576

if tn is None:

519

tn = b''

577

tn = b''

520

578

521

if to == tn:

579

if to == tn:

522

return b''

580

return b''

523

581

524

# TODO: deltas

582

# TODO: deltas

525

ret = []

583

ret = []

526

ret.append(b'GIT binary patch\n')

584

ret.append(b'GIT binary patch\n')

527

ret.append(b'literal %d\n' % len(tn))

585

ret.append(b'literal %d\n' % len(tn))

528

for l in chunk(zlib.compress(tn)):

586

for l in chunk(zlib.compress(tn)):

529

ret.append(fmtline(l))

587

ret.append(fmtline(l))

530

ret.append(b'\n')

588

ret.append(b'\n')

531

589

532

return b''.join(ret)

590

return b''.join(ret)

533

591

534

592

535

def patchtext(bin):

593

def patchtext(bin: bytes) -> bytes:

536

pos = 0

594

pos = 0

537

t = []

595

t = []

538

while pos < len(bin):

596

while pos < len(bin):

539

p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])

597

p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])

540

pos += 12

598

pos += 12

541

t.append(bin[pos : pos + l])

599

t.append(bin[pos : pos + l])

542

pos += l

600

pos += l

543

return b"".join(t)

601

return b"".join(t)

544

602

545

603

546

def patch(a, bin):

604

def patch(a, bin):

547

if len(a) == 0:

605

if len(a) == 0:

548

# skip over trivial delta header

606

# skip over trivial delta header

549

return util.buffer(bin, 12)

607

return util.buffer(bin, 12)

550

return mpatch.patches(a, [bin])

608

return mpatch.patches(a, [bin])

551

609

552

610

553

# similar to difflib.SequenceMatcher.get_matching_blocks

611

# similar to difflib.SequenceMatcher.get_matching_blocks

554

def get_matching_blocks(a, b):

612

def get_matching_blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int]]:

555

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

613

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

556

614

557

615

558

def trivialdiffheader(length):

616

def trivialdiffheader(length: int) -> bytes:

559

return struct.pack(b">lll", 0, 0, length) if length else b''

617

return struct.pack(b">lll", 0, 0, length) if length else b''

560

618

561

619

562

def replacediffheader(oldlen, newlen):

620

def replacediffheader(oldlen: int, newlen: int) -> bytes:

563

return struct.pack(b">lll", 0, oldlen, newlen)

621

return struct.pack(b">lll", 0, oldlen, newlen)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import annotations
             import re
             import struct
+            import typing
             import zlib
+            from typing import (
+                Iterable,
+                Iterator,
+                List,
+                Optional,
+                Sequence,
+                Tuple,
+                Union,
+                cast,
+            )
             from .i18n import _
             from . import (
                 diffhelper,
                 encoding,
                 error,
                 policy,
                 pycompat,
                 util,
             )
             from .interfaces import (
                 modules as intmod,
             )
             from .utils import dateutil
             # Resolve the `bdiff` and `mpatch` modules through `policy.importmod`.
             bdiff: intmod.BDiff = policy.importmod('bdiff')
             mpatch = policy.importmod('mpatch')
             # Module-level aliases for selected functions of the two modules above.
             blocks = bdiff.blocks
             fixws = bdiff.fixws
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff
             splitnewlines = bdiff.splitnewlines
+            if typing.TYPE_CHECKING:
                 # The aliases below are only bound while type checking. The
                 # signatures that mention them are not evaluated at runtime because
                 # the module starts with `from __future__ import annotations`.
+                HunkLines = List[bytes]
+                """Lines of a hunk- a header, followed by line additions and deletions."""
+                HunkRange = Tuple[int, int, int, int]
+                """HunkRange represents the range information of a hunk.
+                The tuple (s1, l1, s2, l2) forms the header '@@ -s1,l1 +s2,l2 @@'."""
+                Range = Tuple[int, int]
+                """A (lowerbound, upperbound) range tuple."""
+                TypedBlock = Tuple[intmod.BDiffBlock, bytes]
+                """A bdiff block with its type."""
             # TODO: this looks like it could be an attrs, which might help pytype
             class diffopts:
                 """Container for the options that steer diff generation.

                 Each key of ``defaults`` is exposed as an instance attribute:

                 context           number of context lines
                 text              treat all files as text
                 showfunc          enable diff -p output
                 git               enable the git extended patch format
                 nodates           remove dates from diff headers
                 nobinary          ignore binary files
                 noprefix          disable the 'a/' and 'b/' prefixes (ignored in
                                   plain mode)
                 ignorews          ignore all whitespace changes in the diff
                 ignorewsamount    ignore changes in the amount of whitespace
                 ignoreblanklines  ignore changes whose lines are all blank
                 upgrade           generate git diffs to avoid data loss
                 """

                 _HAS_DYNAMIC_ATTRIBUTES = True

                 # Option name (bytes) -> value used when the caller passes nothing
                 # (or None) for that option.
                 defaults = {
                     b'context': 3,
                     b'text': False,
                     b'showfunc': False,
                     b'git': False,
                     b'nodates': False,
                     b'nobinary': False,
                     b'noprefix': False,
                     b'index': 0,
                     b'ignorews': False,
                     b'ignorewsamount': False,
                     b'ignorewseol': False,
                     b'ignoreblanklines': False,
                     b'upgrade': False,
                     b'showsimilarity': False,
                     b'worddiff': False,
                     b'xdiff': False,
                 }

                 def __init__(self, **opts):
                     """Set one attribute per key of ``defaults``.

                     A keyword that is missing or None falls back to the default.
                     ``context`` is converted with ``int()``; a ValueError from that
                     conversion is reported as ``error.InputError``.
                     """
                     given = pycompat.byteskwargs(opts)
                     for name, fallback in self.defaults.items():
                         value = given.get(name)
                         if value is None:
                             value = fallback
                         setattr(self, pycompat.sysstr(name), value)
                     try:
                         self.context = int(self.context)
                     except ValueError:
                         message = _(
                             b'diff context lines count must be an integer, not %r'
                         )
                         raise error.InputError(
                             message % pycompat.bytestr(self.context)
                         )

                 def copy(self, **kwargs):
                     """Return a new ``diffopts`` with ``kwargs`` overriding the
                     current values."""
                     current = {}
                     for name in self.defaults:
                         current[name] = getattr(self, pycompat.sysstr(name))
                     merged = pycompat.strkwargs(current)
                     merged.update(kwargs)
                     return diffopts(**merged)

                 def __bytes__(self):
                     """Render every option as ``name: value``, joined by ``, ``."""
                     rendered = [
                         b"%s: %r" % (name, getattr(self, pycompat.sysstr(name)))
                         for name in self.defaults
                     ]
                     return b", ".join(rendered)

                 __str__ = encoding.strmethod(__bytes__)
             # Instance built purely from `diffopts.defaults`; functions in this module
             # use it when the caller supplies no `opts`.
             defaultopts = diffopts()
-            def wsclean(opts, text, blank=True):
+            def wsclean(opts: diffopts, text: bytes, blank: bool = True) -> bytes:
                 # Return `text` normalized according to the whitespace-related flags
                 # of `opts`, so that differences the user asked to ignore disappear.
                 # Passing `blank=False` skips the blank-line collapsing step even
                 # when `opts.ignoreblanklines` is set.
                 #
                 # `ignorews` takes precedence over `ignorewsamount`.
                 # NOTE(review): the second argument of bdiff.fixws presumably selects
                 # "remove all whitespace" (True) vs. "collapse runs" (False) --
                 # confirm against the bdiff implementation.
                 if opts.ignorews:
                     text = bdiff.fixws(text, True)
                 elif opts.ignorewsamount:
                     text = bdiff.fixws(text, False)
                 if blank and opts.ignoreblanklines:
                     # Collapse runs of newlines into a single one, then drop
                     # leading and trailing newlines.
                     text = re.sub(b'\n+', b'\n', text).strip(b'\n')
                 if opts.ignorewseol:
                     # Drop runs of space, tab, CR and form feed that directly
                     # precede a newline.
                     text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
                 return text
-            def splitblock(base1, lines1, base2, lines2, opts):
+            def splitblock(
+                base1: int,
+                lines1: Iterable[bytes],
+                base2: int,
+                lines2: Iterable[bytes],
+                opts: diffopts,
+            ) -> Iterable[TypedBlock]:
                 # Yields ((a1, a2, b1, b2), btype) tuples. `base1`/`base2` are added
                 # to the local indices so the yielded bounds are expressed in the
                 # caller's line numbering. btype is b'=' for a run of matching lines
                 # and b'~' for a run of blank lines.
                 #
                 # The input lines matches except for interwoven blank lines. We
                 # transform it into a sequence of matching blocks and blank blocks.
                 # Reduce each line to a flag: 1 if something is left after wsclean(),
                 # 0 if the line is blank.
                 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
                 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
                 s1, e1 = 0, len(lines1)
                 s2, e2 = 0, len(lines2)
                 while s1 < e1 or s2 < e2:
                     i1, i2, btype = s1, s2, b'='
                     if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
                         # Consume the block of blank lines
                         btype = b'~'
                         while i1 < e1 and lines1[i1] == 0:
                             i1 += 1
                         while i2 < e2 and lines2[i2] == 0:
                             i2 += 1
                     else:
                         # Consume the matching lines
                         # NOTE: lines2[i2] is not bounds-checked here; this relies
                         # on the precondition stated at the top (both sides hold the
                         # same non-blank lines).
                         while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
                             i1 += 1
                             i2 += 1
                     yield (base1 + s1, base1 + i1, base2 + s2, base2 + i2), btype
                     s1 = i1
                     s2 = i2
-            def hunkinrange(hunk, linerange):
+            def hunkinrange(hunk: Tuple[int, int], linerange: Range) -> bool:
                 """Return True if `hunk` defined as (start, length) is in `linerange`
                 defined as (lowerbound, upperbound).
                 >>> hunkinrange((5, 10), (2, 7))
                 True
                 >>> hunkinrange((5, 10), (6, 12))
                 True
                 >>> hunkinrange((5, 10), (13, 17))
                 True
                 >>> hunkinrange((5, 10), (3, 17))
                 True
                 >>> hunkinrange((5, 10), (1, 3))
                 False
                 >>> hunkinrange((5, 10), (18, 20))
                 False
                 >>> hunkinrange((5, 10), (1, 5))
                 False
                 >>> hunkinrange((5, 10), (15, 27))
                 False
                 """
                 start, length = hunk
                 lowerbound, upperbound = linerange
                 # Overlap test between the half-open intervals
                 # [start, start + length) and [lowerbound, upperbound): intervals
                 # that merely touch at an endpoint do not count as overlapping.
                 return lowerbound < start + length and start < upperbound
-            def blocksinrange(blocks, rangeb):
+            def blocksinrange(
+                blocks: Iterable[TypedBlock], rangeb: Range
+            ) -> Tuple[List[TypedBlock], Range]:
                 """filter `blocks` like (a1, a2, b1, b2) from items outside line range
                 `rangeb` from ``(b1, b2)`` point of view.
                 Return `filteredblocks, rangea` where:
                 * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
                   `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
                   block ``(b1, b2)`` being inside `rangeb` if
                   ``rangeb[0] < b2 and b1 < rangeb[1]``;
                 * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.
                 """
                 # Raises error.InputError when no block contains one of the bounds
                 # of `rangeb`, or when the computed a-side range is inverted.
                 lbb, ubb = rangeb
                 # a-side bounds; each stays None until a block containing the
                 # corresponding b-side bound has been seen.
                 lba, uba = None, None
                 filteredblocks = []
                 for block in blocks:
                     (a1, a2, b1, b2), stype = block
                     if lbb >= b1 and ubb <= b2 and stype == b'=':
                         # rangeb is within a single "=" hunk, restrict back linerange1
                         # by offsetting rangeb
                         lba = lbb - b1 + a1
                         uba = ubb - b1 + a1
                     else:
                         # A bound falling inside an "=" block is translated by its
                         # offset within the block; inside any other block type it
                         # is snapped to that block's a-side edge.
                         if b1 <= lbb < b2:
                             if stype == b'=':
                                 lba = a2 - (b2 - lbb)
                             else:
                                 lba = a1
                         if b1 < ubb <= b2:
                             if stype == b'=':
                                 uba = a1 + (ubb - b1)
                             else:
                                 uba = a2
                     if hunkinrange((b1, (b2 - b1)), rangeb):
                         filteredblocks.append(block)
                 if lba is None or uba is None or uba < lba:
                     raise error.InputError(_(b'line range exceeds file size'))
                 return filteredblocks, (lba, uba)
-            def chooseblocksfunc(opts=None):
+            def chooseblocksfunc(opts: Optional[diffopts] = None) -> intmod.BDiffBlocksFnc:
                 # Pick the function used to compute matching blocks:
                 # `bdiff.xdiffblocks` only when `opts.xdiff` is set and the loaded
                 # bdiff module actually provides it, `bdiff.blocks` in every other
                 # case (including `opts is None`).
                 if (
                     opts is None
                     or not opts.xdiff
                     or not getattr(bdiff, 'xdiffblocks', None)
                 ):
                     return bdiff.blocks
                 else:
                     return bdiff.xdiffblocks
-            def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
+            def allblocks(
+                text1: bytes,
+                text2: bytes,
+                opts: Optional[diffopts] = None,
+                lines1: Optional[Sequence[bytes]] = None,
+                lines2: Optional[Sequence[bytes]] = None,
+            ) -> Iterable[TypedBlock]:
                 """Return (block, type) tuples, where block is an mdiff.blocks
                 line entry. type is '=' for blocks matching exactly one another
                 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
                 matching only after having filtered blank lines.
                 line1 and line2 are text1 and text2 split with splitnewlines() if
                 they are already available.
                 """
                 # NOTE: "line1"/"line2" in the docstring above refer to the
                 # `lines1`/`lines2` parameters.
                 if opts is None:
                     opts = defaultopts
                 if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
                     # blank=False: blank-line handling is done per gap further down,
                     # not on the whole text.
                     text1 = wsclean(opts, text1, False)
                     text2 = wsclean(opts, text2, False)
                 diff = chooseblocksfunc(opts)(text1, text2)
                 for i, s1 in enumerate(diff):
                     # The first match is special.
                     # we've either found a match starting at line 0 or a match later
                     # in the file.  If it starts later, old and new below will both be
                     # empty and we'll continue to the next match.
                     if i > 0:
                         s = diff[i - 1]
                     else:
                         s = (0, 0, 0, 0)
                     # `s` becomes the gap between the end of the previous match and
                     # the start of the current one, as (a1, a2, b1, b2).
                     s = (s[1], s1[0], s[3], s1[2])
                     # bdiff sometimes gives huge matches past eof, this check eats them,
                     # and deals with the special first match case described above
                     if s[0] != s[1] or s[2] != s[3]:
                         type = b'!'
                         if opts.ignoreblanklines:
                             # Split lazily: only needed once a non-empty gap is met.
                             if lines1 is None:
                                 lines1 = splitnewlines(text1)
                             if lines2 is None:
                                 lines2 = splitnewlines(text2)
                             old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
                             new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
                             if old == new:
                                 type = b'~'
                         yield s, type
                     yield s1, b'='
-            def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
+            def unidiff(
+                a: bytes,
+                ad: bytes,
+                b: bytes,
+                bd: bytes,
+                fn1: bytes,
+                fn2: bytes,
+                binary: bool,
+                opts: diffopts = defaultopts,
+            ) -> Tuple[List[bytes], Iterable[Tuple[Optional[HunkRange], HunkLines]]]:
                 """Return a unified diff as a (headers, hunks) tuple.
                 If the diff is not null, `headers` is a list with unified diff header
                 lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
                 (hunkrange, hunklines) coming from _unidiff().
                 Otherwise, `headers` and `hunks` are empty.
                 Set binary=True if either a or b should be taken as a binary file.
                 """
                 # `a`/`b` are the old/new contents, `ad`/`bd` the date strings put in
                 # the headers, `fn1`/`fn2` the old/new file names.
-                def datetag(date, fn=None):
+                def datetag(date: bytes, fn: Optional[bytes] = None):
                     # Suffix appended to a header line: a tab plus the date unless
                     # dates are suppressed (git mode or nodates); otherwise a lone
                     # tab when the file name contains a space, else nothing.
                     if not opts.git and not opts.nodates:
                         return b'\t%s' % date
                     if fn and b' ' in fn:
                         return b'\t'
                     return b''
                 # Returned whenever there is nothing to report.
                 sentinel = [], ()
                 if not a and not b:
                     return sentinel
                 if opts.noprefix:
                     aprefix = bprefix = b''
                 else:
                     aprefix = b'a/'
                     bprefix = b'b/'
                 epoch = dateutil.datestr((0, 0))
                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)
                 if binary:
                     if a and b and len(a) == len(b) and a == b:
                         return sentinel
                     # Binary change: no header lines and a single pseudo-hunk without
                     # a range.
                     headerlines = []
                     hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
                 elif not a:
                     # Old side is empty or None: every line of `b` is an addition.
                     without_newline = not b.endswith(b'\n')
                     b = splitnewlines(b)
                     # `a is None` (as opposed to empty) is reported as /dev/null.
                     if a is None:
                         l1 = b'--- /dev/null%s' % datetag(epoch)
                     else:
                         l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
                     headerlines = [l1, l2]
                     size = len(b)
                     hunkrange = (0, 0, 1, size)
                     hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
                     if without_newline:
                         # Terminate the last line and flag the missing newline.
                         hunklines[-1] += b'\n'
                         hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
                     hunks = ((hunkrange, hunklines),)
                 elif not b:
                     # New side is empty or None: every line of `a` is a removal.
                     without_newline = not a.endswith(b'\n')
                     a = splitnewlines(a)
                     l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
                     # `b is None` (as opposed to empty) is reported as /dev/null.
                     if b is None:
                         l2 = b'+++ /dev/null%s' % datetag(epoch)
                     else:
                         l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
                     headerlines = [l1, l2]
                     size = len(a)
                     hunkrange = (1, size, 0, 0)
                     hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
                     if without_newline:
                         # Terminate the last line and flag the missing newline.
                         hunklines[-1] += b'\n'
                         hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
                     hunks = ((hunkrange, hunklines),)
                 else:
                     hunks = _unidiff(a, b, opts=opts)
                     # The first item yielded by _unidiff() is a bool telling whether
                     # any hunk follows.
                     if not next(hunks):
                         return sentinel
                     headerlines = [
                         b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
                         b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
                     ]
-                return headerlines, hunks
+                # The possible bool is consumed from the iterator above in the `next()`
+                # call.
+                return headerlines, cast(
+                    "Iterable[Tuple[Optional[HunkRange], HunkLines]]", hunks
+                )
-            def _unidiff(t1, t2, opts=defaultopts):
+            def _unidiff(
+                t1: bytes, t2: bytes, opts: diffopts = defaultopts
+            ) -> Iterator[Union[bool, Tuple[HunkRange, HunkLines]]]:
                 """Yield hunks of a headerless unified diff from t1 and t2 texts.
                 Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
                 tuple (s1, l1, s2, l2) representing the range information of the hunk to
                 form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
                 of the hunk combining said header followed by line additions and
                 deletions.
                 The hunks are prefixed with a bool.
                 """
                 # The leading bool is True when at least one hunk follows and False
                 # when there is none (in which case it is the only item yielded).
                 l1 = splitnewlines(t1)
                 l2 = splitnewlines(t2)
                 def contextend(l, len):
                     # Index `opts.context` lines past `l`, clamped to `len`.
                     ret = l + opts.context
                     if ret > len:
                         ret = len
                     return ret
                 def contextstart(l):
                     # Index `opts.context` lines before `l`, clamped to 0.
                     ret = l - opts.context
                     if ret < 0:
                         return 0
                     return ret
                 # Mutable [position, function line] pair shared by successive
                 # yieldhunk() calls when opts.showfunc is set.
                 lastfunc = [0, b'']
-                def yieldhunk(hunk):
+                def yieldhunk(
+                    hunk: Tuple[int, int, int, int, List[bytes]]
+                ) -> Iterable[Tuple[HunkRange, HunkLines]]:
                     # Turn an accumulated hunk into its final (hunkrange, hunklines)
                     # form: add trailing context, build the '@@' header and insert
                     # the missing-newline markers.
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     # The trailing context (aend - a2 lines) is counted on the new
                     # side as well.
                     blen = b2 - bstart + aend - a2
                     func = b""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in range(astart - 1, lastpos - 1, -1):
                             if l1[i][0:1].isalnum():
                                 func = b' ' + l1[i].rstrip()
                                 # split long function name if ASCII. otherwise we have no
                                 # idea where the multi-byte boundary is, so just leave it.
                                 if encoding.isasciistr(func):
                                     func = func[:41]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart
                     # zero-length hunk ranges report their start line as one less
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1
                     hunkrange = astart, alen, bstart, blen
                     hunklines = (
                         [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
                         + delta
                         + [b' ' + l1[x] for x in range(a2, aend)]
                     )
                     # If either file ends without a newline and the last line of
                     # that file is part of a hunk, a marker is printed. If the
                     # last line of both files is identical and neither ends in
                     # a newline, print only one marker. That's the only case in
                     # which the hunk can end in a shared line without a newline.
                     skip = False
                     if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
                         # Search backwards for the last line belonging to the old
                         # side (removed or context).
                         for i in range(len(hunklines) - 1, -1, -1):
                             if hunklines[i].startswith((b'-', b' ')):
                                 if hunklines[i].startswith(b' '):
                                     skip = True
                                 hunklines[i] += b'\n'
                                 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                                 break
                     if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
                         # Search backwards for the last added line.
                         for i in range(len(hunklines) - 1, -1, -1):
                             if hunklines[i].startswith(b'+'):
                                 hunklines[i] += b'\n'
                                 hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                                 break
                     yield hunkrange, hunklines
                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 # `hunk` is the hunk being accumulated, as
                 # (astart, a2, bstart, b2, delta); it is only yielded once a later
                 # change turns out to be too far away to be merged into it.
                 hunk = None
                 ignoredlines = 0
                 has_hunks = False
                 for s, stype in allblocks(t1, t2, opts, l1, l2):
                     a1, a2, b1, b2 = s
                     if stype != b'!':
                         if stype == b'~':
                             # The diff context lines are based on t1 content. When
                             # blank lines are ignored, the new lines offsets must
                             # be adjusted as if equivalent blocks ('~') had the
                             # same sizes on both sides.
                             ignoredlines += (b2 - b1) - (a2 - a1)
                         continue
                     delta = []
                     # Slice before shifting b1/b2: the shifted values are only used
                     # for the reported ranges.
                     old = l1[a1:a2]
                     new = l2[b1:b2]
                     b1 -= ignoredlines
                     b2 -= ignoredlines
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             if not has_hunks:
                                 has_hunks = True
                                 yield True
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk = (hunk[0], a2, hunk[2], b2, hunk[4])
                         # Same list object as in the previous hunk: it is extended
                         # in place below.
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = (astart, a2, bstart, b2, delta)
                     # Leading context (or the lines between two joined changes),
                     # then the removed and added lines.
                     delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
                     delta[len(delta) :] = [b'-' + x for x in old]
                     delta[len(delta) :] = [b'+' + x for x in new]
                 # Flush the last pending hunk, or report that there was none.
                 if hunk:
                     if not has_hunks:
                         has_hunks = True
                         yield True
                     for x in yieldhunk(hunk):
                         yield x
                 elif not has_hunks:
                     yield False
-            def b85diff(to, tn):
+            def b85diff(to: Optional[bytes], tn: Optional[bytes]) -> bytes:
                 '''print base85-encoded binary diff'''
                 # Despite "print" above, nothing is written anywhere: the patch text
                 # is returned. `to`/`tn` are the old/new contents; None is treated
                 # as empty, and identical contents give b''.
                 def fmtline(line):
                     # One output line: a length character followed by the base85
                     # encoding of `line`. Lengths 1..26 map to 'A'..'Z' and 27..52 to
                     # 'a'..'z'.
                     l = len(line)
                     if l <= 26:
                         l = pycompat.bytechr(ord(b'A') + l - 1)
                     else:
                         l = pycompat.bytechr(l - 26 + ord(b'a') - 1)
                     return b'%c%s\n' % (l, util.b85encode(line, True))
                 def chunk(text, csize=52):
                     # Yield successive `csize`-byte slices of `text`; the last one may
                     # be shorter.
                     l = len(text)
                     i = 0
                     while i < l:
                         yield text[i : i + csize]
                         i += csize
                 if to is None:
                     to = b''
                 if tn is None:
                     tn = b''
                 if to == tn:
                     return b''
                 # TODO: deltas
                 # Only a full "literal" of the new content is emitted; `to` is used
                 # solely for the equality test above.
                 ret = []
                 ret.append(b'GIT binary patch\n')
                 # The announced size is that of the uncompressed new content.
                 ret.append(b'literal %d\n' % len(tn))
                 for l in chunk(zlib.compress(tn)):
                     ret.append(fmtline(l))
                 ret.append(b'\n')
                 return b''.join(ret)
-            def patchtext(bin):
+            def patchtext(bin: bytes) -> bytes:
                 # Return the concatenation of the data payloads of every fragment
                 # found in `bin`. Each fragment is a 12-byte header of three
                 # big-endian signed 32-bit integers followed by `l` bytes of data,
                 # `l` being the third integer.
                 # NOTE(review): p1/p2 are presumably the start/end offsets of the
                 # replaced region in the base text -- they are unused here; confirm
                 # against the mpatch format.
                 pos = 0
                 t = []
                 while pos < len(bin):
                     p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])
                     pos += 12
                     t.append(bin[pos : pos + l])
                     pos += l
                 return b"".join(t)
             def patch(a, bin):
                 """Apply the binary delta ``bin`` to the base text ``a``.

                 A non-empty base is handed to ``mpatch.patches``. For an empty base
                 the result is ``bin`` without its first 12 bytes (the trivial delta
                 header), wrapped with ``util.buffer``.
                 """
                 if len(a):
                     return mpatch.patches(a, [bin])
                 # Empty base: skip over the trivial delta header.
                 return util.buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
-            def get_matching_blocks(a, b):
+            def get_matching_blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int]]:
                 # Convert each block `d` returned by bdiff.blocks() into a
                 # (d[0], d[2], d[1] - d[0]) triple, i.e. (start in a, start in b,
                 # length) if `d` is laid out as (a1, a2, b1, b2) like the blocks
                 # unpacked elsewhere in this module.
                 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
-            def trivialdiffheader(length):
+            def trivialdiffheader(length: int) -> bytes:
                 # 12-byte header packing (0, 0, length) as big-endian signed 32-bit
                 # integers; an empty bytes object when `length` is 0.
                 return struct.pack(b">lll", 0, 0, length) if length else b''
-            def replacediffheader(oldlen, newlen):
+            def replacediffheader(oldlen: int, newlen: int) -> bytes:
                 # 12-byte header packing (0, oldlen, newlen) as big-endian signed
                 # 32-bit integers.
                 # NOTE(review): presumably describes replacing the first `oldlen`
                 # bytes of the base with `newlen` bytes of new data -- confirm
                 # against the mpatch format.
                 return struct.pack(b">lll", 0, oldlen, newlen)