upstream/mercurial-mirror Commit - r15525:935bf2e7

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from i18n import _

8

from i18n import _

9

import bdiff, mpatch, util

9

import bdiff, mpatch, util

10

import re, struct

10

import re, struct

11

12

def splitnewlines(text):

12

def splitnewlines(text):

13

'''like str.splitlines, but only split on newlines.'''

13

'''like str.splitlines, but only split on newlines.'''

14

lines = [l + '\n' for l in text.split('\n')]

14

lines = [l + '\n' for l in text.split('\n')]

15

if lines:

15

if lines:

16

if lines[-1] == '\n':

16

if lines[-1] == '\n':

17

lines.pop()

17

lines.pop()

18

else:

18

else:

19

lines[-1] = lines[-1][:-1]

19

lines[-1] = lines[-1][:-1]

20

return lines

20

return lines

21

22

class diffopts(object):

22

class diffopts(object):

23

'''context is the number of context lines

23

'''context is the number of context lines

24

text treats all files as text

24

text treats all files as text

25

showfunc enables diff -p output

25

showfunc enables diff -p output

26

git enables the git extended patch format

26

git enables the git extended patch format

27

nodates removes dates from diff headers

27

nodates removes dates from diff headers

28

ignorews ignores all whitespace changes in the diff

28

ignorews ignores all whitespace changes in the diff

29

ignorewsamount ignores changes in the amount of whitespace

29

ignorewsamount ignores changes in the amount of whitespace

30

ignoreblanklines ignores changes whose lines are all blank

30

ignoreblanklines ignores changes whose lines are all blank

31

upgrade generates git diffs to avoid data loss

31

upgrade generates git diffs to avoid data loss

32

'''

32

'''

33

34

defaults = {

34

defaults = {

35

'context': 3,

35

'context': 3,

36

'text': False,

36

'text': False,

37

'showfunc': False,

37

'showfunc': False,

38

'git': False,

38

'git': False,

39

'nodates': False,

39

'nodates': False,

40

'ignorews': False,

40

'ignorews': False,

41

'ignorewsamount': False,

41

'ignorewsamount': False,

42

'ignoreblanklines': False,

42

'ignoreblanklines': False,

43

'upgrade': False,

43

'upgrade': False,

44

}

44

}

45

46

__slots__ = defaults.keys()

46

__slots__ = defaults.keys()

47

48

def __init__(self, **opts):

48

def __init__(self, **opts):

49

for k in self.__slots__:

49

for k in self.__slots__:

50

v = opts.get(k)

50

v = opts.get(k)

51

if v is None:

51

if v is None:

52

v = self.defaults[k]

52

v = self.defaults[k]

53

setattr(self, k, v)

53

setattr(self, k, v)

54

55

try:

55

try:

56

self.context = int(self.context)

56

self.context = int(self.context)

57

except ValueError:

57

except ValueError:

58

raise util.Abort(_('diff context lines count must be '

58

raise util.Abort(_('diff context lines count must be '

59

'an integer, not %r') % self.context)

59

'an integer, not %r') % self.context)

60

61

def copy(self, **kwargs):

61

def copy(self, **kwargs):

62

opts = dict((k, getattr(self, k)) for k in self.defaults)

62

opts = dict((k, getattr(self, k)) for k in self.defaults)

63

opts.update(kwargs)

63

opts.update(kwargs)

64

return diffopts(**opts)

64

return diffopts(**opts)

65

66

defaultopts = diffopts()

66

defaultopts = diffopts()

67

68

def wsclean(opts, text, blank=True):

68

def wsclean(opts, text, blank=True):

69

if opts.ignorews:

69

if opts.ignorews:

70

text = re.sub('[ \t\r]+', '', text)

70

text = re.sub('[ \t\r]+', '', text)

71

elif opts.ignorewsamount:

71

elif opts.ignorewsamount:

72

text = re.sub('[ \t\r]+', ' ', text)

72

text = re.sub('[ \t\r]+', ' ', text)

73

text = text.replace(' \n', '\n')

73

text = text.replace(' \n', '\n')

74

if blank and opts.ignoreblanklines:

74

if blank and opts.ignoreblanklines:

75

text = re.sub('\n+', '\n', text).strip('\n')

75

text = re.sub('\n+', '\n', text).strip('\n')

76

return text

76

return text

77

78

def diffblocks(text1, text2, opts=None, lines1=None, lines2=None):

79

"""Return changed blocks between text1 and text2, the blocks in-between

80

those emitted by bdiff.blocks. Take in account the whitespace normalization

81

rules defined by opts.

82

line1 and line2 are text1 and text2 split with splitnewlines() if they are

83

already available.

84

"""

85

if opts is None:

86

opts = defaultopts

87

if lines1 is None:

88

lines1 = splitnewlines(text1)

89

if lines2 is None:

90

lines2 = splitnewlines(text2)

91

if opts.ignorews or opts.ignorewsamount:

92

text1 = wsclean(opts, text1, False)

93

text2 = wsclean(opts, text2, False)

94

diff = bdiff.blocks(text1, text2)

95

for i, s1 in enumerate(diff):

96

# The first match is special.

97

# we've either found a match starting at line 0 or a match later

98

# in the file. If it starts later, old and new below will both be

99

# empty and we'll continue to the next match.

100

if i > 0:

101

s = diff[i - 1]

102

else:

103

s = [0, 0, 0, 0]

104

s = [s[1], s1[0], s[3], s1[2]]

105

old = lines1[s[0]:s[1]]

106

new = lines2[s[2]:s[3]]

107

108

# bdiff sometimes gives huge matches past eof, this check eats them,

109

# and deals with the special first match case described above

110

if not old and not new:

111

continue

112

113

if opts.ignoreblanklines:

114

if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):

115

continue

116

yield s

117

78

def diffline(revs, a, b, opts):

118

def diffline(revs, a, b, opts):

79

parts = ['diff']

119

parts = ['diff']

80

if opts.git:

120

if opts.git:

81

parts.append('--git')

121

parts.append('--git')

82

if revs and not opts.git:

122

if revs and not opts.git:

83

parts.append(' '.join(["-r %s" % rev for rev in revs]))

123

parts.append(' '.join(["-r %s" % rev for rev in revs]))

84

if opts.git:

124

if opts.git:

85

parts.append('a/%s' % a)

125

parts.append('a/%s' % a)

86

parts.append('b/%s' % b)

126

parts.append('b/%s' % b)

87

else:

127

else:

88

parts.append(a)

128

parts.append(a)

89

return ' '.join(parts) + '\n'

129

return ' '.join(parts) + '\n'

90

130

91

def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):

131

def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):

92

def datetag(date, addtab=True):

132

def datetag(date, addtab=True):

93

if not opts.git and not opts.nodates:

133

if not opts.git and not opts.nodates:

94

return '\t%s\n' % date

134

return '\t%s\n' % date

95

if addtab and ' ' in fn1:

135

if addtab and ' ' in fn1:

96

return '\t\n'

136

return '\t\n'

97

return '\n'

137

return '\n'

98

138

99

if not a and not b:

139

if not a and not b:

100

return ""

140

return ""

101

epoch = util.datestr((0, 0))

141

epoch = util.datestr((0, 0))

102

142

103

fn1 = util.pconvert(fn1)

143

fn1 = util.pconvert(fn1)

104

fn2 = util.pconvert(fn2)

144

fn2 = util.pconvert(fn2)

105

145

106

if not opts.text and (util.binary(a) or util.binary(b)):

146

if not opts.text and (util.binary(a) or util.binary(b)):

107

if a and b and len(a) == len(b) and a == b:

147

if a and b and len(a) == len(b) and a == b:

108

return ""

148

return ""

109

l = ['Binary file %s has changed\n' % fn1]

149

l = ['Binary file %s has changed\n' % fn1]

110

elif not a:

150

elif not a:

111

b = splitnewlines(b)

151

b = splitnewlines(b)

112

if a is None:

152

if a is None:

113

l1 = '--- /dev/null%s' % datetag(epoch, False)

153

l1 = '--- /dev/null%s' % datetag(epoch, False)

114

else:

154

else:

115

l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))

155

l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))

116

l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))

156

l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))

117

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

157

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

118

l = [l1, l2, l3] + ["+" + e for e in b]

158

l = [l1, l2, l3] + ["+" + e for e in b]

119

elif not b:

159

elif not b:

120

a = splitnewlines(a)

160

a = splitnewlines(a)

121

l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))

161

l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))

122

if b is None:

162

if b is None:

123

l2 = '+++ /dev/null%s' % datetag(epoch, False)

163

l2 = '+++ /dev/null%s' % datetag(epoch, False)

124

else:

164

else:

125

l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))

165

l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))

126

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

166

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

127

l = [l1, l2, l3] + ["-" + e for e in a]

167

l = [l1, l2, l3] + ["-" + e for e in a]

128

else:

168

else:

129

al = splitnewlines(a)

169

al = splitnewlines(a)

130

bl = splitnewlines(b)

170

bl = splitnewlines(b)

131

l = list(_unidiff(a, b, al, bl, opts=opts))

171

l = list(_unidiff(a, b, al, bl, opts=opts))

132

if not l:

172

if not l:

133

return ""

173

return ""

134

174

135

l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))

175

l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))

136

l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))

176

l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))

137

177

138

for ln in xrange(len(l)):

178

for ln in xrange(len(l)):

139

if l[ln][-1] != '\n':

179

if l[ln][-1] != '\n':

140

l[ln] += "\n\ No newline at end of file\n"

180

l[ln] += "\n\ No newline at end of file\n"

141

181

142

if r:

182

if r:

143

l.insert(0, diffline(r, fn1, fn2, opts))

183

l.insert(0, diffline(r, fn1, fn2, opts))

144

184

145

return "".join(l)

185

return "".join(l)

146

186

147

# creates a headerless unified diff

187

# creates a headerless unified diff

148

# t1 and t2 are the text to be diffed

188

# t1 and t2 are the text to be diffed

149

# l1 and l2 are the text broken up into lines

189

# l1 and l2 are the text broken up into lines

150

def _unidiff(t1, t2, l1, l2, opts=defaultopts):

190

def _unidiff(t1, t2, l1, l2, opts=defaultopts):

151

def contextend(l, len):

191

def contextend(l, len):

152

ret = l + opts.context

192

ret = l + opts.context

153

if ret > len:

193

if ret > len:

154

ret = len

194

ret = len

155

return ret

195

return ret

156

196

157

def contextstart(l):

197

def contextstart(l):

158

ret = l - opts.context

198

ret = l - opts.context

159

if ret < 0:

199

if ret < 0:

160

return 0

200

return 0

161

return ret

201

return ret

162

202

163

lastfunc = [0, '']

203

lastfunc = [0, '']

164

def yieldhunk(hunk):

204

def yieldhunk(hunk):

165

(astart, a2, bstart, b2, delta) = hunk

205

(astart, a2, bstart, b2, delta) = hunk

166

aend = contextend(a2, len(l1))

206

aend = contextend(a2, len(l1))

167

alen = aend - astart

207

alen = aend - astart

168

blen = b2 - bstart + aend - a2

208

blen = b2 - bstart + aend - a2

169

209

170

func = ""

210

func = ""

171

if opts.showfunc:

211

if opts.showfunc:

172

lastpos, func = lastfunc

212

lastpos, func = lastfunc

173

# walk backwards from the start of the context up to the start of

213

# walk backwards from the start of the context up to the start of

174

# the previous hunk context until we find a line starting with an

214

# the previous hunk context until we find a line starting with an

175

# alphanumeric char.

215

# alphanumeric char.

176

for i in xrange(astart - 1, lastpos - 1, -1):

216

for i in xrange(astart - 1, lastpos - 1, -1):

177

if l1[i][0].isalnum():

217

if l1[i][0].isalnum():

178

func = ' ' + l1[i].rstrip()[:40]

218

func = ' ' + l1[i].rstrip()[:40]

179

lastfunc[1] = func

219

lastfunc[1] = func

180

break

220

break

181

# by recording this hunk's starting point as the next place to

221

# by recording this hunk's starting point as the next place to

182

# start looking for function lines, we avoid reading any line in

222

# start looking for function lines, we avoid reading any line in

183

# the file more than once.

223

# the file more than once.

184

lastfunc[0] = astart

224

lastfunc[0] = astart

185

225

186

# zero-length hunk ranges report their start line as one less

226

# zero-length hunk ranges report their start line as one less

187

if alen:

227

if alen:

188

astart += 1

228

astart += 1

189

if blen:

229

if blen:

190

bstart += 1

230

bstart += 1

191

231

192

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,

232

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,

193

bstart, blen, func)

233

bstart, blen, func)

194

for x in delta:

234

for x in delta:

195

yield x

235

yield x

196

for x in xrange(a2, aend):

236

for x in xrange(a2, aend):

197

yield ' ' + l1[x]

237

yield ' ' + l1[x]

198

238

199

# bdiff.blocks gives us the matching sequences in the files. The loop

239

# bdiff.blocks gives us the matching sequences in the files. The loop

200

# below finds the spaces between those matching sequences and translates

240

# below finds the spaces between those matching sequences and translates

201

# them into diff output.

241

# them into diff output.

202

#

242

#

203

if opts.ignorews or opts.ignorewsamount:

204

t1 = wsclean(opts, t1, False)

205

t2 = wsclean(opts, t2, False)

206

207

diff = bdiff.blocks(t1, t2)

208

hunk = None

243

hunk = None

209

for i, s1 in enumerate(diff):

244

for s in diffblocks(t1, t2, opts, l1, l2):

210

# The first match is special.

211

# we've either found a match starting at line 0 or a match later

212

# in the file. If it starts later, old and new below will both be

213

# empty and we'll continue to the next match.

214

if i > 0:

215

s = diff[i - 1]

216

else:

217

s = [0, 0, 0, 0]

218

delta = []

245

delta = []

219

a1 = s[1]

246

a1, a2, b1, b2 = s

220

a2 = s1[0]

221

b1 = s[3]

222

b2 = s1[2]

223

224

old = l1[a1:a2]

247

old = l1[a1:a2]

225

new = l2[b1:b2]

248

new = l2[b1:b2]

226

249

227

# bdiff sometimes gives huge matches past eof, this check eats them,

228

# and deals with the special first match case described above

229

if not old and not new:

230

continue

231

232

if opts.ignoreblanklines:

233

if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):

234

continue

235

236

astart = contextstart(a1)

250

astart = contextstart(a1)

237

bstart = contextstart(b1)

251

bstart = contextstart(b1)

238

prev = None

252

prev = None

239

if hunk:

253

if hunk:

240

# join with the previous hunk if it falls inside the context

254

# join with the previous hunk if it falls inside the context

241

if astart < hunk[1] + opts.context + 1:

255

if astart < hunk[1] + opts.context + 1:

242

prev = hunk

256

prev = hunk

243

astart = hunk[1]

257

astart = hunk[1]

244

bstart = hunk[3]

258

bstart = hunk[3]

245

else:

259

else:

246

for x in yieldhunk(hunk):

260

for x in yieldhunk(hunk):

247

yield x

261

yield x

248

if prev:

262

if prev:

249

# we've joined the previous hunk, record the new ending points.

263

# we've joined the previous hunk, record the new ending points.

250

hunk[1] = a2

264

hunk[1] = a2

251

hunk[3] = b2

265

hunk[3] = b2

252

delta = hunk[4]

266

delta = hunk[4]

253

else:

267

else:

254

# create a new hunk

268

# create a new hunk

255

hunk = [astart, a2, bstart, b2, delta]

269

hunk = [astart, a2, bstart, b2, delta]

256

270

257

delta[len(delta):] = [' ' + x for x in l1[astart:a1]]

271

delta[len(delta):] = [' ' + x for x in l1[astart:a1]]

258

delta[len(delta):] = ['-' + x for x in old]

272

delta[len(delta):] = ['-' + x for x in old]

259

delta[len(delta):] = ['+' + x for x in new]

273

delta[len(delta):] = ['+' + x for x in new]

260

274

261

if hunk:

275

if hunk:

262

for x in yieldhunk(hunk):

276

for x in yieldhunk(hunk):

263

yield x

277

yield x

264

278

265

def patchtext(bin):

279

def patchtext(bin):

266

pos = 0

280

pos = 0

267

t = []

281

t = []

268

while pos < len(bin):

282

while pos < len(bin):

269

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

283

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

270

pos += 12

284

pos += 12

271

t.append(bin[pos:pos + l])

285

t.append(bin[pos:pos + l])

272

pos += l

286

pos += l

273

return "".join(t)

287

return "".join(t)

274

288

275

def patch(a, bin):

289

def patch(a, bin):

276

if len(a) == 0:

290

if len(a) == 0:

277

# skip over trivial delta header

291

# skip over trivial delta header

278

return buffer(bin, 12)

292

return buffer(bin, 12)

279

return mpatch.patches(a, [bin])

293

return mpatch.patches(a, [bin])

280

294

281

# similar to difflib.SequenceMatcher.get_matching_blocks

295

# similar to difflib.SequenceMatcher.get_matching_blocks

282

def get_matching_blocks(a, b):

296

def get_matching_blocks(a, b):

283

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

297

return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

284

298

285

def trivialdiffheader(length):

299

def trivialdiffheader(length):

286

return struct.pack(">lll", 0, 0, length)

300

return struct.pack(">lll", 0, 0, length)

287

301

288

patches = mpatch.patches

302

patches = mpatch.patches

289

patchedsize = mpatch.patchedsize

303

patchedsize = mpatch.patchedsize

290

textdiff = bdiff.bdiff

304

textdiff = bdiff.bdiff

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from i18n import _
             import bdiff, mpatch, util
             import re, struct
             def splitnewlines(text):
                 '''Split text into lines on newline characters only.

                 Unlike str.splitlines, no other line-boundary character acts as a
                 separator.  Each returned line keeps its terminating newline; the
                 last line has none when text does not end with one.  An empty text
                 gives an empty list.
                 '''
                 pieces = text.split('\n')
                 # split() always returns at least one element; the final one is
                 # whatever follows the last newline (empty if text ends with one).
                 tail = pieces.pop()
                 result = [piece + '\n' for piece in pieces]
                 if tail:
                     result.append(tail)
                 return result
             class diffopts(object):
                 '''Options controlling how diffs are generated.

                 context is the number of context lines
                 text treats all files as text
                 showfunc enables diff -p output
                 git enables the git extended patch format
                 nodates removes dates from diff headers
                 ignorews ignores all whitespace changes in the diff
                 ignorewsamount ignores changes in the amount of whitespace
                 ignoreblanklines ignores changes whose lines are all blank
                 upgrade generates git diffs to avoid data loss
                 '''
                 # value used for every option that is not supplied (or is None)
                 defaults = {
                     'context': 3,
                     'text': False,
                     'showfunc': False,
                     'git': False,
                     'nodates': False,
                     'ignorews': False,
                     'ignorewsamount': False,
                     'ignoreblanklines': False,
                     'upgrade': False,
                     }
                 # only the known option names may be set on an instance
                 __slots__ = defaults.keys()
                 def __init__(self, **opts):
                     '''Set every known option from opts, falling back to defaults.

                     Keyword arguments that are not known option names are ignored.
                     Raises util.Abort if context cannot be parsed as an integer.
                     '''
                     for name in self.__slots__:
                         given = opts.get(name)
                         if given is not None:
                             setattr(self, name, given)
                         else:
                             setattr(self, name, self.defaults[name])
                     # context may arrive as a string; normalize it to an int
                     requested = self.context
                     try:
                         self.context = int(requested)
                     except ValueError:
                         raise util.Abort(_('diff context lines count must be '
                                            'an integer, not %r') % requested)
                 def copy(self, **kwargs):
                     '''Return a new diffopts with this one's values, overridden by
                     kwargs.'''
                     merged = {}
                     for name in self.defaults:
                         merged[name] = getattr(self, name)
                     merged.update(kwargs)
                     return diffopts(**merged)
             # Shared instance holding every option at its default value; used as the
             # default for the opts parameter of the diff functions in this module.
             defaultopts = diffopts()
             def wsclean(opts, text, blank=True):
                 '''Return text normalized according to the whitespace options in opts.

                 With opts.ignorews every run of spaces, tabs and carriage returns is
                 removed.  Otherwise, with opts.ignorewsamount, each such run becomes
                 a single space and a space directly before a newline is dropped.
                 When blank is true and opts.ignoreblanklines is set, runs of
                 newlines are also collapsed to one and leading/trailing newlines are
                 stripped.
                 '''
                 if opts.ignorews:
                     cleaned = re.sub('[ \t\r]+', '', text)
                 elif opts.ignorewsamount:
                     cleaned = re.sub('[ \t\r]+', ' ', text).replace(' \n', '\n')
                 else:
                     cleaned = text
                 if not (blank and opts.ignoreblanklines):
                     return cleaned
                 return re.sub('\n+', '\n', cleaned).strip('\n')
+            def diffblocks(text1, text2, opts=None, lines1=None, lines2=None):
+                """Return changed blocks between text1 and text2, the blocks in-between
+                those emitted by bdiff.blocks. Take in account the whitespace normalization
+                rules defined by opts.
+                line1 and line2 are text1 and text2 split with splitnewlines() if they are
+                already available.
+                """
+                if opts is None:
+                    opts = defaultopts
+                if lines1 is None:
+                    lines1 = splitnewlines(text1)
+                if lines2 is None:
+                    lines2 = splitnewlines(text2)
+                if opts.ignorews or opts.ignorewsamount:
+                    text1 = wsclean(opts, text1, False)
+                    text2 = wsclean(opts, text2, False)
+                diff = bdiff.blocks(text1, text2)
+                for i, s1 in enumerate(diff):
+                    # The first match is special.
+                    # we've either found a match starting at line 0 or a match later
+                    # in the file.  If it starts later, old and new below will both be
+                    # empty and we'll continue to the next match.
+                    if i > 0:
+                        s = diff[i - 1]
+                    else:
+                        s = [0, 0, 0, 0]
+                    s = [s[1], s1[0], s[3], s1[2]]
+                    old = lines1[s[0]:s[1]]
+                    new = lines2[s[2]:s[3]]
+                    # bdiff sometimes gives huge matches past eof, this check eats them,
+                    # and deals with the special first match case described above
+                    if not old and not new:
+                        continue
+                    if opts.ignoreblanklines:
+                        if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
+                            continue
+                    yield s
             def diffline(revs, a, b, opts):
                 '''Return the "diff ..." line that introduces a file's diff.

                 In git mode (opts.git) the line is "diff --git a/<a> b/<b>" and revs
                 is not used.  Otherwise it is "diff", a "-r <rev>" for each entry of
                 revs (if any), then the name a.  The line ends with a newline.
                 '''
                 if opts.git:
                     words = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
                 else:
                     words = ['diff']
                     if revs:
                         words.append(' '.join(["-r %s" % rev for rev in revs]))
                     words.append(a)
                 return ' '.join(words) + '\n'
             def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
                 '''Return a unified diff between contents a and b as one string.

                 a and b are the old and new contents; either may be None, which
                 makes that side appear as /dev/null in the header.  ad and bd are
                 the dates written after the file names in the "---" and "+++"
                 header lines.  fn1 and fn2 are the old and new file names.  r, when
                 true, is passed to diffline() to build a leading "diff" line.  opts
                 supplies the diff options (text, git, nodates and those read by
                 _unidiff).

                 An empty string is returned when both contents are empty, when
                 binary contents are equal, or when _unidiff() produces no hunks.
                 '''
                 def datetag(date, addtab=True):
                     # Tail of a header line: tab + date unless dates are suppressed
                     # (git or nodates mode); otherwise a lone tab when the name
                     # contains a space and addtab is set, else just the newline.
                     if not opts.git and not opts.nodates:
                         return '\t%s\n' % date
                     # NOTE(review): this always inspects fn1, including when the tag
                     # is built for the "+++" line naming fn2 -- confirm intended.
                     if addtab and ' ' in fn1:
                         return '\t\n'
                     return '\n'
                 # nothing on either side: no diff at all
                 if not a and not b:
                     return ""
                 # date shown next to /dev/null; presumably the Unix epoch rendered
                 # by util.datestr -- verify against util
                 epoch = util.datestr((0, 0))
                 # presumably normalizes path separators -- verify against util;
                 # datetag() sees the converted fn1 because it reads it at call time
                 fn1 = util.pconvert(fn1)
                 fn2 = util.pconvert(fn2)
                 if not opts.text and (util.binary(a) or util.binary(b)):
                     # binary content is only reported as changed, never diffed
                     if a and b and len(a) == len(b) and a == b:
                         return ""
                     l = ['Binary file %s has changed\n' % fn1]
                 elif not a:
                     # old side empty or missing: every line of b is an addition
                     b = splitnewlines(b)
                     if a is None:
                         l1 = '--- /dev/null%s' % datetag(epoch, False)
                     else:
                         l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
                     l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
                     l3 = "@@ -0,0 +1,%d @@\n" % len(b)
                     l = [l1, l2, l3] + ["+" + e for e in b]
                 elif not b:
                     # new side empty or missing: every line of a is a removal
                     a = splitnewlines(a)
                     l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
                     if b is None:
                         l2 = '+++ /dev/null%s' % datetag(epoch, False)
                     else:
                         l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
                     l3 = "@@ -1,%d +0,0 @@\n" % len(a)
                     l = [l1, l2, l3] + ["-" + e for e in a]
                 else:
                     # both sides have content: let _unidiff() produce the hunks
                     al = splitnewlines(a)
                     bl = splitnewlines(b)
                     l = list(_unidiff(a, b, al, bl, opts=opts))
                     if not l:
                         return ""
                     l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
                     l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
                 # any output line lacking a trailing newline gets the
                 # "No newline at end of file" marker appended
                 for ln in xrange(len(l)):
                     if l[ln][-1] != '\n':
                         l[ln] += "\n\ No newline at end of file\n"
                 # the "diff ..." line goes first, and only when revisions were given
                 if r:
                     l.insert(0, diffline(r, fn1, fn2, opts))
                 return "".join(l)
             # creates a headerless unified diff
             # t1 and t2 are the text to be diffed
             # l1 and l2 are the text broken up into lines
             def _unidiff(t1, t2, l1, l2, opts=defaultopts):
                 def contextend(l, len):
                     ret = l + opts.context
                     if ret > len:
                         ret = len
                     return ret
                 def contextstart(l):
                     ret = l - opts.context
                     if ret < 0:
                         return 0
                     return ret
                 lastfunc = [0, '']
                 def yieldhunk(hunk):
                     (astart, a2, bstart, b2, delta) = hunk
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     blen = b2 - bstart + aend - a2
                     func = ""
                     if opts.showfunc:
                         lastpos, func = lastfunc
                         # walk backwards from the start of the context up to the start of
                         # the previous hunk context until we find a line starting with an
                         # alphanumeric char.
                         for i in xrange(astart - 1, lastpos - 1, -1):
                             if l1[i][0].isalnum():
                                 func = ' ' + l1[i].rstrip()[:40]
                                 lastfunc[1] = func
                                 break
                         # by recording this hunk's starting point as the next place to
                         # start looking for function lines, we avoid reading any line in
                         # the file more than once.
                         lastfunc[0] = astart
                     # zero-length hunk ranges report their start line as one less
                     if alen:
                         astart += 1
                     if blen:
                         bstart += 1
                     yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
                                                        bstart, blen, func)
                     for x in delta:
                         yield x
                     for x in xrange(a2, aend):
                         yield ' ' + l1[x]
                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
-                if opts.ignorews or opts.ignorewsamount:
-                    t1 = wsclean(opts, t1, False)
-                    t2 = wsclean(opts, t2, False)
-                diff = bdiff.blocks(t1, t2)
                 hunk = None
-                for i, s1 in enumerate(diff):
+                for s in diffblocks(t1, t2, opts, l1, l2):
-                    # The first match is special.
-                    # we've either found a match starting at line 0 or a match later
-                    # in the file.  If it starts later, old and new below will both be
-                    # empty and we'll continue to the next match.
-                    if i > 0:
-                        s = diff[i - 1]
-                    else:
-                        s = [0, 0, 0, 0]
                     delta = []
-                    a1 = s[1]
+                    a1, a2, b1, b2 = s
-                    a2 = s1[0]
-                    b1 = s[3]
-                    b2 = s1[2]
                     old = l1[a1:a2]
                     new = l2[b1:b2]
-                    # bdiff sometimes gives huge matches past eof, this check eats them,
-                    # and deals with the special first match case described above
-                    if not old and not new:
-                        continue
-                    if opts.ignoreblanklines:
-                        if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
-                            continue
                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + opts.context + 1:
                             prev = hunk
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             for x in yieldhunk(hunk):
                                 yield x
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [astart, a2, bstart, b2, delta]
                     delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
                     delta[len(delta):] = ['-' + x for x in old]
                     delta[len(delta):] = ['+' + x for x in new]
                 if hunk:
                     for x in yieldhunk(hunk):
                         yield x
             def patchtext(bin):
                 '''Return the concatenated data of every fragment in the delta bin.

                 Each fragment is a 12-byte header of three big-endian signed 32-bit
                 integers followed by its data; the third integer is the data
                 length.  The first two integers are not used here.
                 '''
                 total = len(bin)
                 offset = 0
                 chunks = []
                 while offset < total:
                     datastart = offset + 12
                     length = struct.unpack(">lll", bin[offset:datastart])[2]
                     # the next header begins right after this fragment's data
                     offset = datastart + length
                     chunks.append(bin[datastart:offset])
                 return "".join(chunks)
             def patch(a, bin):
                 '''Apply the binary delta bin to the text a and return the result.'''
                 if len(a) != 0:
                     return mpatch.patches(a, [bin])
                 # Empty base text: skip over the trivial 12-byte delta header and
                 # hand back the rest of bin without going through mpatch.
                 return buffer(bin, 12)
             # similar to difflib.SequenceMatcher.get_matching_blocks
             def get_matching_blocks(a, b):
                 '''Return (start in a, start in b, length) triples for the blocks
                 reported by bdiff.blocks(a, b).

                 The length is the difference between a block's second and first
                 items, presumably its end and start on the a side -- verify against
                 bdiff.
                 '''
                 matches = []
                 for blk in bdiff.blocks(a, b):
                     astart, aend, bstart = blk[0], blk[1], blk[2]
                     matches.append((astart, bstart, aend - astart))
                 return matches
             def trivialdiffheader(length):
                 '''Return the 12-byte delta header whose first two fields are zero
                 and whose third field is length.

                 The fields are packed as big-endian signed 32-bit integers.
                 '''
                 start = end = 0
                 return struct.pack(">lll", start, end, length)
             # Aliases making functions of the mpatch and bdiff modules available
             # under this module's own names.
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff