upstream/mercurial-mirror Commit - r2251:35fb62a3

1

# mdiff.py - diff and patch routines for mercurial

1

# mdiff.py - diff and patch routines for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms

5

# This software may be used and distributed according to the terms

6

# of the GNU General Public License, incorporated herein by reference.

6

# of the GNU General Public License, incorporated herein by reference.

7

8

from demandload import demandload

8

from demandload import demandload

9

import struct, bdiff, util, mpatch

9

import struct, bdiff, util, mpatch

10

demandload(globals(), "re")

10

demandload(globals(), "re")

11

12

def splitnewlines(text):

13

def splitnewlines(text, keepends=False):

14

'''like str.splitlines, but only split on newlines.'''

13

'''like str.splitlines, but only split on newlines.'''

15

i = 0

14

lines = [l + '\n' for l in text.split('\n')]

16

lines = []

15

if lines:

17

while True:

16

if lines[-1] == '\n':

18

n = text.find('\n', i)

17

lines.pop()

19

if n == -1:

18

else:

20

last = text[i:]

19

lines[-1] = lines[-1][:-1]

21

if last:

20

return lines

22

lines.append(last)

23

return lines

24

lines.append(text[i:keepends and n+1 or n])

25

i = n + 1

26

21

27

def unidiff(a, ad, b, bd, fn, r=None, text=False,

22

def unidiff(a, ad, b, bd, fn, r=None, text=False,

28

showfunc=False, ignorews=False):

23

showfunc=False, ignorews=False):

29

24

30

if not a and not b: return ""

25

if not a and not b: return ""

31

epoch = util.datestr((0, 0))

26

epoch = util.datestr((0, 0))

32

27

33

if not text and (util.binary(a) or util.binary(b)):

28

if not text and (util.binary(a) or util.binary(b)):

34

l = ['Binary file %s has changed\n' % fn]

29

l = ['Binary file %s has changed\n' % fn]

35

elif not a:

30

elif not a:

36

b = splitnewlines(b, keepends=True)

31

b = splitnewlines(b)

37

if a is None:

32

if a is None:

38

l1 = "--- %s\t%s\n" % ("/dev/null", epoch)

33

l1 = "--- %s\t%s\n" % ("/dev/null", epoch)

39

else:

34

else:

40

l1 = "--- %s\t%s\n" % ("a/" + fn, ad)

35

l1 = "--- %s\t%s\n" % ("a/" + fn, ad)

41

l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)

36

l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)

42

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

37

l3 = "@@ -0,0 +1,%d @@\n" % len(b)

43

l = [l1, l2, l3] + ["+" + e for e in b]

38

l = [l1, l2, l3] + ["+" + e for e in b]

44

elif not b:

39

elif not b:

45

a = splitnewlines(a, keepends=True)

40

a = splitnewlines(a)

46

l1 = "--- %s\t%s\n" % ("a/" + fn, ad)

41

l1 = "--- %s\t%s\n" % ("a/" + fn, ad)

47

if b is None:

42

if b is None:

48

l2 = "+++ %s\t%s\n" % ("/dev/null", epoch)

43

l2 = "+++ %s\t%s\n" % ("/dev/null", epoch)

49

else:

44

else:

50

l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)

45

l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)

51

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

46

l3 = "@@ -1,%d +0,0 @@\n" % len(a)

52

l = [l1, l2, l3] + ["-" + e for e in a]

47

l = [l1, l2, l3] + ["-" + e for e in a]

53

else:

48

else:

54

al = splitnewlines(a, keepends=True)

49

al = splitnewlines(a)

55

bl = splitnewlines(b, keepends=True)

50

bl = splitnewlines(b)

56

l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn,

51

l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn,

57

showfunc=showfunc, ignorews=ignorews))

52

showfunc=showfunc, ignorews=ignorews))

58

if not l: return ""

53

if not l: return ""

59

# difflib uses a space, rather than a tab

54

# difflib uses a space, rather than a tab

60

l[0] = "%s\t%s\n" % (l[0][:-2], ad)

55

l[0] = "%s\t%s\n" % (l[0][:-2], ad)

61

l[1] = "%s\t%s\n" % (l[1][:-2], bd)

56

l[1] = "%s\t%s\n" % (l[1][:-2], bd)

62

57

63

for ln in xrange(len(l)):

58

for ln in xrange(len(l)):

64

if l[ln][-1] != '\n':

59

if l[ln][-1] != '\n':

65

l[ln] += "\n\ No newline at end of file\n"

60

l[ln] += "\n\ No newline at end of file\n"

66

61

67

if r:

62

if r:

68

l.insert(0, "diff %s %s\n" %

63

l.insert(0, "diff %s %s\n" %

69

(' '.join(["-r %s" % rev for rev in r]), fn))

64

(' '.join(["-r %s" % rev for rev in r]), fn))

70

65

71

return "".join(l)

66

return "".join(l)

72

67

73

# somewhat self contained replacement for difflib.unified_diff

68

# somewhat self contained replacement for difflib.unified_diff

74

# t1 and t2 are the text to be diffed

69

# t1 and t2 are the text to be diffed

75

# l1 and l2 are the text broken up into lines

70

# l1 and l2 are the text broken up into lines

76

# header1 and header2 are the filenames for the diff output

71

# header1 and header2 are the filenames for the diff output

77

# context is the number of context lines

72

# context is the number of context lines

78

# showfunc enables diff -p output

73

# showfunc enables diff -p output

79

# ignorews ignores all whitespace changes in the diff

74

# ignorews ignores all whitespace changes in the diff

80

def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,

75

def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,

81

ignorews=False):

76

ignorews=False):

82

def contextend(l, len):

77

def contextend(l, len):

83

ret = l + context

78

ret = l + context

84

if ret > len:

79

if ret > len:

85

ret = len

80

ret = len

86

return ret

81

return ret

87

82

88

def contextstart(l):

83

def contextstart(l):

89

ret = l - context

84

ret = l - context

90

if ret < 0:

85

if ret < 0:

91

return 0

86

return 0

92

return ret

87

return ret

93

88

94

def yieldhunk(hunk, header):

89

def yieldhunk(hunk, header):

95

if header:

90

if header:

96

for x in header:

91

for x in header:

97

yield x

92

yield x

98

(astart, a2, bstart, b2, delta) = hunk

93

(astart, a2, bstart, b2, delta) = hunk

99

aend = contextend(a2, len(l1))

94

aend = contextend(a2, len(l1))

100

alen = aend - astart

95

alen = aend - astart

101

blen = b2 - bstart + aend - a2

96

blen = b2 - bstart + aend - a2

102

97

103

func = ""

98

func = ""

104

if showfunc:

99

if showfunc:

105

# walk backwards from the start of the context

100

# walk backwards from the start of the context

106

# to find a line starting with an alphanumeric char.

101

# to find a line starting with an alphanumeric char.

107

for x in xrange(astart, -1, -1):

102

for x in xrange(astart, -1, -1):

108

t = l1[x].rstrip()

103

t = l1[x].rstrip()

109

if funcre.match(t):

104

if funcre.match(t):

110

func = ' ' + t[:40]

105

func = ' ' + t[:40]

111

break

106

break

112

107

113

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,

108

yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,

114

bstart + 1, blen, func)

109

bstart + 1, blen, func)

115

for x in delta:

110

for x in delta:

116

yield x

111

yield x

117

for x in xrange(a2, aend):

112

for x in xrange(a2, aend):

118

yield ' ' + l1[x]

113

yield ' ' + l1[x]

119

114

120

header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

115

header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

121

116

122

if showfunc:

117

if showfunc:

123

funcre = re.compile('\w')

118

funcre = re.compile('\w')

124

if ignorews:

119

if ignorews:

125

wsre = re.compile('[ \t]')

120

wsre = re.compile('[ \t]')

126

121

127

# bdiff.blocks gives us the matching sequences in the files. The loop

122

# bdiff.blocks gives us the matching sequences in the files. The loop

128

# below finds the spaces between those matching sequences and translates

123

# below finds the spaces between those matching sequences and translates

129

# them into diff output.

124

# them into diff output.

130

#

125

#

131

diff = bdiff.blocks(t1, t2)

126

diff = bdiff.blocks(t1, t2)

132

hunk = None

127

hunk = None

133

for i in xrange(len(diff)):

128

for i in xrange(len(diff)):

134

# The first match is special.

129

# The first match is special.

135

# we've either found a match starting at line 0 or a match later

130

# we've either found a match starting at line 0 or a match later

136

# in the file. If it starts later, old and new below will both be

131

# in the file. If it starts later, old and new below will both be

137

# empty and we'll continue to the next match.

132

# empty and we'll continue to the next match.

138

if i > 0:

133

if i > 0:

139

s = diff[i-1]

134

s = diff[i-1]

140

else:

135

else:

141

s = [0, 0, 0, 0]

136

s = [0, 0, 0, 0]

142

delta = []

137

delta = []

143

s1 = diff[i]

138

s1 = diff[i]

144

a1 = s[1]

139

a1 = s[1]

145

a2 = s1[0]

140

a2 = s1[0]

146

b1 = s[3]

141

b1 = s[3]

147

b2 = s1[2]

142

b2 = s1[2]

148

143

149

old = l1[a1:a2]

144

old = l1[a1:a2]

150

new = l2[b1:b2]

145

new = l2[b1:b2]

151

146

152

# bdiff sometimes gives huge matches past eof, this check eats them,

147

# bdiff sometimes gives huge matches past eof, this check eats them,

153

# and deals with the special first match case described above

148

# and deals with the special first match case described above

154

if not old and not new:

149

if not old and not new:

155

continue

150

continue

156

151

157

if ignorews:

152

if ignorews:

158

wsold = wsre.sub('', "".join(old))

153

wsold = wsre.sub('', "".join(old))

159

wsnew = wsre.sub('', "".join(new))

154

wsnew = wsre.sub('', "".join(new))

160

if wsold == wsnew:

155

if wsold == wsnew:

161

continue

156

continue

162

157

163

astart = contextstart(a1)

158

astart = contextstart(a1)

164

bstart = contextstart(b1)

159

bstart = contextstart(b1)

165

prev = None

160

prev = None

166

if hunk:

161

if hunk:

167

# join with the previous hunk if it falls inside the context

162

# join with the previous hunk if it falls inside the context

168

if astart < hunk[1] + context + 1:

163

if astart < hunk[1] + context + 1:

169

prev = hunk

164

prev = hunk

170

astart = hunk[1]

165

astart = hunk[1]

171

bstart = hunk[3]

166

bstart = hunk[3]

172

else:

167

else:

173

for x in yieldhunk(hunk, header):

168

for x in yieldhunk(hunk, header):

174

yield x

169

yield x

175

# we only want to yield the header if the files differ, and

170

# we only want to yield the header if the files differ, and

176

# we only want to yield it once.

171

# we only want to yield it once.

177

header = None

172

header = None

178

if prev:

173

if prev:

179

# we've joined the previous hunk, record the new ending points.

174

# we've joined the previous hunk, record the new ending points.

180

hunk[1] = a2

175

hunk[1] = a2

181

hunk[3] = b2

176

hunk[3] = b2

182

delta = hunk[4]

177

delta = hunk[4]

183

else:

178

else:

184

# create a new hunk

179

# create a new hunk

185

hunk = [ astart, a2, bstart, b2, delta ]

180

hunk = [ astart, a2, bstart, b2, delta ]

186

181

187

delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]

182

delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]

188

delta[len(delta):] = [ '-' + x for x in old ]

183

delta[len(delta):] = [ '-' + x for x in old ]

189

delta[len(delta):] = [ '+' + x for x in new ]

184

delta[len(delta):] = [ '+' + x for x in new ]

190

185

191

if hunk:

186

if hunk:

192

for x in yieldhunk(hunk, header):

187

for x in yieldhunk(hunk, header):

193

yield x

188

yield x

194

189

195

def patchtext(bin):

190

def patchtext(bin):

196

pos = 0

191

pos = 0

197

t = []

192

t = []

198

while pos < len(bin):

193

while pos < len(bin):

199

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

194

p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])

200

pos += 12

195

pos += 12

201

t.append(bin[pos:pos + l])

196

t.append(bin[pos:pos + l])

202

pos += l

197

pos += l

203

return "".join(t)

198

return "".join(t)

204

199

205

def patch(a, bin):

200

def patch(a, bin):

206

return mpatch.patches(a, [bin])

201

return mpatch.patches(a, [bin])

207

202

208

patches = mpatch.patches

203

patches = mpatch.patches

209

patchedsize = mpatch.patchedsize

204

patchedsize = mpatch.patchedsize

210

textdiff = bdiff.bdiff

205

textdiff = bdiff.bdiff

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mdiff.py - diff and patch routines for mercurial
             #
             # Copyright 2005 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms
             # of the GNU General Public License, incorporated herein by reference.
             from demandload import demandload
             import struct, bdiff, util, mpatch
             demandload(globals(), "re")
+            def splitnewlines(text):
-            def splitnewlines(text, keepends=False):
                 '''like str.splitlines, but only split on newlines.'''
-                i = 0
+                lines = [l + '\n' for l in text.split('\n')]
-                lines = []
+                if lines:
-                while True:
+                    if lines[-1] == '\n':
-                    n = text.find('\n', i)
+                        lines.pop()
-                    if n == -1:
+                    else:
-                        last = text[i:]
+                        lines[-1] = lines[-1][:-1]
-                        if last:
+                return lines
-                            lines.append(last)
-                        return lines
-                    lines.append(text[i:keepends and n+1 or n])
-                    i = n + 1
             def unidiff(a, ad, b, bd, fn, r=None, text=False,
                         showfunc=False, ignorews=False):
                 if not a and not b: return ""
                 epoch = util.datestr((0, 0))
                 if not text and (util.binary(a) or util.binary(b)):
                     l = ['Binary file %s has changed\n' % fn]
                 elif not a:
-                    b = splitnewlines(b, keepends=True)
+                    b = splitnewlines(b)
                     if a is None:
                         l1 = "--- %s\t%s\n" % ("/dev/null", epoch)
                     else:
                         l1 = "--- %s\t%s\n" % ("a/" + fn, ad)
                     l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)
                     l3 = "@@ -0,0 +1,%d @@\n" % len(b)
                     l = [l1, l2, l3] + ["+" + e for e in b]
                 elif not b:
-                    a = splitnewlines(a, keepends=True)
+                    a = splitnewlines(a)
                     l1 = "--- %s\t%s\n" % ("a/" + fn, ad)
                     if b is None:
                         l2 = "+++ %s\t%s\n" % ("/dev/null", epoch)
                     else:
                         l2 = "+++ %s\t%s\n" % ("b/" + fn, bd)
                     l3 = "@@ -1,%d +0,0 @@\n" % len(a)
                     l = [l1, l2, l3] + ["-" + e for e in a]
                 else:
-                    al = splitnewlines(a, keepends=True)
+                    al = splitnewlines(a)
-                    bl = splitnewlines(b, keepends=True)
+                    bl = splitnewlines(b)
                     l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn,
                                       showfunc=showfunc, ignorews=ignorews))
                     if not l: return ""
                     # difflib uses a space, rather than a tab
                     l[0] = "%s\t%s\n" % (l[0][:-2], ad)
                     l[1] = "%s\t%s\n" % (l[1][:-2], bd)
                 for ln in xrange(len(l)):
                     if l[ln][-1] != '\n':
                         l[ln] += "\n\ No newline at end of file\n"
                 if r:
                     l.insert(0, "diff %s %s\n" %
                                 (' '.join(["-r %s" % rev for rev in r]), fn))
                 return "".join(l)
             # somewhat self contained replacement for difflib.unified_diff
             # t1 and t2 are the text to be diffed
             # l1 and l2 are the text broken up into lines
             # header1 and header2 are the filenames for the diff output
             # context is the number of context lines
             # showfunc enables diff -p output
             # ignorews ignores all whitespace changes in the diff
             def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
                          ignorews=False):
                 '''generate the lines of a unified diff between two texts.

                 t1 and t2 are the full texts handed to bdiff.blocks; l1 and l2
                 are the same texts already broken up into lines.  header1 and
                 header2 are the names placed on the "---" and "+++" lines.
                 context is the number of unchanged lines kept around each
                 change.  showfunc appends to every "@@" line the nearest
                 preceding line of l1 that starts with a word character (at
                 most 40 characters of it).  ignorews skips changed regions that
                 are equal once spaces and tabs are removed.

                 Nothing is yielded when no hunk is produced; otherwise the two
                 header lines are yielded once, ahead of the first hunk.
                 '''
                 def contextend(l, len):
                     '''return l + context, capped at len.'''
                     ret = l + context
                     if ret > len:
                         ret = len
                     return ret

                 def contextstart(l):
                     '''return l - context, but never less than 0.'''
                     ret = l - context
                     if ret < 0:
                         return 0
                     return ret

                 def yieldhunk(hunk, header):
                     '''yield header (if any), the "@@" line and the hunk body.

                     hunk is [astart, a2, bstart, b2, delta]; delta already holds
                     the leading context and the -/+ lines, so only the trailing
                     context is added here.
                     '''
                     if header:
                         for x in header:
                             yield x
                     (astart, a2, bstart, b2, delta) = hunk
                     # trailing context runs from the end of the change up to
                     # `context` lines later, without passing the end of l1
                     aend = contextend(a2, len(l1))
                     alen = aend - astart
                     # the b side covers its own changed span plus the same
                     # number of trailing context lines as the a side
                     blen = b2 - bstart + aend - a2

                     func = ""
                     if showfunc:
                         # walk backwards from the start of the context
                         # to find a line starting with a word character
                         # (funcre is '\w', matched at the start of the line).
                         for x in xrange(astart, -1, -1):
                             t = l1[x].rstrip()
                             if funcre.match(t):
                                 func = ' ' + t[:40]
                                 break

                     # line numbers in the "@@" line are 1-based
                     yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                                        bstart + 1, blen, func)
                     for x in delta:
                         yield x
                     for x in xrange(a2, aend):
                         yield ' ' + l1[x]

                 # the trailing tab leaves room for a date; no date is added here
                 header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

                 # the regexes are only compiled when the matching option is on;
                 # every use below is guarded by the same flag
                 if showfunc:
                     funcre = re.compile('\w')
                 if ignorews:
                     wsre = re.compile('[ \t]')

                 # bdiff.blocks gives us the matching sequences in the files.  The loop
                 # below finds the spaces between those matching sequences and translates
                 # them into diff output.
                 #
                 # NOTE(review): each block is indexed below as
                 # (a start, a end, b start, b end) in line units -- inferred from
                 # this usage, confirm against bdiff.
                 diff = bdiff.blocks(t1, t2)
                 hunk = None
                 for i in xrange(len(diff)):
                     # The first match is special: there is no previous match, so
                     # an all-zero block stands in for it.  If the first match
                     # starts at line 0 of both texts, old and new below will both
                     # be empty and we'll continue to the next match; otherwise
                     # they hold the lines that precede the first match.
                     if i > 0:
                         s = diff[i-1]
                     else:
                         s = [0, 0, 0, 0]
                     delta = []
                     s1 = diff[i]
                     # the changed region is the gap between the end of the
                     # previous match (s) and the start of this one (s1)
                     a1 = s[1]
                     a2 = s1[0]
                     b1 = s[3]
                     b2 = s1[2]

                     old = l1[a1:a2]
                     new = l2[b1:b2]

                     # bdiff sometimes gives huge matches past eof, this check eats them,
                     # and deals with the special first match case described above
                     if not old and not new:
                         continue

                     if ignorews:
                         # compare the two regions with all spaces and tabs removed
                         wsold = wsre.sub('', "".join(old))
                         wsnew = wsre.sub('', "".join(new))
                         if wsold == wsnew:
                             continue

                     astart = contextstart(a1)
                     bstart = contextstart(b1)
                     prev = None
                     if hunk:
                         # join with the previous hunk if it falls inside the context
                         if astart < hunk[1] + context + 1:
                             prev = hunk
                             # resume right where the previous hunk's changes
                             # ended, so the lines in between become context
                             astart = hunk[1]
                             bstart = hunk[3]
                         else:
                             for x in yieldhunk(hunk, header):
                                 yield x
                             # we only want to yield the header if the files differ, and
                             # we only want to yield it once.
                             header = None
                     if prev:
                         # we've joined the previous hunk, record the new ending points.
                         hunk[1] = a2
                         hunk[3] = b2
                         # keep appending to the joined hunk's own line list
                         delta = hunk[4]
                     else:
                         # create a new hunk
                         hunk = [ astart, a2, bstart, b2, delta ]

                     # delta is the list stored in the hunk, so these in-place
                     # extensions add leading context, removals and additions to it
                     delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
                     delta[len(delta):] = [ '-' + x for x in old ]
                     delta[len(delta):] = [ '+' + x for x in new ]

                 # flush the last pending hunk; header is still set here only if
                 # no hunk was yielded inside the loop
                 if hunk:
                     for x in yieldhunk(hunk, header):
                         yield x
             def patchtext(bin):
                 '''return the concatenated data payloads of a binary patch.

                 bin is read as a run of fragments, each made of a 12-byte header
                 (three big-endian signed 32-bit integers) followed by a payload
                 whose length is the third header field.  Only the payloads are
                 kept; the first two header fields are not used here.
                 '''
                 headersize = 12
                 total = len(bin)
                 chunks = []
                 offset = 0
                 while offset < total:
                     datastart = offset + headersize
                     # the third field of the header is the payload length
                     length = struct.unpack(">lll", bin[offset:datastart])[2]
                     offset = datastart + length
                     chunks.append(bin[datastart:offset])
                 return "".join(chunks)
             def patch(a, bin):
                 '''apply the single binary patch bin to a via mpatch.patches.'''
                 # mpatch.patches is handed a list of patches; wrap the lone one
                 fragments = [bin]
                 return mpatch.patches(a, fragments)
             # re-export helpers from the mpatch and bdiff modules under this
             # module's namespace, so they can be reached as mdiff.<name>
             patches = mpatch.patches
             patchedsize = mpatch.patchedsize
             textdiff = bdiff.bdiff