upstream/mercurial-mirror Commit - r31641:f2b334e6

1

# bdiff.py - Python implementation of bdiff.c

1

# bdiff.py - Python implementation of bdiff.c

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import difflib

10

import difflib

11

import re

11

import re

12

import struct

12

import struct

13

14

from . import policy

14

from . import policy

15

policynocffi = policy.policynocffi

15

policynocffi = policy.policynocffi

16

modulepolicy = policy.policy

16

modulepolicy = policy.policy

17

18

def splitnewlines(text):

18

def splitnewlines(text):

19

'''like str.splitlines, but only split on newlines.'''

19

'''like str.splitlines, but only split on newlines.'''

20

lines = [l + '\n' for l in text.split('\n')]

20

lines = [l + '\n' for l in text.split('\n')]

21

if lines:

21

if lines:

22

if lines[-1] == '\n':

22

if lines[-1] == '\n':

23

lines.pop()

23

lines.pop()

24

else:

24

else:

25

lines[-1] = lines[-1][:-1]

25

lines[-1] = lines[-1][:-1]

26

return lines

26

return lines

27

28

def _normalizeblocks(a, b, blocks):

28

def _normalizeblocks(a, b, blocks):

29

prev = None

29

prev = None

30

r = []

30

r = []

31

for curr in blocks:

31

for curr in blocks:

32

if prev is None:

32

if prev is None:

33

prev = curr

33

prev = curr

34

continue

34

continue

35

shift = 0

35

shift = 0

36

37

a1, b1, l1 = prev

37

a1, b1, l1 = prev

38

a1end = a1 + l1

38

a1end = a1 + l1

39

b1end = b1 + l1

39

b1end = b1 + l1

40

41

a2, b2, l2 = curr

41

a2, b2, l2 = curr

42

a2end = a2 + l2

42

a2end = a2 + l2

43

b2end = b2 + l2

43

b2end = b2 + l2

44

if a1end == a2:

44

if a1end == a2:

45

while (a1end + shift < a2end and

45

while (a1end + shift < a2end and

46

a[a1end + shift] == b[b1end + shift]):

46

a[a1end + shift] == b[b1end + shift]):

47

shift += 1

47

shift += 1

48

elif b1end == b2:

48

elif b1end == b2:

49

while (b1end + shift < b2end and

49

while (b1end + shift < b2end and

50

a[a1end + shift] == b[b1end + shift]):

50

a[a1end + shift] == b[b1end + shift]):

51

shift += 1

51

shift += 1

52

r.append((a1, b1, l1 + shift))

52

r.append((a1, b1, l1 + shift))

53

prev = a2 + shift, b2 + shift, l2 - shift

53

prev = a2 + shift, b2 + shift, l2 - shift

54

r.append(prev)

54

r.append(prev)

55

return r

55

return r

56

57

def _tostring(c):

58

return str(c)

59

60

def bdiff(a, b):

57

def bdiff(a, b):

61

a = ~~_tostring~~(a).splitlines(True)

58

a = bytes(a).splitlines(True)

62

b = ~~_tostring~~(b).splitlines(True)

59

b = bytes(b).splitlines(True)

63

60

64

if not a:

61

if not a:

65

s = "".join(b)

62

s = "".join(b)

66

return s and (struct.pack(">lll", 0, 0, len(s)) + s)

63

return s and (struct.pack(">lll", 0, 0, len(s)) + s)

67

64

68

bin = []

65

bin = []

69

p = [0]

66

p = [0]

70

for i in a: p.append(p[-1] + len(i))

67

for i in a: p.append(p[-1] + len(i))

71

68

72

d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()

69

d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()

73

d = _normalizeblocks(a, b, d)

70

d = _normalizeblocks(a, b, d)

74

la = 0

71

la = 0

75

lb = 0

72

lb = 0

76

for am, bm, size in d:

73

for am, bm, size in d:

77

s = "".join(b[lb:bm])

74

s = "".join(b[lb:bm])

78

if am > la or s:

75

if am > la or s:

79

bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)

76

bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)

80

la = am + size

77

la = am + size

81

lb = bm + size

78

lb = bm + size

82

79

83

return "".join(bin)

80

return "".join(bin)

84

81

85

def blocks(a, b):

82

def blocks(a, b):

86

an = splitnewlines(a)

83

an = splitnewlines(a)

87

bn = splitnewlines(b)

84

bn = splitnewlines(b)

88

d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()

85

d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()

89

d = _normalizeblocks(an, bn, d)

86

d = _normalizeblocks(an, bn, d)

90

return [(i, i + n, j, j + n) for (i, j, n) in d]

87

return [(i, i + n, j, j + n) for (i, j, n) in d]

91

88

92

def fixws(text, allws):

89

def fixws(text, allws):

93

if allws:

90

if allws:

94

text = re.sub('[ \t\r]+', '', text)

91

text = re.sub('[ \t\r]+', '', text)

95

else:

92

else:

96

text = re.sub('[ \t\r]+', ' ', text)

93

text = re.sub('[ \t\r]+', ' ', text)

97

text = text.replace(' \n', '\n')

94

text = text.replace(' \n', '\n')

98

return text

95

return text

99

96

100

if modulepolicy not in policynocffi:

97

if modulepolicy not in policynocffi:

101

try:

98

try:

102

from _bdiff_cffi import ffi, lib

99

from _bdiff_cffi import ffi, lib

103

except ImportError:

100

except ImportError:

104

if modulepolicy == 'cffi': # strict cffi import

101

if modulepolicy == 'cffi': # strict cffi import

105

raise

102

raise

106

else:

103

else:

107

def blocks(sa, sb):

104

def blocks(sa, sb):

108

a = ffi.new("struct bdiff_line**")

105

a = ffi.new("struct bdiff_line**")

109

b = ffi.new("struct bdiff_line**")

106

b = ffi.new("struct bdiff_line**")

110

ac = ffi.new("char[]", str(sa))

107

ac = ffi.new("char[]", str(sa))

111

bc = ffi.new("char[]", str(sb))

108

bc = ffi.new("char[]", str(sb))

112

l = ffi.new("struct bdiff_hunk*")

109

l = ffi.new("struct bdiff_hunk*")

113

try:

110

try:

114

an = lib.bdiff_splitlines(ac, len(sa), a)

111

an = lib.bdiff_splitlines(ac, len(sa), a)

115

bn = lib.bdiff_splitlines(bc, len(sb), b)

112

bn = lib.bdiff_splitlines(bc, len(sb), b)

116

if not a[0] or not b[0]:

113

if not a[0] or not b[0]:

117

raise MemoryError

114

raise MemoryError

118

count = lib.bdiff_diff(a[0], an, b[0], bn, l)

115

count = lib.bdiff_diff(a[0], an, b[0], bn, l)

119

if count < 0:

116

if count < 0:

120

raise MemoryError

117

raise MemoryError

121

rl = [None] * count

118

rl = [None] * count

122

h = l.next

119

h = l.next

123

i = 0

120

i = 0

124

while h:

121

while h:

125

rl[i] = (h.a1, h.a2, h.b1, h.b2)

122

rl[i] = (h.a1, h.a2, h.b1, h.b2)

126

h = h.next

123

h = h.next

127

i += 1

124

i += 1

128

finally:

125

finally:

129

lib.free(a[0])

126

lib.free(a[0])

130

lib.free(b[0])

127

lib.free(b[0])

131

lib.bdiff_freehunks(l.next)

128

lib.bdiff_freehunks(l.next)

132

return rl

129

return rl

133

130

134

def bdiff(sa, sb):

131

def bdiff(sa, sb):

135

a = ffi.new("struct bdiff_line**")

132

a = ffi.new("struct bdiff_line**")

136

b = ffi.new("struct bdiff_line**")

133

b = ffi.new("struct bdiff_line**")

137

ac = ffi.new("char[]", str(sa))

134

ac = ffi.new("char[]", str(sa))

138

bc = ffi.new("char[]", str(sb))

135

bc = ffi.new("char[]", str(sb))

139

l = ffi.new("struct bdiff_hunk*")

136

l = ffi.new("struct bdiff_hunk*")

140

try:

137

try:

141

an = lib.bdiff_splitlines(ac, len(sa), a)

138

an = lib.bdiff_splitlines(ac, len(sa), a)

142

bn = lib.bdiff_splitlines(bc, len(sb), b)

139

bn = lib.bdiff_splitlines(bc, len(sb), b)

143

if not a[0] or not b[0]:

140

if not a[0] or not b[0]:

144

raise MemoryError

141

raise MemoryError

145

count = lib.bdiff_diff(a[0], an, b[0], bn, l)

142

count = lib.bdiff_diff(a[0], an, b[0], bn, l)

146

if count < 0:

143

if count < 0:

147

raise MemoryError

144

raise MemoryError

148

rl = []

145

rl = []

149

h = l.next

146

h = l.next

150

la = lb = 0

147

la = lb = 0

151

while h:

148

while h:

152

if h.a1 != la or h.b1 != lb:

149

if h.a1 != la or h.b1 != lb:

153

lgt = (b[0] + h.b1).l - (b[0] + lb).l

150

lgt = (b[0] + h.b1).l - (b[0] + lb).l

154

rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,

151

rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,

155

(a[0] + h.a1).l - a[0].l, lgt))

152

(a[0] + h.a1).l - a[0].l, lgt))

156

rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))

153

rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))

157

la = h.a2

154

la = h.a2

158

lb = h.b2

155

lb = h.b2

159

h = h.next

156

h = h.next

160

157

161

finally:

158

finally:

162

lib.free(a[0])

159

lib.free(a[0])

163

lib.free(b[0])

160

lib.free(b[0])

164

lib.bdiff_freehunks(l.next)

161

lib.bdiff_freehunks(l.next)

165

return "".join(rl)

162

return "".join(rl)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # bdiff.py - Python implementation of bdiff.c
             #
             # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import difflib
             import re
             import struct
             from . import policy
             policynocffi = policy.policynocffi
             modulepolicy = policy.policy
             def splitnewlines(text):
                 '''like str.splitlines, but only split on newlines.

                 Every returned line keeps its trailing newline; a final piece that
                 is not newline-terminated is returned as-is, and an empty input
                 gives an empty list.  Carriage returns are never treated as line
                 breaks.  Both str and bytes input are accepted, and the returned
                 lines have the same type as the input.
                 '''
                 # Use a separator of the same type as the input: concatenating the
                 # str literal onto bytes pieces raises TypeError on Python 3.
                 nl = b'\n' if isinstance(text, bytes) else '\n'
                 pieces = text.split(nl)
                 # split() always yields at least one piece; the last one is whatever
                 # follows the final newline (empty when the text ends with one).
                 tail = pieces.pop()
                 lines = [piece + nl for piece in pieces]
                 if tail:
                     lines.append(tail)
                 return lines
             def _normalizeblocks(a, b, blocks):
                 prev = None
                 r = []
                 for curr in blocks:
                     if prev is None:
                         prev = curr
                         continue
                     shift = 0
                     a1, b1, l1 = prev
                     a1end = a1 + l1
                     b1end = b1 + l1
                     a2, b2, l2 = curr
                     a2end = a2 + l2
                     b2end = b2 + l2
                     if a1end == a2:
                         while (a1end + shift < a2end and
                                a[a1end + shift] == b[b1end + shift]):
                             shift += 1
                     elif b1end == b2:
                         while (b1end + shift < b2end and
                                a[a1end + shift] == b[b1end + shift]):
                             shift += 1
                     r.append((a1, b1, l1 + shift))
                     prev = a2 + shift, b2 + shift, l2 - shift
                 r.append(prev)
                 return r
-            def _tostring(c):
-                return str(c)
             def bdiff(a, b):
-                a = _tostring(a).splitlines(True)
+                a = bytes(a).splitlines(True)
-                b = _tostring(b).splitlines(True)
+                b = bytes(b).splitlines(True)
                 if not a:
                     s = "".join(b)
                     return s and (struct.pack(">lll", 0, 0, len(s)) + s)
                 bin = []
                 p = [0]
                 for i in a: p.append(p[-1] + len(i))
                 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
                 d = _normalizeblocks(a, b, d)
                 la = 0
                 lb = 0
                 for am, bm, size in d:
                     s = "".join(b[lb:bm])
                     if am > la or s:
                         bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
                     la = am + size
                     lb = bm + size
                 return "".join(bin)
             def blocks(a, b):
                 '''Return the matching line ranges of the texts a and b.

                 Both texts are split on newlines only, matched with difflib and
                 normalized; each match is reported as a tuple
                 (astart, aend, bstart, bend) of line indexes.
                 '''
                 alines = splitnewlines(a)
                 blines = splitnewlines(b)
                 matcher = difflib.SequenceMatcher(None, alines, blines)
                 matches = _normalizeblocks(alines, blines,
                                            matcher.get_matching_blocks())
                 ranges = []
                 for astart, bstart, length in matches:
                     ranges.append((astart, astart + length,
                                    bstart, bstart + length))
                 return ranges
             def fixws(text, allws):
                 '''Normalize runs of spaces, tabs and carriage returns in text.

                 With allws true every such run is removed.  Otherwise each run is
                 collapsed to a single space and a space directly before a newline
                 is dropped.
                 '''
                 if allws:
                     return re.sub('[ \t\r]+', '', text)
                 collapsed = re.sub('[ \t\r]+', ' ', text)
                 return collapsed.replace(' \n', '\n')
             # Rebind blocks() and bdiff() to CFFI-backed versions unless the module
             # policy is one of those listed in policynocffi.
             if modulepolicy not in policynocffi:
                 try:
                     from _bdiff_cffi import ffi, lib
                 except ImportError:
                     # A missing extension is only fatal when the policy is exactly
                     # 'cffi'; otherwise the earlier definitions stay in effect.
                     if modulepolicy == 'cffi': # strict cffi import
                         raise
                 else:
                     def blocks(sa, sb):
                         '''Return a list of (a1, a2, b1, b2) tuples, one per hunk
                         produced by lib.bdiff_diff for the texts sa and sb.

                         Raises MemoryError when either line table pointer is null
                         after splitting or when bdiff_diff returns a negative count.
                         '''
                         # out-parameters that bdiff_splitlines fills with line tables
                         a = ffi.new("struct bdiff_line**")
                         b = ffi.new("struct bdiff_line**")
                         # C copies of the two inputs
                         ac = ffi.new("char[]", str(sa))
                         bc = ffi.new("char[]", str(sb))
                         # head node of the hunk list; results hang off l.next
                         l = ffi.new("struct bdiff_hunk*")
                         try:
                             an = lib.bdiff_splitlines(ac, len(sa), a)
                             bn = lib.bdiff_splitlines(bc, len(sb), b)
                             if not a[0] or not b[0]:
                                 raise MemoryError
                             count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                             if count < 0:
                                 raise MemoryError
                             # count is used as the number of hunks in the list
                             rl = [None] * count
                             h = l.next
                             i = 0
                             while h:
                                 rl[i] = (h.a1, h.a2, h.b1, h.b2)
                                 h = h.next
                                 i += 1
                         finally:
                             # always release the C-side allocations, also on error
                             lib.free(a[0])
                             lib.free(b[0])
                             lib.bdiff_freehunks(l.next)
                         return rl
                     def bdiff(sa, sb):
                         '''Return the concatenation of one record per changed region
                         between sa and sb: a ">lll" header (three big-endian 32-bit
                         signed integers) followed by lgt bytes read from the b side.

                         Raises MemoryError under the same conditions as blocks().
                         '''
                         a = ffi.new("struct bdiff_line**")
                         b = ffi.new("struct bdiff_line**")
                         ac = ffi.new("char[]", str(sa))
                         bc = ffi.new("char[]", str(sb))
                         l = ffi.new("struct bdiff_hunk*")
                         try:
                             an = lib.bdiff_splitlines(ac, len(sa), a)
                             bn = lib.bdiff_splitlines(bc, len(sb), b)
                             if not a[0] or not b[0]:
                                 raise MemoryError
                             count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                             if count < 0:
                                 raise MemoryError
                             rl = []
                             h = l.next
                             # la/lb: line indexes where the previous hunk ended
                             la = lb = 0
                             while h:
                                 # a gap between the previous hunk's end and this
                                 # hunk's start is emitted as a record
                                 if h.a1 != la or h.b1 != lb:
                                     # NOTE(review): .l looks like a pointer to the
                                     # start of a line, making these differences
                                     # byte offsets/lengths -- confirm against the
                                     # struct bdiff_line declaration
                                     lgt = (b[0] + h.b1).l - (b[0] + lb).l
                                     rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
                                         (a[0] + h.a1).l - a[0].l, lgt))
                                     rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
                                 la = h.a2
                                 lb = h.b2
                                 h = h.next
                         finally:
                             # always release the C-side allocations, also on error
                             lib.free(a[0])
                             lib.free(b[0])
                             lib.bdiff_freehunks(l.next)
                         return "".join(rl)