##// END OF EJS Templates
py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara -
r31641:f2b334e6 default
parent child Browse files
Show More
@@ -1,165 +1,162 b''
1 # bdiff.py - Python implementation of bdiff.c
1 # bdiff.py - Python implementation of bdiff.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import difflib
10 import difflib
11 import re
11 import re
12 import struct
12 import struct
13
13
14 from . import policy
14 from . import policy
15 policynocffi = policy.policynocffi
15 policynocffi = policy.policynocffi
16 modulepolicy = policy.policy
16 modulepolicy = policy.policy
17
17
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline, except possibly the
    last one when ``text`` does not end with a newline. Other line
    boundaries recognised by ``splitlines`` (such as a bare carriage
    return) are left untouched. An empty ``text`` yields an empty list.

    Both byte strings and unicode strings are accepted; the result holds
    elements of the same type as the input.
    '''
    # Choose a separator of the same type as the input so that byte
    # strings keep working on Python 3, where bytes and str do not mix.
    newline = u'\n' if isinstance(text, type(u'')) else b'\n'
    pieces = text.split(newline)
    # split() always returns at least one piece; the final piece is
    # whatever follows the last newline and so carries no terminator.
    tail = pieces.pop()
    lines = [piece + newline for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
27
27
def _normalizeblocks(a, b, blocks):
    '''Slide the boundary between adjacent matching blocks forward.

    ``blocks`` is a sequence of ``(astart, bstart, length)`` triples
    describing runs where ``a`` and ``b`` match. For each consecutive pair
    of blocks that touch in ``a`` (or, failing that, in ``b``), the first
    block is extended for as long as the elements following it in ``a``
    and ``b`` are equal, and the second block is shortened by the same
    amount.

    Returns a new list of triples. An empty ``blocks`` yields an empty
    list.
    '''
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            # the blocks are contiguous in a: grow prev while the next
            # elements still match, without running past curr
            while (a1end + shift < a2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        elif b1end == b2:
            # same idea when the blocks are contiguous in b
            while (b1end + shift < b2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift
    # Only emit the trailing block when there was at least one input block;
    # otherwise a bare None would end up in the result.
    if prev is not None:
        r.append(prev)
    return r
56
56
def _tostring(c):
    '''Return ``c`` as an immutable byte string.'''
    # bytes() instead of str(): on Python 3, str() of a bytes-like object
    # produces its repr text rather than its contents. On Python 2 the two
    # names refer to the same type, so behaviour there is unchanged.
    return bytes(c)
59
def bdiff(a, b):
    '''Return a binary delta describing how to turn ``a`` into ``b``.

    Both arguments are converted with ``bytes()`` and compared line by
    line. The result is a byte string made of zero or more hunks; each
    hunk is three big-endian 32-bit integers (start offset in ``a``, end
    offset in ``a``, length of the replacement data) followed by the
    replacement data taken from ``b``. An empty result means no hunks
    were needed.
    '''
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        # nothing to match against: emit all of b as a single insertion
        # at offset 0, or an empty delta when b is empty too
        s = b"".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    chunks = []
    # p[i] is the byte offset in a at which line i starts
    p = [0]
    for line in a:
        p.append(p[-1] + len(line))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        # lines of b lying between the previous match and this one
        s = b"".join(b[lb:bm])
        if am > la or s:
            chunks.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return b"".join(chunks)
84
81
def blocks(a, b):
    '''Return the matching line ranges of ``a`` and ``b``.

    Both texts are split on newlines, matched with difflib and the
    matches normalized. Each entry of the result is a tuple
    ``(astart, aend, bstart, bend)`` of line indices.
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    return [(astart, astart + length, bstart, bstart + length)
            for (astart, bstart, length) in matches]
91
88
def fixws(text, allws):
    '''Normalize runs of spaces, tabs and carriage returns in ``text``.

    When ``allws`` is true every such run is removed. Otherwise each run
    is collapsed to a single space, and a space directly before a newline
    is dropped.

    Both byte strings and unicode strings are accepted; the result has
    the same kind as the input.
    '''
    # Pick literals of the same type as the input: on Python 3 a str
    # pattern cannot be applied to bytes (and vice versa).
    if isinstance(text, type(u'')):
        wsrun, space, empty, newline = u'[ \t\r]+', u' ', u'', u'\n'
    else:
        wsrun, space, empty, newline = b'[ \t\r]+', b' ', b'', b'\n'
    if allws:
        text = re.sub(wsrun, empty, text)
    else:
        text = re.sub(wsrun, space, text)
        text = text.replace(space + newline, newline)
    return text
99
96
# Optional cffi-backed implementations. When the module policy permits cffi
# and the _bdiff_cffi extension imports successfully, the blocks() and
# bdiff() defined below are bound at module level.
if modulepolicy not in policynocffi:
    try:
        from _bdiff_cffi import ffi, lib
    except ImportError:
        # a missing extension is only fatal when the policy is exactly
        # 'cffi'; for any other policy the import failure is ignored
        if modulepolicy == 'cffi': # strict cffi import
            raise
    else:
        def blocks(sa, sb):
            '''Return a list of (a1, a2, b1, b2) tuples, one per hunk
            produced by lib.bdiff_diff for the texts sa and sb.'''
            # out-parameters that receive the line arrays
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            # C character arrays initialized from the inputs
            ac = ffi.new("char[]", str(sa))
            bc = ffi.new("char[]", str(sb))
            # head of the hunk list; results are reached through l.next
            l = ffi.new("struct bdiff_hunk*")
            try:
                # presumably returns the number of lines and stores the
                # line array in a[0] / b[0] -- confirm against bdiff.c
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                # a null line array is reported as an allocation failure
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                # a negative count is reported as an allocation failure
                if count < 0:
                    raise MemoryError
                rl = [None] * count
                h = l.next
                i = 0
                # walk the linked list of hunks, filling rl in order
                while h:
                    rl[i] = (h.a1, h.a2, h.b1, h.b2)
                    h = h.next
                    i += 1
            finally:
                # always release the line arrays and the hunk list,
                # whether or not the diff succeeded
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return rl

        def bdiff(sa, sb):
            '''Return a binary delta between sa and sb, built from the
            hunks produced by lib.bdiff_diff.

            Each emitted record is a ">lll" header (two offsets into sa
            and the length of the data) followed by the data copied out
            of sb.'''
            # out-parameters that receive the line arrays
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            # C character arrays initialized from the inputs
            ac = ffi.new("char[]", str(sa))
            bc = ffi.new("char[]", str(sb))
            # head of the hunk list; results are reached through l.next
            l = ffi.new("struct bdiff_hunk*")
            try:
                # presumably returns the number of lines and stores the
                # line array in a[0] / b[0] -- confirm against bdiff.c
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                # a null line array is reported as an allocation failure
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                # a negative count is reported as an allocation failure
                if count < 0:
                    raise MemoryError
                rl = []
                h = l.next
                # la / lb: line indices where the previous hunk ended
                la = lb = 0
                while h:
                    # a hunk that does not start where the previous one
                    # ended leaves a gap, which is emitted as a record
                    if h.a1 != la or h.b1 != lb:
                        # pointer difference between the two lines of sb,
                        # i.e. the length of the data to copy
                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
                        # offsets are measured from the first line of sa
                        rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
                                              (a[0] + h.a1).l - a[0].l, lgt))
                        rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
                    la = h.a2
                    lb = h.b2
                    h = h.next

            finally:
                # always release the line arrays and the hunk list,
                # whether or not the diff succeeded
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return "".join(rl)
General Comments 0
You need to be logged in to leave comments. Login now