##// END OF EJS Templates
bdiff: implement cffi version of bdiff
Maciej Fijalkowski -
r29834:1ea77b75 default
parent child Browse files
Show More
@@ -1,136 +1,169 b''
1 # bdiff.py - Python implementation of bdiff.c
1 # bdiff.py - Python implementation of bdiff.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import array
10 import array
11 import difflib
11 import difflib
12 import re
12 import re
13 import struct
13 import struct
14
14
15 from . import policy
15 from . import policy
# Module-policy settings taken from the policy module.  The cffi-backed
# implementations at the bottom of this file are only attempted when
# ``modulepolicy`` is not one of the entries of ``policynocffi``.
policynocffi = policy.policynocffi
modulepolicy = policy.policy
18
18
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline; a final fragment that
    is not newline-terminated is returned as is, and an empty input yields
    an empty list.  Both byte strings and unicode text are accepted; the
    result lines have the same type as ``text``.
    '''
    # pick a separator of the same type as the input so that bytes input
    # also works on Python 3
    newline = b'\n' if isinstance(text, bytes) else u'\n'
    pieces = text.split(newline)
    # split() always yields at least one piece; the last one is whatever
    # follows the final newline (empty if the text ended with a newline)
    tail = pieces.pop()
    lines = [piece + newline for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
28
28
def _normalizeblocks(a, b, blocks):
    '''Slide the boundaries between adjacent matching blocks forward.

    ``blocks`` is an iterable of ``(astart, bstart, length)`` triples
    describing runs where ``a`` and ``b`` match.  Whenever a block ends
    exactly where the next one starts (on the ``a`` side or on the ``b``
    side) and the elements following the first block keep matching, the
    first block is extended and the next one shrunk by the same amount.

    Returns a list of ``(astart, bstart, length)`` triples.  The final
    (possibly adjusted) block is appended twice, once as the last
    processed "previous" block and once as the trailing block; an empty
    ``blocks`` yields an empty list.
    '''
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            # blocks are contiguous in a: grow prev while it still matches
            while (a1end + shift < a2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        elif b1end == b2:
            # blocks are contiguous in b: same idea, bounded by curr's b end
            while (b1end + shift < b2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift
    # without this guard an empty input would produce [None]
    if prev is not None:
        r.append(prev)
    return r
57
57
def _tostring(c):
    '''Return the contents of ``c`` as a string suitable for diffing.

    ``array.array`` instances are serialized to their raw bytes, byte
    strings are returned unchanged, ``bytearray``/``memoryview`` objects
    are copied into a byte string, and anything else goes through
    ``str()``.
    '''
    if isinstance(c, array.array):
        # this copy overhead isn't ideal
        # tostring() was removed in Python 3.9; prefer tobytes() and only
        # fall back to tostring() where tobytes() does not exist
        tobytes = getattr(c, 'tobytes', None)
        if tobytes is None:
            tobytes = c.tostring
        return tobytes()
    if isinstance(c, bytes):
        return c
    if isinstance(c, memoryview):
        return c.tobytes()
    if isinstance(c, bytearray):
        # str() would give the repr on Python 3; bytes() is str() on Python 2
        return bytes(c)
    return str(c)
63
63
def bdiff(a, b):
    '''Return a binary delta that turns ``a`` into ``b``.

    Both inputs are converted with ``_tostring`` and compared line by
    line.  The result is a concatenation of fragments, each made of a
    ``">lll"`` header (start offset in ``a``, end offset in ``a``, length
    of the replacement data) followed by the replacement data itself.
    An empty result means there is nothing to change.
    '''
    a = _tostring(a).splitlines(True)
    b = _tostring(b).splitlines(True)

    if not a:
        # nothing to match against: one fragment inserting all of b
        # (or an empty delta when b is empty too)
        s = b"".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    fragments = []
    # p[i] is the offset of line i within the old text
    p = [0]
    for line in a:
        p.append(p[-1] + len(line))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        # lines of b between the previous match and this one
        s = b"".join(b[lb:bm])
        if am > la or s:
            fragments.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return b"".join(fragments)
88
88
def blocks(a, b):
    '''Return the matching line ranges between texts ``a`` and ``b``.

    Both texts are split on newlines only, matched with
    ``difflib.SequenceMatcher`` and normalized.  Each entry of the
    returned list is a tuple ``(a1, a2, b1, b2)`` meaning that lines
    ``a[a1:a2]`` are equal to lines ``b[b1:b2]``.
    '''
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]
95
95
def fixws(text, allws):
    '''Normalize whitespace in ``text``.

    With ``allws`` true, every run of spaces, tabs and carriage returns is
    removed.  Otherwise each such run is collapsed to a single space and a
    space directly before a newline is dropped.  Newlines themselves are
    never touched.  Accepts byte strings as well as unicode text and
    returns the same type.
    '''
    # patterns must have the same type as the subject on Python 3
    if isinstance(text, bytes):
        wsrun, space, empty, newline = b'[ \t\r]+', b' ', b'', b'\n'
    else:
        wsrun, space, empty, newline = u'[ \t\r]+', u' ', u'', u'\n'
    if allws:
        return re.sub(wsrun, empty, text)
    text = re.sub(wsrun, space, text)
    return text.replace(space + newline, newline)
103
103
# When the module policy allows it, replace the pure-Python blocks() and
# bdiff() defined above with versions backed by the _bdiff_cffi module.
if modulepolicy not in policynocffi:
    try:
        from _bdiff_cffi import ffi, lib
    except ImportError:
        if modulepolicy == 'cffi': # strict cffi import
            raise
    else:
        def blocks(sa, sb):
            '''cffi version of blocks(): list of (a1, a2, b1, b2) tuples.

            Raises MemoryError when line splitting or the diff itself
            reports failure.
            '''
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            ac = ffi.new("char[]", sa)
            bc = ffi.new("char[]", sb)
            # head of the hunk list; allocated before the try block so the
            # finally clause can always reference it
            l = ffi.new("struct bdiff_hunk*")
            try:
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                if count < 0:
                    raise MemoryError
                rl = [None] * count
                h = l.next
                i = 0
                while h:
                    rl[i] = (h.a1, h.a2, h.b1, h.b2)
                    h = h.next
                    i += 1
            finally:
                # release the line arrays and the hunk list on every path
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return rl

        def bdiff(sa, sb):
            '''cffi version of bdiff(): binary delta turning sa into sb.

            The result is a concatenation of ">lll" headers (start and end
            offset in sa, length of the new data) each followed by the new
            data taken from sb.  Raises MemoryError when line splitting or
            the diff itself reports failure.
            '''
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            ac = ffi.new("char[]", sa)
            bc = ffi.new("char[]", sb)
            l = ffi.new("struct bdiff_hunk*")
            try:
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                if count < 0:
                    raise MemoryError
                rl = []
                h = l.next
                la = lb = 0
                while h:
                    if h.a1 != la or h.b1 != lb:
                        # length of the data in sb between the end of the
                        # previous hunk and the start of this one
                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
                        rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
                                              (a[0] + h.a1).l - a[0].l, lgt))
                        # slicing copies the bytes out of the buffer on both
                        # Python 2 and 3 (str() only does so on Python 2)
                        rl.append(ffi.buffer((b[0] + lb).l, lgt)[:])
                    la = h.a2
                    lb = h.b2
                    h = h.next

            finally:
                # release the line arrays and the hunk list on every path
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return b"".join(rl)
General Comments 0
You need to be logged in to leave comments. Login now