##// END OF EJS Templates
bdiff: implement cffi version of bdiff
Maciej Fijalkowski -
r29834:1ea77b75 default
parent child Browse files
Show More
@@ -1,136 +1,169 b''
1 1 # bdiff.py - Python implementation of bdiff.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import difflib
12 12 import re
13 13 import struct
14 14
15 15 from . import policy
16 16 policynocffi = policy.policynocffi
17 17 modulepolicy = policy.policy
18 18
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline.  A final line that is
    not newline-terminated is returned as-is, and no empty trailing entry
    is produced when the text ends with a newline (or is empty).
    '''
    pieces = text.split('\n')
    # split() always yields at least one piece; the last one is whatever
    # follows the final newline (possibly nothing at all).
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
28 28
29 29 def _normalizeblocks(a, b, blocks):
30 30 prev = None
31 31 r = []
32 32 for curr in blocks:
33 33 if prev is None:
34 34 prev = curr
35 35 continue
36 36 shift = 0
37 37
38 38 a1, b1, l1 = prev
39 39 a1end = a1 + l1
40 40 b1end = b1 + l1
41 41
42 42 a2, b2, l2 = curr
43 43 a2end = a2 + l2
44 44 b2end = b2 + l2
45 45 if a1end == a2:
46 46 while (a1end + shift < a2end and
47 47 a[a1end + shift] == b[b1end + shift]):
48 48 shift += 1
49 49 elif b1end == b2:
50 50 while (b1end + shift < b2end and
51 51 a[a1end + shift] == b[b1end + shift]):
52 52 shift += 1
53 53 r.append((a1, b1, l1 + shift))
54 54 prev = a2 + shift, b2 + shift, l2 - shift
55 55 r.append(prev)
56 56 return r
57 57
58 58 def _tostring(c):
59 59 if type(c) is array.array:
60 60 # this copy overhead isn't ideal
61 61 return c.tostring()
62 62 return str(c)
63 63
def bdiff(a, b):
    '''Return a binary delta describing how to turn text ``a`` into ``b``.

    Both inputs are converted with _tostring() and compared line by line
    (line endings are kept).  The delta is a concatenation of fragments,
    each made of a ">lll" header -- start offset in ``a``, end offset in
    ``a``, length of the replacement -- followed by the replacement data
    taken from ``b``.  An empty string is returned when nothing changes.
    '''
    a = _tostring(a).splitlines(True)
    b = _tostring(b).splitlines(True)

    if not a:
        # Nothing to match against: all of b is one insertion at offset 0
        # (or the empty string itself when b is empty too).
        newtext = "".join(b)
        return newtext and (struct.pack(">lll", 0, 0, len(newtext)) + newtext)

    # offsets[i] is the position in the original text where line i starts
    offsets = [0]
    for line in a:
        offsets.append(offsets[-1] + len(line))

    matches = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    fragments = []
    apos = bpos = 0
    for astart, bstart, length in _normalizeblocks(a, b, matches):
        # everything between the previous match and this one is replaced
        replacement = "".join(b[bpos:bstart])
        if astart > apos or replacement:
            header = struct.pack(">lll", offsets[apos], offsets[astart],
                                 len(replacement))
            fragments.append(header + replacement)
        apos = astart + length
        bpos = bstart + length

    return "".join(fragments)
88 88
def blocks(a, b):
    '''Return the matching line ranges of texts ``a`` and ``b``.

    The texts are split on newlines only.  Each entry of the result is a
    tuple (a1, a2, b1, b2) built from a normalized matching block: it
    starts at line a1 of ``a`` and b1 of ``b``, and a2/b2 are those
    starts plus the block length.
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    result = []
    for astart, bstart, length in matches:
        result.append((astart, astart + length, bstart, bstart + length))
    return result
95 95
def fixws(text, allws):
    '''Normalize whitespace in ``text``.

    With ``allws`` set, every run of spaces, tabs and carriage returns is
    removed.  Otherwise each run is collapsed to a single space and a
    space directly before a newline is dropped.
    '''
    if allws:
        return re.sub('[ \t\r]+', '', text)
    collapsed = re.sub('[ \t\r]+', ' ', text)
    return collapsed.replace(' \n', '\n')
103 103
# When the module policy allows it, replace the pure-Python blocks() and
# bdiff() defined above with versions backed by the _bdiff_cffi extension.
if modulepolicy not in policynocffi:
    try:
        from _bdiff_cffi import ffi, lib
    except ImportError:
        if modulepolicy == 'cffi': # strict cffi import
            raise
    else:
        def blocks(sa, sb):
            '''cffi-backed blocks(): matching ranges of ``sa`` and ``sb``.

            Returns a list of (a1, a2, b1, b2) tuples, one per hunk
            reported by lib.bdiff_diff.  Raises MemoryError when line
            splitting or diffing fails on the C side.
            '''
            alines = ffi.new("struct bdiff_line**")
            blines = ffi.new("struct bdiff_line**")
            abuf = ffi.new("char[]", sa)
            bbuf = ffi.new("char[]", sb)
            # allocated before the try block so the cleanup in ``finally``
            # can always reach head.next
            head = ffi.new("struct bdiff_hunk*")
            try:
                acount = lib.bdiff_splitlines(abuf, len(sa), alines)
                bcount = lib.bdiff_splitlines(bbuf, len(sb), blines)
                if not alines[0] or not blines[0]:
                    raise MemoryError
                nhunks = lib.bdiff_diff(alines[0], acount,
                                        blines[0], bcount, head)
                if nhunks < 0:
                    raise MemoryError
                # one slot per hunk, filled while walking the linked list
                result = [None] * nhunks
                hunk = head.next
                idx = 0
                while hunk:
                    result[idx] = (hunk.a1, hunk.a2, hunk.b1, hunk.b2)
                    hunk = hunk.next
                    idx += 1
            finally:
                lib.free(alines[0])
                lib.free(blines[0])
                lib.bdiff_freehunks(head.next)
            return result

        def bdiff(sa, sb):
            '''cffi-backed bdiff(): binary delta from ``sa`` to ``sb``.

            The result is a concatenation of fragments, each a ">lll"
            header followed by the replacement data copied out of the
            buffer holding ``sb``.  Raises MemoryError when line splitting
            or diffing fails on the C side.
            '''
            alines = ffi.new("struct bdiff_line**")
            blines = ffi.new("struct bdiff_line**")
            abuf = ffi.new("char[]", sa)
            bbuf = ffi.new("char[]", sb)
            head = ffi.new("struct bdiff_hunk*")
            try:
                acount = lib.bdiff_splitlines(abuf, len(sa), alines)
                bcount = lib.bdiff_splitlines(bbuf, len(sb), blines)
                if not alines[0] or not blines[0]:
                    raise MemoryError
                nhunks = lib.bdiff_diff(alines[0], acount,
                                        blines[0], bcount, head)
                if nhunks < 0:
                    raise MemoryError
                pieces = []
                hunk = head.next
                # line indexes just past the previous matching hunk
                aline = bline = 0
                while hunk:
                    if hunk.a1 != aline or hunk.b1 != bline:
                        # NOTE(review): ``.l`` is presumably a pointer to
                        # the start of a line inside the char buffer, so
                        # pointer differences are byte counts -- confirm
                        # against the bdiff_line declaration.
                        size = (blines[0] + hunk.b1).l - (blines[0] + bline).l
                        start = (alines[0] + aline).l - alines[0].l
                        end = (alines[0] + hunk.a1).l - alines[0].l
                        pieces.append(struct.pack(">lll", start, end, size))
                        pieces.append(
                            str(ffi.buffer((blines[0] + bline).l, size)))
                    aline = hunk.a2
                    bline = hunk.b2
                    hunk = hunk.next

            finally:
                lib.free(alines[0])
                lib.free(blines[0])
                lib.bdiff_freehunks(head.next)
            return "".join(pieces)
General Comments 0
You need to be logged in to leave comments. Login now