Show More
@@ -1,165 +1,162 b'' | |||||
1 | # bdiff.py - Python implementation of bdiff.c |
|
1 | # bdiff.py - Python implementation of bdiff.c | |
2 | # |
|
2 | # | |
3 | # Copyright 2009 Matt Mackall <mpm@selenic.com> and others |
|
3 | # Copyright 2009 Matt Mackall <mpm@selenic.com> and others | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
10 | import difflib |
|
10 | import difflib | |
11 | import re |
|
11 | import re | |
12 | import struct |
|
12 | import struct | |
13 |
|
13 | |||
14 | from . import policy |
|
14 | from . import policy | |
15 | policynocffi = policy.policynocffi |
|
15 | policynocffi = policy.policynocffi | |
16 | modulepolicy = policy.policy |
|
16 | modulepolicy = policy.policy | |
17 |
|
17 | |||
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing newline; only the last line may
    lack one, when ``text`` itself does not end with a newline.  Empty input
    gives an empty list.  Carriage returns are left untouched.

    Accepts byte strings as well as text strings: the separator is chosen to
    match the type of ``text`` and the returned lines have that same type.
    '''
    # A str separator combined with bytes input raises TypeError on
    # Python 3, so use the newline literal that matches the input.
    nl = b'\n' if isinstance(text, bytes) else '\n'
    lines = [l + nl for l in text.split(nl)]
    # split() always yields at least one piece, so ``lines`` is never empty.
    if lines[-1] == nl:
        # text was empty or ended with a newline: drop the phantom last line
        lines.pop()
    else:
        # text did not end with a newline: undo the one appended above
        lines[-1] = lines[-1][:-1]
    return lines
27 |
|
27 | |||
28 | def _normalizeblocks(a, b, blocks): |
|
28 | def _normalizeblocks(a, b, blocks): | |
29 | prev = None |
|
29 | prev = None | |
30 | r = [] |
|
30 | r = [] | |
31 | for curr in blocks: |
|
31 | for curr in blocks: | |
32 | if prev is None: |
|
32 | if prev is None: | |
33 | prev = curr |
|
33 | prev = curr | |
34 | continue |
|
34 | continue | |
35 | shift = 0 |
|
35 | shift = 0 | |
36 |
|
36 | |||
37 | a1, b1, l1 = prev |
|
37 | a1, b1, l1 = prev | |
38 | a1end = a1 + l1 |
|
38 | a1end = a1 + l1 | |
39 | b1end = b1 + l1 |
|
39 | b1end = b1 + l1 | |
40 |
|
40 | |||
41 | a2, b2, l2 = curr |
|
41 | a2, b2, l2 = curr | |
42 | a2end = a2 + l2 |
|
42 | a2end = a2 + l2 | |
43 | b2end = b2 + l2 |
|
43 | b2end = b2 + l2 | |
44 | if a1end == a2: |
|
44 | if a1end == a2: | |
45 | while (a1end + shift < a2end and |
|
45 | while (a1end + shift < a2end and | |
46 | a[a1end + shift] == b[b1end + shift]): |
|
46 | a[a1end + shift] == b[b1end + shift]): | |
47 | shift += 1 |
|
47 | shift += 1 | |
48 | elif b1end == b2: |
|
48 | elif b1end == b2: | |
49 | while (b1end + shift < b2end and |
|
49 | while (b1end + shift < b2end and | |
50 | a[a1end + shift] == b[b1end + shift]): |
|
50 | a[a1end + shift] == b[b1end + shift]): | |
51 | shift += 1 |
|
51 | shift += 1 | |
52 | r.append((a1, b1, l1 + shift)) |
|
52 | r.append((a1, b1, l1 + shift)) | |
53 | prev = a2 + shift, b2 + shift, l2 - shift |
|
53 | prev = a2 + shift, b2 + shift, l2 - shift | |
54 | r.append(prev) |
|
54 | r.append(prev) | |
55 | return r |
|
55 | return r | |
56 |
|
56 | |||
57 | def _tostring(c): |
|
|||
58 | return str(c) |
|
|||
59 |
|
||||
def bdiff(a, b):
    '''Return a binary delta describing how to turn ``a`` into ``b``.

    Both arguments are converted with ``bytes()`` and split into lines
    (line endings kept).  The result is a byte string made of zero or more
    hunks.  Each hunk is a header packed with ``">lll"`` (three big-endian
    4-byte signed integers: start offset in ``a``, end offset in ``a``,
    length of the replacement data) followed by the replacement bytes taken
    from ``b``.  Identical inputs give an empty delta.
    '''
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        # Nothing to match against: emit a single hunk inserting all of b
        # at offset 0, or the empty delta when b is empty as well.
        s = b"".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    hunks = []
    # p[i] is the byte offset at which line i of a starts; p[len(a)] is
    # the total length of a.
    p = [0]
    for line in a:
        p.append(p[-1] + len(line))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    # la/lb: index of the first line of a/b not covered by a match so far
    la = 0
    lb = 0
    for am, bm, size in d:
        # lines of b between the previous match and this one
        s = b"".join(b[lb:bm])
        if am > la or s:
            # lines were removed from a and/or added from b in this gap
            hunks.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return b"".join(hunks)
84 |
|
81 | |||
def blocks(a, b):
    '''Return the matching line ranges between ``a`` and ``b``.

    Both inputs are split with splitnewlines(), matched with
    difflib.SequenceMatcher and normalized with _normalizeblocks().  The
    result is a list of ``(a1, a2, b1, b2)`` tuples, one per matching
    block, built so that ``a2 - a1 == b2 - b1`` is the block length in
    lines.
    '''
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    # convert (astart, bstart, length) into (astart, aend, bstart, bend)
    return [(i, i + n, j, j + n) for (i, j, n) in d]
91 |
|
88 | |||
def fixws(text, allws):
    '''Normalize runs of spaces, tabs and carriage returns in ``text``.

    When ``allws`` is true every such run is removed.  Otherwise each run
    is collapsed to a single space and a space directly before a newline
    is dropped.  Newlines themselves are never touched.

    Accepts byte strings as well as text strings; the result has the same
    type as ``text``.
    '''
    # re and str.replace refuse to mix str patterns with bytes input on
    # Python 3, so pick literals that match the input type.
    if isinstance(text, bytes):
        wsrun, empty, space, nl = b'[ \t\r]+', b'', b' ', b'\n'
    else:
        wsrun, empty, space, nl = '[ \t\r]+', '', ' ', '\n'

    if allws:
        text = re.sub(wsrun, empty, text)
    else:
        text = re.sub(wsrun, space, text)
        # a collapsed run at end of line leaves one trailing space; drop it
        text = text.replace(space + nl, nl)
    return text
99 |
|
96 | |||
# Optional cffi-backed replacements for blocks() and bdiff().  They are only
# defined when the module policy allows cffi and _bdiff_cffi can be imported;
# otherwise the definitions made earlier in this module stay in effect.
if modulepolicy not in policynocffi:
    try:
        from _bdiff_cffi import ffi, lib
    except ImportError:
        if modulepolicy == 'cffi': # strict cffi import
            raise
    else:
        def blocks(sa, sb):
            '''Return matching ranges between sa and sb as a list of
            (a1, a2, b1, b2) tuples, computed by the C bdiff library.

            Raises MemoryError when the library reports a failure (a null
            line array or a negative hunk count).
            '''
            # NOTE(review): str(sa)/str(sb) look like Python 2 byte-string
            # conversions -- confirm before running this path on Python 3.
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            ac = ffi.new("char[]", str(sa))
            bc = ffi.new("char[]", str(sb))
            l = ffi.new("struct bdiff_hunk*")
            try:
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                if count < 0:
                    raise MemoryError
                rl = [None] * count
                # l acts as a list head; the hunks hang off l.next
                h = l.next
                i = 0
                while h:
                    rl[i] = (h.a1, h.a2, h.b1, h.b2)
                    h = h.next
                    i += 1
            finally:
                # release what the library handed back, on success and on
                # failure alike
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return rl

        def bdiff(sa, sb):
            '''Return a binary delta from sa to sb, computed by the C bdiff
            library.

            The result is the concatenation of ">lll" headers, each followed
            by the replacement data read from sb's buffer.  Raises
            MemoryError when the library reports a failure.
            '''
            # NOTE(review): str(...) and "".join below look like Python 2
            # byte-string semantics -- confirm before using on Python 3.
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            ac = ffi.new("char[]", str(sa))
            bc = ffi.new("char[]", str(sb))
            l = ffi.new("struct bdiff_hunk*")
            try:
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                if count < 0:
                    raise MemoryError
                rl = []
                h = l.next
                # la/lb: line index just past the previous hunk in a/b
                la = lb = 0
                while h:
                    if h.a1 != la or h.b1 != lb:
                        # There is a gap before this hunk.  The .l fields
                        # are used as pointers into the text (presumably
                        # the start of each line -- verify against the C
                        # struct), so their differences give byte offsets
                        # and lengths.
                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
                        rl.append(struct.pack(">lll",
                                              (a[0] + la).l - a[0].l,
                                              (a[0] + h.a1).l - a[0].l, lgt))
                        rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
                    la = h.a2
                    lb = h.b2
                    h = h.next

            finally:
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return "".join(rl)
General Comments 0
You need to be logged in to leave comments.
Login now