Show More
@@ -1,134 +1,144 | |||||
1 | #!/usr/bin/python |
|
1 | #!/usr/bin/python | |
2 | import difflib, struct, mmap |
|
2 | import difflib, struct, mmap | |
3 |
|
3 | |||
4 | devzero = file("/dev/zero") |
|
4 | devzero = file("/dev/zero") | |
5 |
|
5 | |||
6 | def unidiff(a, ad, b, bd, fn): |
|
6 | def unidiff(a, ad, b, bd, fn): | |
7 | if not a and not b: return "" |
|
7 | if not a and not b: return "" | |
8 | a = a.splitlines(1) |
|
8 | a = a.splitlines(1) | |
9 | b = b.splitlines(1) |
|
9 | b = b.splitlines(1) | |
10 | l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd)) |
|
10 | l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd)) | |
11 | return "".join(l) |
|
11 | return "".join(l) | |
12 |
|
12 | |||
13 | def textdiff(a, b): |
|
13 | def textdiff(a, b): | |
14 | return diff(a.splitlines(1), b.splitlines(1)) |
|
14 | return diff(a.splitlines(1), b.splitlines(1)) | |
15 |
|
15 | |||
16 | def sortdiff(a, b): |
|
16 | def sortdiff(a, b): | |
17 | la = lb = 0 |
|
17 | la = lb = 0 | |
18 |
|
18 | |||
19 | while 1: |
|
19 | while 1: | |
20 | if la >= len(a) or lb >= len(b): break |
|
20 | if la >= len(a) or lb >= len(b): break | |
21 | if b[lb] < a[la]: |
|
21 | if b[lb] < a[la]: | |
22 | si = lb |
|
22 | si = lb | |
23 | while lb < len(b) and b[lb] < a[la] : lb += 1 |
|
23 | while lb < len(b) and b[lb] < a[la] : lb += 1 | |
24 | yield "insert", la, la, si, lb |
|
24 | yield "insert", la, la, si, lb | |
25 | elif a[la] < b[lb]: |
|
25 | elif a[la] < b[lb]: | |
26 | si = la |
|
26 | si = la | |
27 | while la < len(a) and a[la] < b[lb]: la += 1 |
|
27 | while la < len(a) and a[la] < b[lb]: la += 1 | |
28 | yield "delete", si, la, lb, lb |
|
28 | yield "delete", si, la, lb, lb | |
29 | else: |
|
29 | else: | |
30 | la += 1 |
|
30 | la += 1 | |
31 | lb += 1 |
|
31 | lb += 1 | |
32 |
|
32 | |||
33 | if lb < len(b): |
|
33 | if lb < len(b): | |
34 | yield "insert", la, la, lb, len(b) |
|
34 | yield "insert", la, la, lb, len(b) | |
35 |
|
35 | |||
36 | if la < len(a): |
|
36 | if la < len(a): | |
37 | yield "delete", la, len(a), lb, lb |
|
37 | yield "delete", la, len(a), lb, lb | |
38 |
|
38 | |||
39 | def diff(a, b, sorted=0): |
|
39 | def diff(a, b, sorted=0): | |
40 | bin = [] |
|
40 | bin = [] | |
41 | p = [0] |
|
41 | p = [0] | |
42 | for i in a: p.append(p[-1] + len(i)) |
|
42 | for i in a: p.append(p[-1] + len(i)) | |
43 |
|
43 | |||
44 | if sorted: |
|
44 | if sorted: | |
45 | d = sortdiff(a, b) |
|
45 | d = sortdiff(a, b) | |
46 | else: |
|
46 | else: | |
47 | d = difflib.SequenceMatcher(None, a, b).get_opcodes() |
|
47 | d = difflib.SequenceMatcher(None, a, b).get_opcodes() | |
48 |
|
48 | |||
49 | for o, m, n, s, t in d: |
|
49 | for o, m, n, s, t in d: | |
50 | if o == 'equal': continue |
|
50 | if o == 'equal': continue | |
51 | s = "".join(b[s:t]) |
|
51 | s = "".join(b[s:t]) | |
52 | bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) |
|
52 | bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) | |
53 |
|
53 | |||
54 | return "".join(bin) |
|
54 | return "".join(bin) | |
55 |
|
55 | |||
|
56 | def patchtext(bin): | |||
|
57 | pos = 0 | |||
|
58 | t = [] | |||
|
59 | while pos < len(bin): | |||
|
60 | p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) | |||
|
61 | pos += 12 | |||
|
62 | t.append(bin[pos:pos + l]) | |||
|
63 | pos += l | |||
|
64 | return "".join(t) | |||
|
65 | ||||
56 | # This attempts to apply a series of patches in time proportional to |
|
66 | # This attempts to apply a series of patches in time proportional to | |
57 | # the total size of the patches, rather than patches * len(text). This |
|
67 | # the total size of the patches, rather than patches * len(text). This | |
58 | # means rather than shuffling strings around, we shuffle around |
|
68 | # means rather than shuffling strings around, we shuffle around | |
59 | # pointers to fragments with fragment lists. |
|
69 | # pointers to fragments with fragment lists. | |
60 | # |
|
70 | # | |
61 | # When the fragment lists get too long, we collapse them. To do this |
|
71 | # When the fragment lists get too long, we collapse them. To do this | |
62 | # efficiently, we do all our operations inside a buffer created by |
|
72 | # efficiently, we do all our operations inside a buffer created by | |
63 | # mmap and simply use memmove. This avoids creating a bunch of large |
|
73 | # mmap and simply use memmove. This avoids creating a bunch of large | |
64 | # temporary string buffers. |
|
74 | # temporary string buffers. | |
65 |
|
75 | |||
66 | def patches(a, bins): |
|
76 | def patches(a, bins): | |
67 | if not bins: return a |
|
77 | if not bins: return a | |
68 |
|
78 | |||
69 | plens = [len(x) for x in bins] |
|
79 | plens = [len(x) for x in bins] | |
70 | pl = sum(plens) |
|
80 | pl = sum(plens) | |
71 | bl = len(a) + pl |
|
81 | bl = len(a) + pl | |
72 | tl = bl + bl + pl # enough for the patches and two working texts |
|
82 | tl = bl + bl + pl # enough for the patches and two working texts | |
73 | b1, b2 = 0, bl |
|
83 | b1, b2 = 0, bl | |
74 |
|
84 | |||
75 | if not tl: return a |
|
85 | if not tl: return a | |
76 |
|
86 | |||
77 | m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) |
|
87 | m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) | |
78 |
|
88 | |||
79 | # load our original text |
|
89 | # load our original text | |
80 | m.write(a) |
|
90 | m.write(a) | |
81 | frags = [(len(a), b1)] |
|
91 | frags = [(len(a), b1)] | |
82 |
|
92 | |||
83 | # copy all the patches into our segment so we can memmove from them |
|
93 | # copy all the patches into our segment so we can memmove from them | |
84 | pos = b2 + bl |
|
94 | pos = b2 + bl | |
85 | m.seek(pos) |
|
95 | m.seek(pos) | |
86 | for p in bins: m.write(p) |
|
96 | for p in bins: m.write(p) | |
87 |
|
97 | |||
88 | def pull(dst, src, l): # pull l bytes from src |
|
98 | def pull(dst, src, l): # pull l bytes from src | |
89 | while l: |
|
99 | while l: | |
90 | f = src.pop(0) |
|
100 | f = src.pop(0) | |
91 | if f[0] > l: # do we need to split? |
|
101 | if f[0] > l: # do we need to split? | |
92 | src.insert(0, (f[0] - l, f[1] + l)) |
|
102 | src.insert(0, (f[0] - l, f[1] + l)) | |
93 | dst.append((l, f[1])) |
|
103 | dst.append((l, f[1])) | |
94 | return |
|
104 | return | |
95 | dst.append(f) |
|
105 | dst.append(f) | |
96 | l -= f[0] |
|
106 | l -= f[0] | |
97 |
|
107 | |||
98 | def collect(buf, list): |
|
108 | def collect(buf, list): | |
99 | start = buf |
|
109 | start = buf | |
100 | for l, p in list: |
|
110 | for l, p in list: | |
101 | m.move(buf, p, l) |
|
111 | m.move(buf, p, l) | |
102 | buf += l |
|
112 | buf += l | |
103 | return (buf - start, start) |
|
113 | return (buf - start, start) | |
104 |
|
114 | |||
105 | for plen in plens: |
|
115 | for plen in plens: | |
106 | # if our list gets too long, execute it |
|
116 | # if our list gets too long, execute it | |
107 | if len(frags) > 128: |
|
117 | if len(frags) > 128: | |
108 | b2, b1 = b1, b2 |
|
118 | b2, b1 = b1, b2 | |
109 | frags = [collect(b1, frags)] |
|
119 | frags = [collect(b1, frags)] | |
110 |
|
120 | |||
111 | new = [] |
|
121 | new = [] | |
112 | end = pos + plen |
|
122 | end = pos + plen | |
113 | last = 0 |
|
123 | last = 0 | |
114 | while pos < end: |
|
124 | while pos < end: | |
115 | p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) |
|
125 | p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) | |
116 | pull(new, frags, p1 - last) # what didn't change |
|
126 | pull(new, frags, p1 - last) # what didn't change | |
117 | pull([], frags, p2 - p1) # what got deleted |
|
127 | pull([], frags, p2 - p1) # what got deleted | |
118 | new.append((l, pos + 12)) # what got added |
|
128 | new.append((l, pos + 12)) # what got added | |
119 | pos += l + 12 |
|
129 | pos += l + 12 | |
120 | last = p2 |
|
130 | last = p2 | |
121 | frags = new + frags # what was left at the end |
|
131 | frags = new + frags # what was left at the end | |
122 |
|
132 | |||
123 | t = collect(b2, frags) |
|
133 | t = collect(b2, frags) | |
124 |
|
134 | |||
125 | return m[t[1]:t[1] + t[0]] |
|
135 | return m[t[1]:t[1] + t[0]] | |
126 |
|
136 | |||
127 | def patch(a, bin): |
|
137 | def patch(a, bin): | |
128 | return patches(a, [bin]) |
|
138 | return patches(a, [bin]) | |
129 |
|
139 | |||
130 | try: |
|
140 | try: | |
131 | import mpatch |
|
141 | import mpatch | |
132 | patches = mpatch.patches |
|
142 | patches = mpatch.patches | |
133 | except: |
|
143 | except: | |
134 | pass |
|
144 | pass |
General Comments 0
You need to be logged in to leave comments.
Login now