##// END OF EJS Templates
Add mdiff.patches to speed up applying thousands of patches to the manifest
Add mdiff.patches to speed up applying thousands of patches to the manifest

File last commit:

r71:47c9a869 default
r71:47c9a869 default
Show More
mdiff.py
144 lines | 3.7 KiB | text/x-python | PythonLexer
#!/usr/bin/python
import difflib, struct, mmap
devzero = file("/dev/zero")
def unidiff(a, ad, b, bd, fn):
if not a and not b: return ""
a = a.splitlines(1)
b = b.splitlines(1)
l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
return "".join(l)
def textdiff(a, b):
return diff(a.splitlines(1), b.splitlines(1))
def sortdiff(a, b):
la = lb = 0
while 1:
if la >= len(a) or lb >= len(b): break
if b[lb] < a[la]:
si = lb
while lb < len(b) and b[lb] < a[la] : lb += 1
yield "insert", la, la, si, lb
elif a[la] < b[lb]:
si = la
while la < len(a) and a[la] < b[lb]: la += 1
yield "delete", si, la, lb, lb
else:
la += 1
lb += 1
if lb < len(b):
yield "insert", la, la, lb, len(b)
if la < len(a):
yield "delete", la, len(a), lb, lb
def diff(a, b, sorted=0):
bin = []
p = [0]
for i in a: p.append(p[-1] + len(i))
if sorted:
d = sortdiff(a, b)
else:
d = difflib.SequenceMatcher(None, a, b).get_opcodes()
for o, m, n, s, t in d:
if o == 'equal': continue
s = "".join(b[s:t])
bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
return "".join(bin)
# This attempts to apply a series of patches in time proportional to
# the total size of the patches, rather than patches * len(text). This
# means rather than shuffling strings around, we shuffle around
# pointers to fragments with fragment lists.
#
# When the fragment lists get too long, we collapse them. To do this
# efficiently, we do all our operations inside a buffer created by
# mmap and simply use memmove. This avoids creating a bunch of large
# temporary string buffers.
def patches(a, bins):
if not bins: return a
plens = [len(x) for x in bins]
pl = sum(plens)
bl = len(a) + pl
tl = bl + bl + pl # enough for the patches and two working texts
b1, b2 = 0, bl
m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
# load our original text
m.write(a)
frags = [(len(a), b1)]
# copy all the patches into our segment so we can memmove from them
pos = b2 + bl
m.seek(pos)
for p in bins: m.write(p)
def pull(dst, src, l): # pull l bytes from src
while l:
f = src.pop(0)
if f[0] > l: # do we need to split?
src.insert(0, (f[0] - l, f[1] + l))
dst.append((l, f[1]))
return
dst.append(f)
l -= f[0]
def collect(buf, list):
start = buf
for l, p in list:
m.move(buf, p, l)
buf += l
return (buf - start, start)
for plen in plens:
# if our list gets too long, execute it
if len(frags) > 128:
b2, b1 = b1, b2
frags = [collect(b1, frags)]
new = []
end = pos + plen
last = 0
while pos < end:
p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
pull(new, frags, p1 - last) # what didn't change
pull([], frags, p2 - p1) # what got deleted
new.append((l, pos + 12)) # what got added
pos += l + 12
last = p2
frags = new + frags # what was left at the end
t = collect(b2, frags)
return m[t[1]:t[1] + t[0]]
def patch(a, bin):
last = pos = 0
r = []
c = 0
while pos < len(bin):
p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
pos += 12
r.append(a[last:p1])
r.append(bin[pos:pos + l])
pos += l
last = p2
c += 1
r.append(a[last:])
return "".join(r)