##// END OF EJS Templates
Add mdiff.patches to speed up applying thousands of patches to the manifest
Add mdiff.patches to speed up applying thousands of patches to the manifest

File last commit:

r71:47c9a869 default
r71:47c9a869 default
Show More
mdiff.py
144 lines | 3.7 KiB | text/x-python | PythonLexer
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 #!/usr/bin/python
mpm@selenic.com
Add mdiff.patches to speed up applying thousands of patches to the manifest
r71 import difflib, struct, mmap
devzero = file("/dev/zero")
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0
mpm@selenic.com
Diff in subdirectories from Jake Edge...
r64 def unidiff(a, ad, b, bd, fn):
mpm@selenic.com
unidiff: punt on comparing empty files
r35 if not a and not b: return ""
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 a = a.splitlines(1)
b = b.splitlines(1)
mpm@selenic.com
Diff in subdirectories from Jake Edge...
r64 l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 return "".join(l)
def textdiff(a, b):
return diff(a.splitlines(1), b.splitlines(1))
def sortdiff(a, b):
la = lb = 0
while 1:
if la >= len(a) or lb >= len(b): break
if b[lb] < a[la]:
si = lb
while lb < len(b) and b[lb] < a[la] : lb += 1
yield "insert", la, la, si, lb
elif a[la] < b[lb]:
si = la
while la < len(a) and a[la] < b[lb]: la += 1
yield "delete", si, la, lb, lb
else:
la += 1
lb += 1
mpm@selenic.com
Diff in subdirectories from Jake Edge...
r64 if lb < len(b):
yield "insert", la, la, lb, len(b)
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0
mpm@selenic.com
Diff in subdirectories from Jake Edge...
r64 if la < len(a):
yield "delete", la, len(a), lb, lb
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0
def diff(a, b, sorted=0):
bin = []
p = [0]
for i in a: p.append(p[-1] + len(i))
if sorted:
d = sortdiff(a, b)
else:
d = difflib.SequenceMatcher(None, a, b).get_opcodes()
for o, m, n, s, t in d:
if o == 'equal': continue
s = "".join(b[s:t])
bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
return "".join(bin)
mpm@selenic.com
Add mdiff.patches to speed up applying thousands of patches to the manifest
r71 # This attempts to apply a series of patches in time proportional to
# the total size of the patches, rather than patches * len(text). This
# means rather than shuffling strings around, we shuffle around
# pointers to fragments with fragment lists.
#
# When the fragment lists get too long, we collapse them. To do this
# efficiently, we do all our operations inside a buffer created by
# mmap and simply use memmove. This avoids creating a bunch of large
# temporary string buffers.
def patches(a, bins):
if not bins: return a
plens = [len(x) for x in bins]
pl = sum(plens)
bl = len(a) + pl
tl = bl + bl + pl # enough for the patches and two working texts
b1, b2 = 0, bl
m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
# load our original text
m.write(a)
frags = [(len(a), b1)]
# copy all the patches into our segment so we can memmove from them
pos = b2 + bl
m.seek(pos)
for p in bins: m.write(p)
def pull(dst, src, l): # pull l bytes from src
while l:
f = src.pop(0)
if f[0] > l: # do we need to split?
src.insert(0, (f[0] - l, f[1] + l))
dst.append((l, f[1]))
return
dst.append(f)
l -= f[0]
def collect(buf, list):
start = buf
for l, p in list:
m.move(buf, p, l)
buf += l
return (buf - start, start)
for plen in plens:
# if our list gets too long, execute it
if len(frags) > 128:
b2, b1 = b1, b2
frags = [collect(b1, frags)]
new = []
end = pos + plen
last = 0
while pos < end:
p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
pull(new, frags, p1 - last) # what didn't change
pull([], frags, p2 - p1) # what got deleted
new.append((l, pos + 12)) # what got added
pos += l + 12
last = p2
frags = new + frags # what was left at the end
t = collect(b2, frags)
return m[t[1]:t[1] + t[0]]
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 def patch(a, bin):
last = pos = 0
r = []
mpm@selenic.com
Diff in subdirectories from Jake Edge...
r64 c = 0
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 while pos < len(bin):
p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
pos += 12
r.append(a[last:p1])
r.append(bin[pos:pos + l])
pos += l
last = p2
mpm@selenic.com
Diff in subdirectories from Jake Edge...
r64 c += 1
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 r.append(a[last:])
return "".join(r)