Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
r71:47c9a869 (branch: default)
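For orientation: mdiff.patches(a, bins) takes the original text a and a list of binary deltas, where each delta is a sequence of records consisting of a big-endian (start, end, newlength) header packed as ">lll" followed by newlength bytes of replacement data; each record means "replace a[start:end] with these bytes". A rough Python 2-era usage sketch, with the import path and a Unix /dev/zero assumed:

import struct
from mercurial import mdiff  # import path assumed; run from the source tree

a = "the quick brown fox\n"
# one record: replace a[4:9] ("quick") with the 4 new bytes "slow"
delta = struct.pack(">lll", 4, 9, 4) + "slow"

print mdiff.patch(a, delta)        # existing one-delta path
print mdiff.patches(a, [delta])    # new batched path, same result:
                                   # "the slow brown fox\n"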
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
-import difflib, struct
+import difflib, struct, mmap
 from cStringIO import StringIO
+devzero = file("/dev/zero")
 
 def unidiff(a, ad, b, bd, fn):
     if not a and not b: return ""
@@ -52,6 +53,74 @@ def diff(a, b, sorted=0):
 
     return "".join(bin)
 
+# This attempts to apply a series of patches in time proportional to
+# the total size of the patches, rather than patches * len(text). This
+# means rather than shuffling strings around, we shuffle around
+# pointers to fragments with fragment lists.
+#
+# When the fragment lists get too long, we collapse them. To do this
+# efficiently, we do all our operations inside a buffer created by
+# mmap and simply use memmove. This avoids creating a bunch of large
+# temporary string buffers.
+
+def patches(a, bins):
+    if not bins: return a
+
+    plens = [len(x) for x in bins]
+    pl = sum(plens)
+    bl = len(a) + pl
+    tl = bl + bl + pl # enough for the patches and two working texts
+    b1, b2 = 0, bl
+    m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
+
+    # load our original text
+    m.write(a)
+    frags = [(len(a), b1)]
+
+    # copy all the patches into our segment so we can memmove from them
+    pos = b2 + bl
+    m.seek(pos)
+    for p in bins: m.write(p)
+
+    def pull(dst, src, l): # pull l bytes from src
+        while l:
+            f = src.pop(0)
+            if f[0] > l: # do we need to split?
+                src.insert(0, (f[0] - l, f[1] + l))
+                dst.append((l, f[1]))
+                return
+            dst.append(f)
+            l -= f[0]
+
+    def collect(buf, list):
+        start = buf
+        for l, p in list:
+            m.move(buf, p, l)
+            buf += l
+        return (buf - start, start)
+
+    for plen in plens:
+        # if our list gets too long, execute it
+        if len(frags) > 128:
+            b2, b1 = b1, b2
+            frags = [collect(b1, frags)]
+
+        new = []
+        end = pos + plen
+        last = 0
+        while pos < end:
+            p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
+            pull(new, frags, p1 - last) # what didn't change
+            pull([], frags, p2 - p1)    # what got deleted
+            new.append((l, pos + 12))   # what got added
+            pos += l + 12
+            last = p2
+        frags = new + frags             # what was left at the end
+
+    t = collect(b2, frags)
+
+    return m[t[1]:t[1] + t[0]]
+
 def patch(a, bin):
     last = pos = 0
     r = []
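The comment block above sums up the trick: the working text is kept as a list of (length, offset) fragments pointing into one mmap'd buffer, so applying a delta only shuffles tuples, and the list is collapsed with memmove once it exceeds 128 entries. As a rough illustration of that bookkeeping only, here is an mmap-free sketch; it skips the collapse step and the two working regions, so it is not the implementation above:

import struct

def patches_sketch(a, bins):
    if not bins: return a
    buf = a + "".join(bins)      # one flat buffer: original text, then deltas
    frags = [(len(a), 0)]        # current text as (length, offset) pieces

    def pull(dst, src, l):       # move l bytes worth of fragments to dst
        while l:
            f = src.pop(0)
            if f[0] > l:         # split a fragment that is too big
                src.insert(0, (f[0] - l, f[1] + l))
                dst.append((l, f[1]))
                return
            dst.append(f)
            l -= f[0]

    pos = len(a)                 # offset of the first delta inside buf
    for p in bins:
        end, last, new = pos + len(p), 0, []
        while pos < end:
            p1, p2, l = struct.unpack(">lll", buf[pos:pos + 12])
            pull(new, frags, p1 - last)   # keep what didn't change
            pull([], frags, p2 - p1)      # drop what got deleted
            new.append((l, pos + 12))     # point at the added bytes
            pos += l + 12
            last = p2
        frags = new + frags               # keep the remaining tail
    return "".join([buf[o:o + l] for l, o in frags])

Each delta is processed in time proportional to its own size plus the fragments it touches, which is what makes applying thousands of small manifest deltas cheap.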
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -41,7 +41,7 @@ class revlog:
         n = 0
         i = self.opener(self.indexfile).read()
         s = struct.calcsize(indexformat)
-        for f in range(0, len(i), s):
+        for f in xrange(0, len(i), s):
             # offset, size, base, linkrev, p1, p2, nodeid
             e = struct.unpack(indexformat, i[f:f + s])
             self.nodemap[e[6]] = n
@@ -87,9 +87,6 @@ class revlog:
     def diff(self, a, b):
         return mdiff.textdiff(a, b)
 
-    def patch(self, text, patch):
-        return mdiff.patch(text, patch)
-
     def revision(self, node):
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
@@ -114,12 +111,14 @@ class revlog:
         last = self.length(base)
         text = decompress(data[:last])
 
+        bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
-            b = decompress(data[last:last + s])
-            text = self.patch(text, b)
+            bins.append(decompress(data[last:last + s]))
             last = last + s
 
+        text = mdiff.patches(text, bins)
+
         (p1, p2) = self.parents(node)
         if node != hash(text, p1, p2):
             raise "integrity check failed on %s:%d" % (self.datafile, rev)
@@ -301,14 +300,12 @@ class revlog:
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
-            for r in range(base + 1, rev + 1):
-                b = decompress(chunks[r])
-                text = self.patch(text, b)
-            return text
+            bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
+            return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
-        for d in range(0, len(revs) - 1):
+        for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
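With both hunks in place, revision() and the construct() helper decompress every delta up front and apply the whole list in a single mdiff.patches call instead of re-patching the text once per delta. A small consistency check one might run against this revision, assuming Python 2, a Unix /dev/zero for the mmap buffer, and the package importable as mercurial:

import random
from mercurial import mdiff  # import path assumed

random.seed(0)
lines = ["line %d\n" % i for i in xrange(200)]
versions = ["".join(lines)]
for i in xrange(100):
    lines[random.randrange(len(lines))] = "changed %d\n" % i
    versions.append("".join(lines))

# deltas between consecutive versions, like the ones a revlog stores
bins = [mdiff.textdiff(versions[i], versions[i + 1])
        for i in xrange(len(versions) - 1)]

# old path: apply one delta at a time
text = versions[0]
for b in bins:
    text = mdiff.patch(text, b)

# new path: hand the whole list to patches in one call
assert text == mdiff.patches(versions[0], bins) == versions[-1]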