Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
r71:47c9a869 default
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -1,75 +1,144 @@
 #!/usr/bin/python
-import difflib, struct
-from cStringIO import StringIO
+import difflib, struct, mmap
+
+devzero = file("/dev/zero")
 
 def unidiff(a, ad, b, bd, fn):
     if not a and not b: return ""
     a = a.splitlines(1)
     b = b.splitlines(1)
     l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
     return "".join(l)
 
 def textdiff(a, b):
     return diff(a.splitlines(1), b.splitlines(1))
 
 def sortdiff(a, b):
     la = lb = 0
 
     while 1:
         if la >= len(a) or lb >= len(b): break
         if b[lb] < a[la]:
             si = lb
             while lb < len(b) and b[lb] < a[la] : lb += 1
             yield "insert", la, la, si, lb
         elif a[la] < b[lb]:
             si = la
             while la < len(a) and a[la] < b[lb]: la += 1
             yield "delete", si, la, lb, lb
         else:
             la += 1
             lb += 1
 
     if lb < len(b):
         yield "insert", la, la, lb, len(b)
 
     if la < len(a):
         yield "delete", la, len(a), lb, lb
 
 def diff(a, b, sorted=0):
     bin = []
     p = [0]
     for i in a: p.append(p[-1] + len(i))
 
     if sorted:
         d = sortdiff(a, b)
     else:
         d = difflib.SequenceMatcher(None, a, b).get_opcodes()
 
     for o, m, n, s, t in d:
         if o == 'equal': continue
         s = "".join(b[s:t])
         bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
 
     return "".join(bin)
 
+# This attempts to apply a series of patches in time proportional to
+# the total size of the patches, rather than patches * len(text). This
+# means rather than shuffling strings around, we shuffle around
+# pointers to fragments with fragment lists.
+#
+# When the fragment lists get too long, we collapse them. To do this
+# efficiently, we do all our operations inside a buffer created by
+# mmap and simply use memmove. This avoids creating a bunch of large
+# temporary string buffers.
+
+def patches(a, bins):
+    if not bins: return a
+
+    plens = [len(x) for x in bins]
+    pl = sum(plens)
+    bl = len(a) + pl
+    tl = bl + bl + pl # enough for the patches and two working texts
+    b1, b2 = 0, bl
+    m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
+
+    # load our original text
+    m.write(a)
+    frags = [(len(a), b1)]
+
+    # copy all the patches into our segment so we can memmove from them
+    pos = b2 + bl
+    m.seek(pos)
+    for p in bins: m.write(p)
+
+    def pull(dst, src, l): # pull l bytes from src
+        while l:
+            f = src.pop(0)
+            if f[0] > l: # do we need to split?
+                src.insert(0, (f[0] - l, f[1] + l))
+                dst.append((l, f[1]))
+                return
+            dst.append(f)
+            l -= f[0]
+
+    def collect(buf, list):
+        start = buf
+        for l, p in list:
+            m.move(buf, p, l)
+            buf += l
+        return (buf - start, start)
+
+    for plen in plens:
+        # if our list gets too long, execute it
+        if len(frags) > 128:
+            b2, b1 = b1, b2
+            frags = [collect(b1, frags)]
+
+        new = []
+        end = pos + plen
+        last = 0
+        while pos < end:
+            p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
+            pull(new, frags, p1 - last) # what didn't change
+            pull([], frags, p2 - p1) # what got deleted
+            new.append((l, pos + 12)) # what got added
+            pos += l + 12
+            last = p2
+        frags = new + frags # what was left at the end
+
+    t = collect(b2, frags)
+
+    return m[t[1]:t[1] + t[0]]
+
 def patch(a, bin):
     last = pos = 0
     r = []
 
     c = 0
     while pos < len(bin):
         p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
         pos += 12
         r.append(a[last:p1])
         r.append(bin[pos:pos + l])
         pos += l
         last = p2
         c += 1
     r.append(a[last:])
 
     return "".join(r)
 
 
 
 
 
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,412 +1,409 @@
 # revlog.py - storage back-end for mercurial
 #
 # This provides efficient delta storage with O(1) retrieve and append
 # and O(changes) merge between branches
 #
 # Copyright 2005 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
 import zlib, struct, sha, os, tempfile, binascii
 from mercurial import mdiff
 
 def hex(node): return binascii.hexlify(node)
 def bin(node): return binascii.unhexlify(node)
 
 def compress(text):
     return zlib.compress(text)
 
 def decompress(bin):
     return zlib.decompress(bin)
 
 def hash(text, p1, p2):
     l = [p1, p2]
     l.sort()
     return sha.sha(l[0] + l[1] + text).digest()
 
 nullid = "\0" * 20
 indexformat = ">4l20s20s20s"
 
 class revlog:
     def __init__(self, opener, indexfile, datafile):
         self.indexfile = indexfile
         self.datafile = datafile
         self.index = []
         self.opener = opener
         self.cache = None
         self.nodemap = {nullid: -1}
         # read the whole index for now, handle on-demand later
         try:
             n = 0
             i = self.opener(self.indexfile).read()
             s = struct.calcsize(indexformat)
-            for f in range(0, len(i), s):
+            for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 self.nodemap[e[6]] = n
                 self.index.append(e)
                 n += 1
         except IOError: pass
 
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node): return self.nodemap[node]
     def linkrev(self, node): return self.index[self.nodemap[node]][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.nodemap[node]][4:6]
 
     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]
 
     def lookup(self, id):
         try:
             rev = int(id)
             return self.node(rev)
         except ValueError:
             c = []
             for n in self.nodemap:
                 if id in hex(n):
                     c.append(n)
             if len(c) > 1: raise KeyError("Ambiguous identifier")
             if len(c) < 1: raise KeyError("No match found")
             return c[0]
 
         return None
 
     def revisions(self, list):
         # this can be optimized to do spans, etc
         # be stupid for now
         for node in list:
             yield self.revision(node)
 
     def diff(self, a, b):
         return mdiff.textdiff(a, b)
 
-    def patch(self, text, patch):
-        return mdiff.patch(text, patch)
-
     def revision(self, node):
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         text = None
         rev = self.rev(node)
         base = self.base(rev)
         start = self.start(base)
         end = self.end(rev)
 
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0
 
         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)
 
         if not text:
             last = self.length(base)
             text = decompress(data[:last])
 
+        bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
-            b = decompress(data[last:last + s])
-            text = self.patch(text, b)
+            bins.append(decompress(data[last:last + s]))
             last = last + s
 
+        text = mdiff.patches(text, bins)
+
         (p1, p2) = self.parents(node)
         if node != hash(text, p1, p2):
             raise "integrity check failed on %s:%d" % (self.datafile, rev)
 
         self.cache = (node, rev, text)
         return text
 
     def addrevision(self, text, transaction, link, p1=None, p2=None):
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid
 
         node = hash(text, p1, p2)
 
         n = self.count()
         t = n - 1
 
         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             prev = self.revision(self.tip())
             data = compress(self.diff(prev, text))
             dist = end - start + len(data)
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)
 
         offset = 0
         if t >= 0:
             offset = self.end(t)
 
         e = (offset, len(data), base, link, p1, p2, node)
 
         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)
 
         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)
 
         self.cache = (node, n, text)
         return node
 
     def ancestor(self, a, b):
         def expand(list, map):
             a = []
             while list:
                 n = list.pop(0)
                 map[n] = 1
                 yield n
                 for p in self.parents(n):
                     if p != nullid and p not in map:
                         list.append(p)
             yield nullid
 
         amap = {}
         bmap = {}
         ag = expand([a], amap)
         bg = expand([b], bmap)
         adone = bdone = 0
 
         while not adone or not bdone:
             if not adone:
                 an = ag.next()
                 if an == nullid:
                     adone = 1
                 elif an in bmap:
                     return an
             if not bdone:
                 bn = bg.next()
                 if bn == nullid:
                     bdone = 1
                 elif bn in amap:
                     return bn
 
         return nullid
 
     def mergedag(self, other, transaction, linkseq, accumulate = None):
         """combine the nodes from other's DAG into ours"""
         old = self.tip()
         i = self.count()
         l = []
 
         # merge the other revision log into our DAG
         for r in range(other.count()):
             id = other.node(r)
             if id not in self.nodemap:
                 (xn, yn) = other.parents(id)
                 l.append((id, xn, yn))
                 self.nodemap[id] = i
                 i += 1
 
         # merge node date for new nodes
         r = other.revisions([e[0] for e in l])
         for e in l:
             t = r.next()
             if accumulate: accumulate(t)
             self.addrevision(t, transaction, linkseq.next(), e[1], e[2])
 
         # return the unmerged heads for later resolving
         return (old, self.tip())
 
     def group(self, linkmap):
         # given a list of changeset revs, return a set of deltas and
         # metadata corresponding to nodes the first delta is
         # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         # have this parent as it has all history before these
         # changesets. parent is parent[0]
 
         revs = []
         needed = {}
 
         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs: return struct.pack(">l", 0)
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1
 
         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             spans.append((o, l, [(n, l)]))
 
         # merge spans
         merge = [spans.pop(0)]
         while spans:
             e = spans.pop(0)
             f = merge[-1]
             if e[0] == f[0] + f[1]:
                 merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
             else:
                 merge.append(e)
 
         # read spans in, divide up chunks
         chunks = {}
         for span in merge:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])
 
             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = data[pos: pos + l]
                 pos += l
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
-            for r in range(base + 1, rev + 1):
-                b = decompress(chunks[r])
-                text = self.patch(text, b)
-            return text
+            bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
+            return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
-        for d in range(0, len(revs) - 1):
+        for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = decompress(chunks[self.base(a)])
                     ta = construct(ta, base, a)
                 else:
                     ta = ""
 
                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = decompress(chunks[self.base(b)])
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = decompress(chunks[b])
 
             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             deltas.append(l + meta + d)
 
         l = struct.pack(">l", sum(map(len, deltas)) + 4)
         deltas.insert(0, l)
         return "".join(deltas)
 
     def addgroup(self, data, linkmapper, transaction):
         # given a set of deltas, add them to the revision log. the
         # first delta is against its parent, which should be in our
         # log, the rest are against the previous delta.
 
         if not data: return self.tip()
 
         # retrieve the parent revision of the delta chain
         chain = data[24:44]
 
         # track the base of the current delta log
         r = self.count()
         t = r - 1
 
         base = prev = -1
         start = end = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()
 
         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")
 
         # loop through our set of deltas
         pos = 0
         while pos < len(data):
             l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                 data[pos:pos+84])
             link = linkmapper(cs)
             delta = data[pos + 84:pos + l]
             pos += l
 
             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.
 
             if chain == prev:
                 cdelta = compress(delta)
 
             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patch(text, delta)
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise "consistency error adding group"
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))
 
             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)
 
         dfh.close()
         ifh.close()
         return node
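The revlog change replaces the per-delta self.patch() loops with one batched mdiff.patches() call in both revision() and group()'s construct() helper, so a revision at the end of a long delta chain is no longer rebuilt in full once per delta. (The diff also removes the revlog.patch() method itself while addgroup() still calls self.patch(); the module-level mdiff.patch() is the surviving single-delta equivalent.) The fragment-list idea behind patches() can be sketched without the mmap/memmove machinery; the following is a simplified illustration, not the commit's implementation, and patches_sketch is a hypothetical name:

import struct

def patches_sketch(a, bins):
    # the working text is a list of (buffer, start, length) slices;
    # deltas only shuffle these pointers around
    frags = [(a, 0, len(a))]

    def pull(dst, src, l):
        # move l bytes worth of fragments from src to dst, splitting
        # the head fragment when it is larger than needed
        while l:
            buf, start, flen = src.pop(0)
            if flen > l:
                src.insert(0, (buf, start + l, flen - l))
                dst.append((buf, start, l))
                return
            dst.append((buf, start, flen))
            l -= flen

    for b in bins:
        new, pos, last = [], 0, 0
        while pos < len(b):
            p1, p2, l = struct.unpack(">lll", b[pos:pos + 12])
            pull(new, frags, p1 - last)   # what didn't change
            pull([], frags, p2 - p1)      # what got deleted
            new.append((b, pos + 12, l))  # what got added
            pos += 12 + l
            last = p2
        frags = new + frags               # untouched tail

    return b"".join(buf[s:s + n] for buf, s, n in frags)

text = b"abcdef"
delta = struct.pack(">lll", 2, 4, 2) + b"XY"  # replace b"cd" with b"XY"
assert patches_sketch(text, [delta]) == b"abXYef"

Bytes are copied only once, in the final join, which is what makes the total work proportional to the size of the patches rather than patches * len(text).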