Only use lazy indexing for big indices and avoid the overhead of the...
mpm@selenic.com
r116:e484cd5e default
@@ -1,473 +1,493 @@
# revlog.py - storage back-end for mercurial
#
# This provides efficient delta storage with O(1) retrieve and append
# and O(changes) merge between branches
#
# Copyright 2005 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import zlib, struct, sha, os, tempfile, binascii
from mercurial import mdiff

def hex(node): return binascii.hexlify(node)
def bin(node): return binascii.unhexlify(node)
def short(node): return hex(node[:4])

def compress(text):
    if not text: return text
    if len(text) < 44:
        if text[0] == '\0': return text
        return 'u' + text
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return text
        return 'u' + text
    return bin

def decompress(bin):
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise "unknown compression type %s" % t

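# --- editor's sketch (illustrative, not part of this commit) ----------
# compress() stores short or incompressible text verbatim behind a 'u'
# marker, and text that already begins with '\0' passes through as-is;
# decompress() dispatches on the first byte, relying on the fact that a
# zlib stream produced with default settings always starts with 'x'
# (0x78). A hypothetical self-check of that framing:
def _check_compress_framing():
    assert decompress(compress("a" * 200)) == "a" * 200    # zlib ('x') path
    assert decompress(compress("hi")) == "hi"              # literal 'u' path
    assert compress("\0" + "x" * 10) == "\0" + "x" * 10    # '\0' passthrough
# -----------------------------------------------------------------------
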
def hash(text, p1, p2):
    l = [p1, p2]
    l.sort()
    return sha.sha(l[0] + l[1] + text).digest()

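# --- editor's note (illustrative, not part of this commit) ------------
# hash() sorts the parents before digesting, so a node id does not
# depend on which parent is listed first; the same merge hashed from
# either direction yields the identical 20-byte id:
def _check_hash_symmetry():
    p1, p2 = "\1" * 20, "\2" * 20
    assert hash("same text", p1, p2) == hash("same text", p2, p1)
# -----------------------------------------------------------------------
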
nullid = "\0" * 20
indexformat = ">4l20s20s20s"

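# --- editor's note (illustrative, not part of this commit) ------------
# indexformat packs one index entry as four big-endian 4-byte longs
# (offset, size, base, linkrev) followed by three 20-byte binary hashes
# (p1, p2, nodeid): 76 bytes per revision. A hypothetical round-trip:
def _check_index_entry():
    assert struct.calcsize(indexformat) == 76
    e = (0, 42, 0, 7, nullid, nullid, "\3" * 20)
    assert struct.unpack(indexformat, struct.pack(indexformat, *e)) == e
# -----------------------------------------------------------------------
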
class lazyparser:
    def __init__(self, data):
        self.data = data
        self.s = struct.calcsize(indexformat)
        self.l = len(data)/self.s
        self.index = [None] * self.l
        self.map = {nullid: -1}

        if 0:
            n = 0
            i = self.data
            s = struct.calcsize(indexformat)
            for f in xrange(0, len(i), s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                self.map[e[6]] = n
                self.index.append(e)
                n += 1

    def load(self, pos):
        block = pos / 1000
        i = block * 1000
        end = min(self.l, i + 1000)
        while i < end:
            d = self.data[i * self.s: (i + 1) * self.s]
            e = struct.unpack(indexformat, d)
            self.index[i] = e
            self.map[e[6]] = i
            i += 1

class lazyindex:
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        self.p.load(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return self.p.index[pos] or self.load(pos)
    def append(self, e):
        self.p.index.append(e)

class lazymap:
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.data.find(key)
        if n < 0: raise KeyError("node " + hex(key))
        pos = n / self.p.s
        self.p.load(pos)
    def __contains__(self, key):
        try:
            self[key]
            return True
        except KeyError:
            return False
    def __iter__(self):
        for i in xrange(self.p.l):
            try:
                yield self.p.index[i][6]
            except:
                self.p.load(i)
                yield self.p.index[i][6]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val

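# --- editor's note (illustrative, not part of this commit) ------------
# The lazy classes page the index in on demand: lazyparser.load() decodes
# entries in blocks of 1000, and lazymap.load() finds a node by a raw
# substring search over the still-packed index bytes, converting the
# byte offset back to an entry number before asking the parser to decode
# that entry's block. Roughly:
def _locate(data, node):
    n = data.find(node)                        # byte offset of the node id
    if n < 0: raise KeyError("node " + hex(node))
    return n / struct.calcsize(indexformat)    # entry number to load
# -----------------------------------------------------------------------
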
class revlog:
    def __init__(self, opener, indexfile, datafile):
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener
        self.cache = None
-       # read the whole index for now, handle on-demand later
        try:
            i = self.opener(self.indexfile).read()
        except IOError:
            i = ""
-       parser = lazyparser(i)
-       self.index = lazyindex(parser)
-       self.nodemap = lazymap(parser)
+
+       if len(i) > 10000:
+           # big index, let's parse it on demand
+           parser = lazyparser(i)
+           self.index = lazyindex(parser)
+           self.nodemap = lazymap(parser)
+       else:
+           s = struct.calcsize(indexformat)
+           l = len(i) / s
+           self.index = [None] * l
+           m = [None] * l
+
+           n = 0
+           for f in xrange(0, len(i), s):
+               # offset, size, base, linkrev, p1, p2, nodeid
+               e = struct.unpack(indexformat, i[f:f + s])
+               m[n] = (e[6], n)
+               self.index[n] = e
+               n += 1
+
+           self.nodemap = dict(m)
+           self.nodemap[nullid] = -1

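# --- editor's note (illustrative, not part of this commit) ------------
# This is the change this revision makes: with 76-byte entries, the
# len(i) > 10000 test means only indices of roughly 131+ revisions pay
# the lazyindex/lazymap indirection; smaller ones are decoded up front
# into a plain list and dict, which are cheaper on every later access.
def _entries_for(index_bytes):
    return index_bytes / struct.calcsize(indexformat)   # 10000 / 76 == 131
# -----------------------------------------------------------------------
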
    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
    def rev(self, node): return self.nodemap[node]
    def linkrev(self, node): return self.index[self.nodemap[node]][3]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        return self.index[self.nodemap[node]][4:6]

    def start(self, rev): return self.index[rev][0]
    def length(self, rev): return self.index[rev][1]
    def end(self, rev): return self.start(rev) + self.length(rev)
    def base(self, rev): return self.index[rev][2]

    def lookup(self, id):
        try:
            rev = int(id)
            return self.node(rev)
        except ValueError:
            c = []
            for n in self.nodemap:
                if id in hex(n):
                    c.append(n)
            if len(c) > 1: raise KeyError("Ambiguous identifier")
            if len(c) < 1: raise KeyError("No match found")
            return c[0]

        return None

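# --- editor's note (illustrative, not part of this commit) ------------
# lookup() accepts either a decimal revision number or a fragment of a
# hex node id; note that 'id in hex(n)' matches the fragment anywhere in
# the 40-character hex string, not only as a prefix. Hypothetical use,
# with 'log' standing in for a revlog instance:
def _demo_lookup(log):
    n = log.lookup("0")                   # revision number -> node of rev 0
    assert log.lookup(hex(n)[:8]) == n    # unique hex fragment -> same node
# -----------------------------------------------------------------------
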
    def diff(self, a, b):
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        return mdiff.patches(t, pl)

    def revision(self, node):
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        text = None
        rev = self.rev(node)
        base = self.base(rev)
        start = self.start(base)
        end = self.end(rev)

        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        if not text:
            last = self.length(base)
            text = decompress(data[:last])

        bins = []
        for r in xrange(base + 1, rev + 1):
            s = self.length(r)
            bins.append(decompress(data[last:last + s]))
            last = last + s

        text = mdiff.patches(text, bins)

        (p1, p2) = self.parents(node)
        if node != hash(text, p1, p2):
            raise IOError("integrity check failed on %s:%d"
                          % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

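# --- editor's sketch (illustrative, not part of this commit) ----------
# revision() reads one contiguous byte span covering the full text
# stored at 'base' plus every delta up to 'rev', then patches in a
# single mdiff.patches() pass. A hypothetical standalone version of that
# core, where 'chunks' maps each rev to its raw chunk from the data file:
def _reconstruct(chunks, base, rev):
    text = decompress(chunks[base])
    bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
    return mdiff.patches(text, bins)
# -----------------------------------------------------------------------
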
    def addrevision(self, text, transaction, link, p1=None, p2=None):
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            prev = self.revision(self.tip())
            d = self.diff(prev, text)
            if self.patches(prev, [d]) != text:
                raise AssertionError("diff failed")
            data = compress(d)
            dist = end - start + len(data)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

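# --- editor's note (illustrative, not part of this commit) ------------
# The snapshot rule above bounds read cost: once the bytes needed to
# replay the delta chain would exceed twice the uncompressed text, a
# full version is stored and becomes the new base. Restated:
def _needs_snapshot(chain_start, chain_end, delta_len, text_len):
    dist = chain_end - chain_start + delta_len
    return dist > text_len * 2
# -----------------------------------------------------------------------
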
    def ancestor(self, a, b):
        def expand(list, map):
            a = []
            while list:
                n = list.pop(0)
                map[n] = 1
                yield n
                for p in self.parents(n):
                    if p != nullid and p not in map:
                        list.append(p)
            yield nullid

        amap = {}
        bmap = {}
        ag = expand([a], amap)
        bg = expand([b], bmap)
        adone = bdone = 0

        while not adone or not bdone:
            if not adone:
                an = ag.next()
                if an == nullid:
                    adone = 1
                elif an in bmap:
                    return an
            if not bdone:
                bn = bg.next()
                if bn == nullid:
                    bdone = 1
                elif bn in amap:
                    return bn

        return nullid

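# --- editor's sketch (illustrative, not part of this commit) ----------
# ancestor() expands both ancestries breadth-first, one node per turn,
# and returns the first node either side finds in the other's visited
# map. A toy check against a dict-backed graph (not a real revlog),
# where revisions "3" and "4" both branch off "2":
def _toy_ancestors(g, n):
    seen, q = {}, [n]
    while q:
        n = q.pop(0)
        seen[n] = 1
        for p in g[n]:
            if p != nullid and p not in seen:
                q.append(p)
    return seen

def _check_toy_ancestor():
    g = {"3": ("2", nullid), "4": ("2", nullid),
         "2": ("1", nullid), "1": (nullid, nullid)}
    both = [n for n in _toy_ancestors(g, "3") if n in _toy_ancestors(g, "4")]
    assert "2" in both and "1" in both   # ancestor() would return "2" first
# -----------------------------------------------------------------------
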
    def group(self, linkmap):
        # given a list of changeset revs, return a set of deltas and
        # metadata corresponding to nodes. the first delta is
        # parent(nodes[0]) -> nodes[0]. the receiver is guaranteed to
        # have this parent, as it has all history before these
        # changesets. parent is parent[0]

        revs = []
        needed = {}

        # find file nodes/revs that match changeset revs
        for i in xrange(0, self.count()):
            if self.index[i][3] in linkmap:
                revs.append(i)
                needed[i] = 1

        # if we don't have any revisions touched by these changesets, bail
        if not revs: return struct.pack(">l", 0)

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # for each delta that isn't contiguous in the log, we need to
        # reconstruct the base, reconstruct the result, and then
        # calculate the delta. We also need to do this where we've
        # stored a full version and not a delta
        for i in xrange(0, len(revs) - 1):
            a, b = revs[i], revs[i + 1]
            if a + 1 != b or self.base(b) == b:
                for j in xrange(self.base(a), a + 1):
                    needed[j] = 1
                for j in xrange(self.base(b), b + 1):
                    needed[j] = 1

        # calculate spans to retrieve from datafile
        needed = needed.keys()
        needed.sort()
        spans = []
        for n in needed:
            if n < 0: continue
            o = self.start(n)
            l = self.length(n)
            spans.append((o, l, [(n, l)]))

        # merge spans
        merge = [spans.pop(0)]
        while spans:
            e = spans.pop(0)
            f = merge[-1]
            if e[0] == f[0] + f[1]:
                merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
            else:
                merge.append(e)

        # read spans in, divide up chunks
        chunks = {}
        for span in merge:
            # we reopen the file for each span to make http happy for now
            f = self.opener(self.datafile)
            f.seek(span[0])
            data = f.read(span[1])

            # divide up the span
            pos = 0
            for r, l in span[2]:
                chunks[r] = data[pos: pos + l]
                pos += l

        # helper to reconstruct intermediate versions
        def construct(text, base, rev):
            bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
            return mdiff.patches(text, bins)

        # build deltas
        deltas = []
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            n = self.node(b)

            if a + 1 != b or self.base(b) == b:
                if a >= 0:
                    base = self.base(a)
                    ta = decompress(chunks[self.base(a)])
                    ta = construct(ta, base, a)
                else:
                    ta = ""

                base = self.base(b)
                if a > base:
                    base = a
                    tb = ta
                else:
                    tb = decompress(chunks[self.base(b)])
                    tb = construct(tb, base, b)
                d = self.diff(ta, tb)
            else:
                d = decompress(chunks[b])

            p = self.parents(n)
            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
            l = struct.pack(">l", len(meta) + len(d) + 4)
            deltas.append(l + meta + d)

        l = struct.pack(">l", sum(map(len, deltas)) + 4)
        deltas.insert(0, l)
        return "".join(deltas)

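# --- editor's sketch (illustrative, not part of this commit) ----------
# group() emits a length-prefixed stream: a 4-byte big-endian total
# (counting itself), then one chunk per delta, each chunk being a 4-byte
# length (also counting itself), 80 bytes of metadata (node, p1, p2,
# changeset id) and the delta body. A hypothetical reader for the chunk
# sequence, matching what addgroup() below expects to be handed once the
# leading total has been consumed by the transport:
def _read_chunks(data):
    chunks, pos = [], 0
    while pos < len(data):
        l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                            data[pos:pos + 84])
        chunks.append((node, p1, p2, cs, data[pos + 84:pos + l]))
        pos += l
    return chunks
# -----------------------------------------------------------------------
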
    def addgroup(self, data, linkmapper, transaction):
        # given a set of deltas, add them to the revision log. the
        # first delta is against its parent, which should be in our
        # log, the rest are against the previous delta.

        if not data: return self.tip()

        # retrieve the parent revision of the delta chain
        chain = data[24:44]
        if not chain in self.nodemap:
            raise "unknown base %s" % short(chain[:4])

        # track the base of the current delta log
        r = self.count()
        t = r - 1

        base = prev = -1
        start = end = 0
        if r:
            start = self.start(self.base(t))
            end = self.end(t)
            measure = self.length(self.base(t))
            base = self.base(t)
            prev = self.tip()

        transaction.add(self.datafile, end)
        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
        dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a")

        # loop through our set of deltas
        pos = 0
        while pos < len(data):
            l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                data[pos:pos+84])
            link = linkmapper(cs)
            if node in self.nodemap:
                raise "already have %s" % hex(node[:4])
            delta = data[pos + 84:pos + l]
            pos += l

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)

            if chain != prev or (end - start + len(cdelta)) > measure * 2:
                # flush our writes here so we can read it in revision
                dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise "consistency error adding group"
                measure = len(text)
            else:
                e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                self.index.append(e)
                self.nodemap[node] = r
                dfh.write(cdelta)
                ifh.write(struct.pack(indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            start = self.start(self.base(t))
            end = self.end(t)

        dfh.close()
        ifh.close()
        return node
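
# --- editor's note (illustrative, not part of this commit) ------------
# addgroup() appends a received delta verbatim only when it chains off
# the current tip and the accumulated chain stays under twice the size
# of the last full version; otherwise it reconstructs the text and
# re-adds it through addrevision(), which may store a fresh snapshot.
# Restated:
def _append_raw(chain, prev, start, end, cdelta_len, measure):
    return chain == prev and (end - start + cdelta_len) <= measure * 2
# -----------------------------------------------------------------------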