Make compression more intelligent:...
mpm@selenic.com
r112:aea6562a default
@@ -1,461 +1,474 @@
# revlog.py - storage back-end for mercurial
#
# This provides efficient delta storage with O(1) retrieve and append
# and O(changes) merge between branches
#
# Copyright 2005 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import zlib, struct, sha, os, tempfile, binascii
from mercurial import mdiff

def hex(node): return binascii.hexlify(node)
def bin(node): return binascii.unhexlify(node)
def short(node): return hex(node[:4])

def compress(text):
-    return zlib.compress(text)
+    if not text: return text
+    if len(text) < 44:
+        if text[0] == '\0': return text
+        return 'u' + text
+    bin = zlib.compress(text)
+    if len(bin) > len(text):
+        if text[0] == '\0': return text
+        return 'u' + text
+    return bin

def decompress(bin):
-    return zlib.decompress(bin)
+    if not bin: return bin
+    t = bin[0]
+    if t == '\0': return bin
+    if t == 'x': return zlib.decompress(bin)
+    if t == 'u': return bin[1:]
+    raise "unknown compression type %s" % t
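
The rewritten pair establishes the tagging scheme the rest of the file relies on: every stored chunk begins with a type byte. zlib output starts with 'x' (the first header byte, 0x78, for zlib's default settings), literal text is stored behind a 'u' marker, and the empty string or text already beginning with NUL passes through untouched. Chunks under 44 bytes skip zlib entirely, since the stream overhead would outweigh any savings, as does anything zlib fails to shrink. A minimal round-trip sketch of the invariant (sample strings are made up):

    for sample in ["", "short text", "x" * 1000, "\0binary payload"]:
        stored = compress(sample)
        # every path through compress() must invert cleanly
        assert decompress(stored) == sample
        # literal storage costs at most one marker byte
        assert len(stored) <= len(sample) + 1 or stored[0] == 'x'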

def hash(text, p1, p2):
    l = [p1, p2]
    l.sort()
    return sha.sha(l[0] + l[1] + text).digest()
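
hash() sorts the parents before digesting, so a node id does not depend on parent order: a merge hashed from either direction gets the same id. A quick check with made-up parent ids:

    p1, p2 = "\1" * 20, "\2" * 20
    assert hash("some text", p1, p2) == hash("some text", p2, p1)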

nullid = "\0" * 20
indexformat = ">4l20s20s20s"
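
indexformat describes one 76-byte index entry, big-endian: four 32-bit ints (offset, size, base, linkrev) followed by three 20-byte ids (p1, p2, node). A packing sketch with made-up values:

    assert struct.calcsize(indexformat) == 76
    # offset, size, base, linkrev, p1, p2, node
    e = (0, 38, 0, 0, nullid, nullid, "\7" * 20)
    assert struct.unpack(indexformat, struct.pack(indexformat, *e)) == e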

class lazyparser:
    def __init__(self, data):
        self.data = data
        self.s = struct.calcsize(indexformat)
        self.l = len(data)/self.s
        self.index = [None] * self.l
        self.map = {nullid: -1}

        if 0:
            n = 0
            i = self.data
            s = struct.calcsize(indexformat)
            for f in xrange(0, len(i), s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                self.map[e[6]] = n
                self.index.append(e)
                n += 1

    def load(self, pos):
        block = pos / 1000
        i = block * 1000
        end = min(self.l, i + 1000)
        while i < end:
            d = self.data[i * self.s: (i + 1) * self.s]
            e = struct.unpack(indexformat, d)
            self.index[i] = e
            self.map[e[6]] = i
            i += 1

class lazyindex:
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def __getitem__(self, pos):
        i = self.p.index[pos]
        if not i:
            self.p.load(pos)
            return self.p.index[pos]
        return i
    def append(self, e):
        self.p.index.append(e)

class lazymap:
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.data.find(key)
        if n < 0: raise KeyError("node " + hex(key))
        pos = n / self.p.s
        self.p.load(pos)
    def __contains__(self, key):
        try:
            self[key]
            return True
        except KeyError:
            return False
    def __iter__(self):
        for i in xrange(self.p.l):
            try:
                yield self.p.index[i][6]
            except:
                self.p.load(i)
                yield self.p.index[i][6]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
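
Together these classes keep startup cheap on large repositories: lazyparser holds the raw index bytes and unpacks entries only in blocks of 1000 when lazyindex is subscripted, while lazymap resolves an unknown node with a raw substring search (data.find) and then parses just the block around the hit. A usage sketch, assuming data holds the raw bytes of an index file with at least 12346 entries:

    p = lazyparser(data)
    index, nodemap = lazyindex(p), lazymap(p)
    e = index[12345]                # unpacks only revs 12000-12999
    assert nodemap[e[6]] == 12345   # cached by that same load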

class revlog:
    def __init__(self, opener, indexfile, datafile):
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener
        self.cache = None
        # read the whole index for now, handle on-demand later
        try:
            i = self.opener(self.indexfile).read()
        except IOError:
            i = ""
        parser = lazyparser(i)
        self.index = lazyindex(parser)
        self.nodemap = lazymap(parser)

    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
    def rev(self, node): return self.nodemap[node]
    def linkrev(self, node): return self.index[self.nodemap[node]][3]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        return self.index[self.nodemap[node]][4:6]

    def start(self, rev): return self.index[rev][0]
    def length(self, rev): return self.index[rev][1]
    def end(self, rev): return self.start(rev) + self.length(rev)
    def base(self, rev): return self.index[rev][2]

    def lookup(self, id):
        try:
            rev = int(id)
            return self.node(rev)
        except ValueError:
            c = []
            for n in self.nodemap:
                if id in hex(n):
                    c.append(n)
            if len(c) > 1: raise KeyError("Ambiguous identifier")
            if len(c) < 1: raise KeyError("No match found")
            return c[0]

        return None
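
lookup() accepts either a decimal revision number (tried first via int()) or a fragment of a hex node id; note that the id in hex(n) test matches the fragment anywhere in the 40-character id, not just as a prefix, and KeyError signals both ambiguous and unknown fragments. A usage sketch, assuming rl is an open revlog and the ids are illustrative:

    rl.lookup("0")          # node of rev 0
    rl.lookup("aea6562a")   # the unique node whose hex id contains this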

    def diff(self, a, b):
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        return mdiff.patches(t, pl)

    def revision(self, node):
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        text = None
        rev = self.rev(node)
        base = self.base(rev)
        start = self.start(base)
        end = self.end(rev)

        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        if not text:
            last = self.length(base)
            text = decompress(data[:last])

        bins = []
        for r in xrange(base + 1, rev + 1):
            s = self.length(r)
            bins.append(decompress(data[last:last + s]))
            last = last + s

        text = mdiff.patches(text, bins)

        (p1, p2) = self.parents(node)
        if node != hash(text, p1, p2):
            raise IOError("integrity check failed on %s:%d"
                          % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text
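
revision() reads the whole delta chain as one contiguous span of the data file: the full snapshot at base(rev), then every delta up through rev, applied in a single mdiff.patches pass. The (node, rev, text) cache lets a later call join the chain midway; a hypothetical trace:

    # chain for rev 9 with base(9) == 5, cache holding the text of rev 7:
    #   cold read: snapshot(5) + delta(6) + delta(7) + delta(8) + delta(9)
    #   warm read: cached text of rev 7   + delta(8) + delta(9)
    # hence base = self.cache[1], start = self.start(base + 1), last = 0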

    def addrevision(self, text, transaction, link, p1=None, p2=None):
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            prev = self.revision(self.tip())
            d = self.diff(prev, text)
            if self.patches(prev, [d]) != text:
                raise AssertionError("diff failed")
            data = compress(d)
            dist = end - start + len(data)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node
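
The dist > len(text) * 2 test bounds retrieval cost: dist is what revision() would have to read back for the new rev (the base snapshot through the freshly compressed delta), so once the chain costs more than twice the plain text, a fresh full version is stored instead. Worked numbers, all made up:

    end_minus_start = 10240 + 15 * 1024   # 10K snapshot + 15 deltas of 1K
    dist = end_minus_start + 1024         # plus the new 1K delta = 26624
    assert dist > 2 * 12288               # text is 12K: 26624 > 24576, snapshot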

    def ancestor(self, a, b):
        def expand(list, map):
            a = []
            while list:
                n = list.pop(0)
                map[n] = 1
                yield n
                for p in self.parents(n):
                    if p != nullid and p not in map:
                        list.append(p)
            yield nullid

        amap = {}
        bmap = {}
        ag = expand([a], amap)
        bg = expand([b], bmap)
        adone = bdone = 0

        while not adone or not bdone:
            if not adone:
                an = ag.next()
                if an == nullid:
                    adone = 1
                elif an in bmap:
                    return an
            if not bdone:
                bn = bg.next()
                if bn == nullid:
                    bdone = 1
                elif bn in amap:
                    return bn

        return nullid
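
ancestor() interleaves two breadth-first walks, one per argument, each marking what it has visited; the first node either walk yields that the other has already marked is a common ancestor, and alternating one hop at a time keeps a long branch on one side from being chased to the root needlessly. The same idea over a plain parents dict (toy graph, with None standing in for nullid):

    parents = {"m1": ("root", None), "m2": ("root", None),
               "root": (None, None)}

    def walk(start, seen):
        queue = [start]
        while queue:
            n = queue.pop(0)
            seen[n] = 1
            yield n
            for p in parents[n]:
                if p is not None and p not in seen:
                    queue.append(p)

    aseen, bseen = {}, {}
    ag, bg = walk("m1", aseen), walk("m2", bseen)
    found = None
    while not found:
        an, bn = ag.next(), bg.next()   # one hop on each side
        if an in bseen: found = an
        elif bn in aseen: found = bn
    assert found == "root"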

    def group(self, linkmap):
        # given a list of changeset revs, return a set of deltas and
        # metadata corresponding to nodes. the first delta is
        # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        # have this parent as it has all history before these
        # changesets. parent is parent[0]

        revs = []
        needed = {}

        # find file nodes/revs that match changeset revs
        for i in xrange(0, self.count()):
            if self.index[i][3] in linkmap:
                revs.append(i)
                needed[i] = 1

        # if we don't have any revisions touched by these changesets, bail
        if not revs: return struct.pack(">l", 0)

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # for each delta that isn't contiguous in the log, we need to
        # reconstruct the base, reconstruct the result, and then
        # calculate the delta. We also need to do this where we've
        # stored a full version and not a delta
        for i in xrange(0, len(revs) - 1):
            a, b = revs[i], revs[i + 1]
            if a + 1 != b or self.base(b) == b:
                for j in xrange(self.base(a), a + 1):
                    needed[j] = 1
                for j in xrange(self.base(b), b + 1):
                    needed[j] = 1

        # calculate spans to retrieve from datafile
        needed = needed.keys()
        needed.sort()
        spans = []
        for n in needed:
            if n < 0: continue
            o = self.start(n)
            l = self.length(n)
            spans.append((o, l, [(n, l)]))

        # merge spans
        merge = [spans.pop(0)]
        while spans:
            e = spans.pop(0)
            f = merge[-1]
            if e[0] == f[0] + f[1]:
                merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
            else:
                merge.append(e)

        # read spans in, divide up chunks
        chunks = {}
        for span in merge:
            # we reopen the file for each span to make http happy for now
            f = self.opener(self.datafile)
            f.seek(span[0])
            data = f.read(span[1])

            # divide up the span
            pos = 0
            for r, l in span[2]:
                chunks[r] = data[pos: pos + l]
                pos += l

        # helper to reconstruct intermediate versions
        def construct(text, base, rev):
            bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
            return mdiff.patches(text, bins)

        # build deltas
        deltas = []
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            n = self.node(b)

            if a + 1 != b or self.base(b) == b:
                if a >= 0:
                    base = self.base(a)
                    ta = decompress(chunks[self.base(a)])
                    ta = construct(ta, base, a)
                else:
                    ta = ""

                base = self.base(b)
                if a > base:
                    base = a
                    tb = ta
                else:
                    tb = decompress(chunks[self.base(b)])
                tb = construct(tb, base, b)
                d = self.diff(ta, tb)
            else:
                d = decompress(chunks[b])

            p = self.parents(n)
            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
            l = struct.pack(">l", len(meta) + len(d) + 4)
            deltas.append(l + meta + d)

        l = struct.pack(">l", sum(map(len, deltas)) + 4)
        deltas.insert(0, l)
        return "".join(deltas)

    def addgroup(self, data, linkmapper, transaction):
        # given a set of deltas, add them to the revision log. the
        # first delta is against its parent, which should be in our
        # log, the rest are against the previous delta.

        if not data: return self.tip()

        # retrieve the parent revision of the delta chain
        chain = data[24:44]
        if not chain in self.nodemap:
            raise "unknown base %s" % short(chain[:4])

        # track the base of the current delta log
        r = self.count()
        t = r - 1

        base = prev = -1
        start = end = 0
        if r:
            start = self.start(self.base(t))
            end = self.end(t)
            measure = self.length(self.base(t))
            base = self.base(t)
            prev = self.tip()

        transaction.add(self.datafile, end)
        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
        dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a")

        # loop through our set of deltas
        pos = 0
        while pos < len(data):
            l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                data[pos:pos+84])
            link = linkmapper(cs)
            if node in self.nodemap:
                raise "already have %s" % hex(node[:4])
            delta = data[pos + 84:pos + l]
            pos += l

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)

            if chain != prev or (end - start + len(cdelta)) > measure * 2:
                # flush our writes here so we can read it in revision
                dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise "consistency error adding group"
                measure = len(text)
            else:
                e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                self.index.append(e)
                self.nodemap[node] = r
                dfh.write(cdelta)
                ifh.write(struct.pack(indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            start = self.start(self.base(t))
            end = self.end(t)

        dfh.close()
        ifh.close()
        return node
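
A note on the magic slice in addgroup(): chain = data[24:44] works because addgroup() receives the group body without the leading total-length word, so the first entry starts at offset 0 and its fields sit at fixed offsets. That lets p1 of the first delta, the revision the whole chain builds on, be checked against nodemap before any unpacking:

    # offsets within the first entry of the group body
    #   0:4    length l
    #   4:24   node
    #   24:44  p1   (base the first delta applies against)
    #   44:64  p2
    #   64:84  cs   (changeset id handed to linkmapper)
    #   84:l   delta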