##// END OF EJS Templates
Add revlog.reachable to find a graph of ancestors for a given rev
mason@suse.com -
r1074:55bf5cfd default
parent child Browse files
Show More
@@ -1,553 +1,575 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # This provides efficient delta storage with O(1) retrieve and append
3 # This provides efficient delta storage with O(1) retrieve and append
4 # and O(changes) merge between branches
4 # and O(changes) merge between branches
5 #
5 #
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 #
7 #
8 # This software may be used and distributed according to the terms
8 # This software may be used and distributed according to the terms
9 # of the GNU General Public License, incorporated herein by reference.
9 # of the GNU General Public License, incorporated herein by reference.
10
10
11 import zlib, struct, sha, binascii, heapq
11 import zlib, struct, sha, binascii, heapq
12 from mercurial import mdiff
12 from mercurial import mdiff
13
13
14 def hex(node): return binascii.hexlify(node)
14 def hex(node): return binascii.hexlify(node)
15 def bin(node): return binascii.unhexlify(node)
15 def bin(node): return binascii.unhexlify(node)
16 def short(node): return hex(node[:6])
16 def short(node): return hex(node[:6])
17
17
18 def compress(text):
18 def compress(text):
19 if not text: return text
19 if not text: return text
20 if len(text) < 44:
20 if len(text) < 44:
21 if text[0] == '\0': return text
21 if text[0] == '\0': return text
22 return 'u' + text
22 return 'u' + text
23 bin = zlib.compress(text)
23 bin = zlib.compress(text)
24 if len(bin) > len(text):
24 if len(bin) > len(text):
25 if text[0] == '\0': return text
25 if text[0] == '\0': return text
26 return 'u' + text
26 return 'u' + text
27 return bin
27 return bin
28
28
29 def decompress(bin):
29 def decompress(bin):
30 if not bin: return bin
30 if not bin: return bin
31 t = bin[0]
31 t = bin[0]
32 if t == '\0': return bin
32 if t == '\0': return bin
33 if t == 'x': return zlib.decompress(bin)
33 if t == 'x': return zlib.decompress(bin)
34 if t == 'u': return bin[1:]
34 if t == 'u': return bin[1:]
35 raise RevlogError("unknown compression type %s" % t)
35 raise RevlogError("unknown compression type %s" % t)
36
36
37 def hash(text, p1, p2):
37 def hash(text, p1, p2):
38 l = [p1, p2]
38 l = [p1, p2]
39 l.sort()
39 l.sort()
40 s = sha.new(l[0])
40 s = sha.new(l[0])
41 s.update(l[1])
41 s.update(l[1])
42 s.update(text)
42 s.update(text)
43 return s.digest()
43 return s.digest()
44
44
45 nullid = "\0" * 20
45 nullid = "\0" * 20
46 indexformat = ">4l20s20s20s"
46 indexformat = ">4l20s20s20s"
47
47
48 class lazyparser:
48 class lazyparser:
49 def __init__(self, data, revlog):
49 def __init__(self, data, revlog):
50 self.data = data
50 self.data = data
51 self.s = struct.calcsize(indexformat)
51 self.s = struct.calcsize(indexformat)
52 self.l = len(data)/self.s
52 self.l = len(data)/self.s
53 self.index = [None] * self.l
53 self.index = [None] * self.l
54 self.map = {nullid: -1}
54 self.map = {nullid: -1}
55 self.all = 0
55 self.all = 0
56 self.revlog = revlog
56 self.revlog = revlog
57
57
58 def load(self, pos=None):
58 def load(self, pos=None):
59 if self.all: return
59 if self.all: return
60 if pos is not None:
60 if pos is not None:
61 block = pos / 1000
61 block = pos / 1000
62 i = block * 1000
62 i = block * 1000
63 end = min(self.l, i + 1000)
63 end = min(self.l, i + 1000)
64 else:
64 else:
65 self.all = 1
65 self.all = 1
66 i = 0
66 i = 0
67 end = self.l
67 end = self.l
68 self.revlog.index = self.index
68 self.revlog.index = self.index
69 self.revlog.nodemap = self.map
69 self.revlog.nodemap = self.map
70
70
71 while i < end:
71 while i < end:
72 d = self.data[i * self.s: (i + 1) * self.s]
72 d = self.data[i * self.s: (i + 1) * self.s]
73 e = struct.unpack(indexformat, d)
73 e = struct.unpack(indexformat, d)
74 self.index[i] = e
74 self.index[i] = e
75 self.map[e[6]] = i
75 self.map[e[6]] = i
76 i += 1
76 i += 1
77
77
78 class lazyindex:
78 class lazyindex:
79 def __init__(self, parser):
79 def __init__(self, parser):
80 self.p = parser
80 self.p = parser
81 def __len__(self):
81 def __len__(self):
82 return len(self.p.index)
82 return len(self.p.index)
83 def load(self, pos):
83 def load(self, pos):
84 self.p.load(pos)
84 self.p.load(pos)
85 return self.p.index[pos]
85 return self.p.index[pos]
86 def __getitem__(self, pos):
86 def __getitem__(self, pos):
87 return self.p.index[pos] or self.load(pos)
87 return self.p.index[pos] or self.load(pos)
88 def append(self, e):
88 def append(self, e):
89 self.p.index.append(e)
89 self.p.index.append(e)
90
90
91 class lazymap:
91 class lazymap:
92 def __init__(self, parser):
92 def __init__(self, parser):
93 self.p = parser
93 self.p = parser
94 def load(self, key):
94 def load(self, key):
95 if self.p.all: return
95 if self.p.all: return
96 n = self.p.data.find(key)
96 n = self.p.data.find(key)
97 if n < 0: raise KeyError("node " + hex(key))
97 if n < 0: raise KeyError("node " + hex(key))
98 pos = n / self.p.s
98 pos = n / self.p.s
99 self.p.load(pos)
99 self.p.load(pos)
100 def __contains__(self, key):
100 def __contains__(self, key):
101 self.p.load()
101 self.p.load()
102 return key in self.p.map
102 return key in self.p.map
103 def __iter__(self):
103 def __iter__(self):
104 yield nullid
104 yield nullid
105 for i in xrange(self.p.l):
105 for i in xrange(self.p.l):
106 try:
106 try:
107 yield self.p.index[i][6]
107 yield self.p.index[i][6]
108 except:
108 except:
109 self.p.load(i)
109 self.p.load(i)
110 yield self.p.index[i][6]
110 yield self.p.index[i][6]
111 def __getitem__(self, key):
111 def __getitem__(self, key):
112 try:
112 try:
113 return self.p.map[key]
113 return self.p.map[key]
114 except KeyError:
114 except KeyError:
115 try:
115 try:
116 self.load(key)
116 self.load(key)
117 return self.p.map[key]
117 return self.p.map[key]
118 except KeyError:
118 except KeyError:
119 raise KeyError("node " + hex(key))
119 raise KeyError("node " + hex(key))
120 def __setitem__(self, key, val):
120 def __setitem__(self, key, val):
121 self.p.map[key] = val
121 self.p.map[key] = val
122
122
123 class RevlogError(Exception): pass
123 class RevlogError(Exception): pass
124
124
125 class revlog:
125 class revlog:
126 def __init__(self, opener, indexfile, datafile):
126 def __init__(self, opener, indexfile, datafile):
127 self.indexfile = indexfile
127 self.indexfile = indexfile
128 self.datafile = datafile
128 self.datafile = datafile
129 self.opener = opener
129 self.opener = opener
130 self.cache = None
130 self.cache = None
131
131
132 try:
132 try:
133 i = self.opener(self.indexfile).read()
133 i = self.opener(self.indexfile).read()
134 except IOError:
134 except IOError:
135 i = ""
135 i = ""
136
136
137 if len(i) > 10000:
137 if len(i) > 10000:
138 # big index, let's parse it on demand
138 # big index, let's parse it on demand
139 parser = lazyparser(i, self)
139 parser = lazyparser(i, self)
140 self.index = lazyindex(parser)
140 self.index = lazyindex(parser)
141 self.nodemap = lazymap(parser)
141 self.nodemap = lazymap(parser)
142 else:
142 else:
143 s = struct.calcsize(indexformat)
143 s = struct.calcsize(indexformat)
144 l = len(i) / s
144 l = len(i) / s
145 self.index = [None] * l
145 self.index = [None] * l
146 m = [None] * l
146 m = [None] * l
147
147
148 n = 0
148 n = 0
149 for f in xrange(0, len(i), s):
149 for f in xrange(0, len(i), s):
150 # offset, size, base, linkrev, p1, p2, nodeid
150 # offset, size, base, linkrev, p1, p2, nodeid
151 e = struct.unpack(indexformat, i[f:f + s])
151 e = struct.unpack(indexformat, i[f:f + s])
152 m[n] = (e[6], n)
152 m[n] = (e[6], n)
153 self.index[n] = e
153 self.index[n] = e
154 n += 1
154 n += 1
155
155
156 self.nodemap = dict(m)
156 self.nodemap = dict(m)
157 self.nodemap[nullid] = -1
157 self.nodemap[nullid] = -1
158
158
159 def tip(self): return self.node(len(self.index) - 1)
159 def tip(self): return self.node(len(self.index) - 1)
160 def count(self): return len(self.index)
160 def count(self): return len(self.index)
161 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
161 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
162 def rev(self, node): return self.nodemap[node]
162 def rev(self, node): return self.nodemap[node]
163 def linkrev(self, node): return self.index[self.nodemap[node]][3]
163 def linkrev(self, node): return self.index[self.nodemap[node]][3]
164 def parents(self, node):
164 def parents(self, node):
165 if node == nullid: return (nullid, nullid)
165 if node == nullid: return (nullid, nullid)
166 return self.index[self.nodemap[node]][4:6]
166 return self.index[self.nodemap[node]][4:6]
167
167
168 def start(self, rev): return self.index[rev][0]
168 def start(self, rev): return self.index[rev][0]
169 def length(self, rev): return self.index[rev][1]
169 def length(self, rev): return self.index[rev][1]
170 def end(self, rev): return self.start(rev) + self.length(rev)
170 def end(self, rev): return self.start(rev) + self.length(rev)
171 def base(self, rev): return self.index[rev][2]
171 def base(self, rev): return self.index[rev][2]
172
172
173 def reachable(self, rev, stop=None):
174 reachable = {}
175 visit = [rev]
176 reachable[rev] = 1
177 if stop:
178 stopn = self.rev(stop)
179 else:
180 stopn = 0
181 while visit:
182 n = visit.pop(0)
183 if n == stop:
184 continue
185 if n == nullid:
186 continue
187 for p in self.parents(n):
188 if self.rev(p) < stopn:
189 continue
190 if p not in reachable:
191 reachable[p] = 1
192 visit.append(p)
193 return reachable
194
173 def heads(self, stop=None):
195 def heads(self, stop=None):
174 p = {}
196 p = {}
175 h = []
197 h = []
176 stoprev = 0
198 stoprev = 0
177 if stop and stop in self.nodemap:
199 if stop and stop in self.nodemap:
178 stoprev = self.rev(stop)
200 stoprev = self.rev(stop)
179
201
180 for r in range(self.count() - 1, -1, -1):
202 for r in range(self.count() - 1, -1, -1):
181 n = self.node(r)
203 n = self.node(r)
182 if n not in p:
204 if n not in p:
183 h.append(n)
205 h.append(n)
184 if n == stop:
206 if n == stop:
185 break
207 break
186 if r < stoprev:
208 if r < stoprev:
187 break
209 break
188 for pn in self.parents(n):
210 for pn in self.parents(n):
189 p[pn] = 1
211 p[pn] = 1
190 return h
212 return h
191
213
192 def children(self, node):
214 def children(self, node):
193 c = []
215 c = []
194 p = self.rev(node)
216 p = self.rev(node)
195 for r in range(p + 1, self.count()):
217 for r in range(p + 1, self.count()):
196 n = self.node(r)
218 n = self.node(r)
197 for pn in self.parents(n):
219 for pn in self.parents(n):
198 if pn == node:
220 if pn == node:
199 c.append(n)
221 c.append(n)
200 continue
222 continue
201 elif pn == nullid:
223 elif pn == nullid:
202 continue
224 continue
203 return c
225 return c
204
226
205 def lookup(self, id):
227 def lookup(self, id):
206 try:
228 try:
207 rev = int(id)
229 rev = int(id)
208 if str(rev) != id: raise ValueError
230 if str(rev) != id: raise ValueError
209 if rev < 0: rev = self.count() + rev
231 if rev < 0: rev = self.count() + rev
210 if rev < 0 or rev >= self.count(): raise ValueError
232 if rev < 0 or rev >= self.count(): raise ValueError
211 return self.node(rev)
233 return self.node(rev)
212 except (ValueError, OverflowError):
234 except (ValueError, OverflowError):
213 c = []
235 c = []
214 for n in self.nodemap:
236 for n in self.nodemap:
215 if hex(n).startswith(id):
237 if hex(n).startswith(id):
216 c.append(n)
238 c.append(n)
217 if len(c) > 1: raise KeyError("Ambiguous identifier")
239 if len(c) > 1: raise KeyError("Ambiguous identifier")
218 if len(c) < 1: raise KeyError("No match found")
240 if len(c) < 1: raise KeyError("No match found")
219 return c[0]
241 return c[0]
220
242
221 return None
243 return None
222
244
223 def diff(self, a, b):
245 def diff(self, a, b):
224 return mdiff.textdiff(a, b)
246 return mdiff.textdiff(a, b)
225
247
226 def patches(self, t, pl):
248 def patches(self, t, pl):
227 return mdiff.patches(t, pl)
249 return mdiff.patches(t, pl)
228
250
229 def delta(self, node):
251 def delta(self, node):
230 r = self.rev(node)
252 r = self.rev(node)
231 b = self.base(r)
253 b = self.base(r)
232 if r == b:
254 if r == b:
233 return self.diff(self.revision(self.node(r - 1)),
255 return self.diff(self.revision(self.node(r - 1)),
234 self.revision(node))
256 self.revision(node))
235 else:
257 else:
236 f = self.opener(self.datafile)
258 f = self.opener(self.datafile)
237 f.seek(self.start(r))
259 f.seek(self.start(r))
238 data = f.read(self.length(r))
260 data = f.read(self.length(r))
239 return decompress(data)
261 return decompress(data)
240
262
241 def revision(self, node):
263 def revision(self, node):
242 if node == nullid: return ""
264 if node == nullid: return ""
243 if self.cache and self.cache[0] == node: return self.cache[2]
265 if self.cache and self.cache[0] == node: return self.cache[2]
244
266
245 text = None
267 text = None
246 rev = self.rev(node)
268 rev = self.rev(node)
247 start, length, base, link, p1, p2, node = self.index[rev]
269 start, length, base, link, p1, p2, node = self.index[rev]
248 end = start + length
270 end = start + length
249 if base != rev: start = self.start(base)
271 if base != rev: start = self.start(base)
250
272
251 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
273 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
252 base = self.cache[1]
274 base = self.cache[1]
253 start = self.start(base + 1)
275 start = self.start(base + 1)
254 text = self.cache[2]
276 text = self.cache[2]
255 last = 0
277 last = 0
256
278
257 f = self.opener(self.datafile)
279 f = self.opener(self.datafile)
258 f.seek(start)
280 f.seek(start)
259 data = f.read(end - start)
281 data = f.read(end - start)
260
282
261 if text is None:
283 if text is None:
262 last = self.length(base)
284 last = self.length(base)
263 text = decompress(data[:last])
285 text = decompress(data[:last])
264
286
265 bins = []
287 bins = []
266 for r in xrange(base + 1, rev + 1):
288 for r in xrange(base + 1, rev + 1):
267 s = self.length(r)
289 s = self.length(r)
268 bins.append(decompress(data[last:last + s]))
290 bins.append(decompress(data[last:last + s]))
269 last = last + s
291 last = last + s
270
292
271 text = mdiff.patches(text, bins)
293 text = mdiff.patches(text, bins)
272
294
273 if node != hash(text, p1, p2):
295 if node != hash(text, p1, p2):
274 raise IOError("integrity check failed on %s:%d"
296 raise IOError("integrity check failed on %s:%d"
275 % (self.datafile, rev))
297 % (self.datafile, rev))
276
298
277 self.cache = (node, rev, text)
299 self.cache = (node, rev, text)
278 return text
300 return text
279
301
280 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
302 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
281 if text is None: text = ""
303 if text is None: text = ""
282 if p1 is None: p1 = self.tip()
304 if p1 is None: p1 = self.tip()
283 if p2 is None: p2 = nullid
305 if p2 is None: p2 = nullid
284
306
285 node = hash(text, p1, p2)
307 node = hash(text, p1, p2)
286
308
287 if node in self.nodemap:
309 if node in self.nodemap:
288 return node
310 return node
289
311
290 n = self.count()
312 n = self.count()
291 t = n - 1
313 t = n - 1
292
314
293 if n:
315 if n:
294 base = self.base(t)
316 base = self.base(t)
295 start = self.start(base)
317 start = self.start(base)
296 end = self.end(t)
318 end = self.end(t)
297 if not d:
319 if not d:
298 prev = self.revision(self.tip())
320 prev = self.revision(self.tip())
299 d = self.diff(prev, text)
321 d = self.diff(prev, text)
300 data = compress(d)
322 data = compress(d)
301 dist = end - start + len(data)
323 dist = end - start + len(data)
302
324
303 # full versions are inserted when the needed deltas
325 # full versions are inserted when the needed deltas
304 # become comparable to the uncompressed text
326 # become comparable to the uncompressed text
305 if not n or dist > len(text) * 2:
327 if not n or dist > len(text) * 2:
306 data = compress(text)
328 data = compress(text)
307 base = n
329 base = n
308 else:
330 else:
309 base = self.base(t)
331 base = self.base(t)
310
332
311 offset = 0
333 offset = 0
312 if t >= 0:
334 if t >= 0:
313 offset = self.end(t)
335 offset = self.end(t)
314
336
315 e = (offset, len(data), base, link, p1, p2, node)
337 e = (offset, len(data), base, link, p1, p2, node)
316
338
317 self.index.append(e)
339 self.index.append(e)
318 self.nodemap[node] = n
340 self.nodemap[node] = n
319 entry = struct.pack(indexformat, *e)
341 entry = struct.pack(indexformat, *e)
320
342
321 transaction.add(self.datafile, e[0])
343 transaction.add(self.datafile, e[0])
322 self.opener(self.datafile, "a").write(data)
344 self.opener(self.datafile, "a").write(data)
323 transaction.add(self.indexfile, n * len(entry))
345 transaction.add(self.indexfile, n * len(entry))
324 self.opener(self.indexfile, "a").write(entry)
346 self.opener(self.indexfile, "a").write(entry)
325
347
326 self.cache = (node, n, text)
348 self.cache = (node, n, text)
327 return node
349 return node
328
350
329 def ancestor(self, a, b):
351 def ancestor(self, a, b):
330 # calculate the distance of every node from root
352 # calculate the distance of every node from root
331 dist = {nullid: 0}
353 dist = {nullid: 0}
332 for i in xrange(self.count()):
354 for i in xrange(self.count()):
333 n = self.node(i)
355 n = self.node(i)
334 p1, p2 = self.parents(n)
356 p1, p2 = self.parents(n)
335 dist[n] = max(dist[p1], dist[p2]) + 1
357 dist[n] = max(dist[p1], dist[p2]) + 1
336
358
337 # traverse ancestors in order of decreasing distance from root
359 # traverse ancestors in order of decreasing distance from root
338 def ancestors(node):
360 def ancestors(node):
339 # we store negative distances because heap returns smallest member
361 # we store negative distances because heap returns smallest member
340 h = [(-dist[node], node)]
362 h = [(-dist[node], node)]
341 seen = {}
363 seen = {}
342 earliest = self.count()
364 earliest = self.count()
343 while h:
365 while h:
344 d, n = heapq.heappop(h)
366 d, n = heapq.heappop(h)
345 if n not in seen:
367 if n not in seen:
346 seen[n] = 1
368 seen[n] = 1
347 r = self.rev(n)
369 r = self.rev(n)
348 yield (-d, r, n)
370 yield (-d, r, n)
349 for p in self.parents(n):
371 for p in self.parents(n):
350 heapq.heappush(h, (-dist[p], p))
372 heapq.heappush(h, (-dist[p], p))
351
373
352 x = ancestors(a)
374 x = ancestors(a)
353 y = ancestors(b)
375 y = ancestors(b)
354 lx = x.next()
376 lx = x.next()
355 ly = y.next()
377 ly = y.next()
356
378
357 # increment each ancestor list until it is closer to root than
379 # increment each ancestor list until it is closer to root than
358 # the other, or they match
380 # the other, or they match
359 while 1:
381 while 1:
360 if lx == ly:
382 if lx == ly:
361 return lx[2]
383 return lx[2]
362 elif lx < ly:
384 elif lx < ly:
363 ly = y.next()
385 ly = y.next()
364 elif lx > ly:
386 elif lx > ly:
365 lx = x.next()
387 lx = x.next()
366
388
367 def group(self, linkmap):
389 def group(self, linkmap):
368 # given a list of changeset revs, return a set of deltas and
390 # given a list of changeset revs, return a set of deltas and
369 # metadata corresponding to nodes. the first delta is
391 # metadata corresponding to nodes. the first delta is
370 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
371 # have this parent as it has all history before these
393 # have this parent as it has all history before these
372 # changesets. parent is parent[0]
394 # changesets. parent is parent[0]
373
395
374 revs = []
396 revs = []
375 needed = {}
397 needed = {}
376
398
377 # find file nodes/revs that match changeset revs
399 # find file nodes/revs that match changeset revs
378 for i in xrange(0, self.count()):
400 for i in xrange(0, self.count()):
379 if self.index[i][3] in linkmap:
401 if self.index[i][3] in linkmap:
380 revs.append(i)
402 revs.append(i)
381 needed[i] = 1
403 needed[i] = 1
382
404
383 # if we don't have any revisions touched by these changesets, bail
405 # if we don't have any revisions touched by these changesets, bail
384 if not revs:
406 if not revs:
385 yield struct.pack(">l", 0)
407 yield struct.pack(">l", 0)
386 return
408 return
387
409
388 # add the parent of the first rev
410 # add the parent of the first rev
389 p = self.parents(self.node(revs[0]))[0]
411 p = self.parents(self.node(revs[0]))[0]
390 revs.insert(0, self.rev(p))
412 revs.insert(0, self.rev(p))
391
413
392 # for each delta that isn't contiguous in the log, we need to
414 # for each delta that isn't contiguous in the log, we need to
393 # reconstruct the base, reconstruct the result, and then
415 # reconstruct the base, reconstruct the result, and then
394 # calculate the delta. We also need to do this where we've
416 # calculate the delta. We also need to do this where we've
395 # stored a full version and not a delta
417 # stored a full version and not a delta
396 for i in xrange(0, len(revs) - 1):
418 for i in xrange(0, len(revs) - 1):
397 a, b = revs[i], revs[i + 1]
419 a, b = revs[i], revs[i + 1]
398 if a + 1 != b or self.base(b) == b:
420 if a + 1 != b or self.base(b) == b:
399 for j in xrange(self.base(a), a + 1):
421 for j in xrange(self.base(a), a + 1):
400 needed[j] = 1
422 needed[j] = 1
401 for j in xrange(self.base(b), b + 1):
423 for j in xrange(self.base(b), b + 1):
402 needed[j] = 1
424 needed[j] = 1
403
425
404 # calculate spans to retrieve from datafile
426 # calculate spans to retrieve from datafile
405 needed = needed.keys()
427 needed = needed.keys()
406 needed.sort()
428 needed.sort()
407 spans = []
429 spans = []
408 oo = -1
430 oo = -1
409 ol = 0
431 ol = 0
410 for n in needed:
432 for n in needed:
411 if n < 0: continue
433 if n < 0: continue
412 o = self.start(n)
434 o = self.start(n)
413 l = self.length(n)
435 l = self.length(n)
414 if oo + ol == o: # can we merge with the previous?
436 if oo + ol == o: # can we merge with the previous?
415 nl = spans[-1][2]
437 nl = spans[-1][2]
416 nl.append((n, l))
438 nl.append((n, l))
417 ol += l
439 ol += l
418 spans[-1] = (oo, ol, nl)
440 spans[-1] = (oo, ol, nl)
419 else:
441 else:
420 oo = o
442 oo = o
421 ol = l
443 ol = l
422 spans.append((oo, ol, [(n, l)]))
444 spans.append((oo, ol, [(n, l)]))
423
445
424 # read spans in, divide up chunks
446 # read spans in, divide up chunks
425 chunks = {}
447 chunks = {}
426 for span in spans:
448 for span in spans:
427 # we reopen the file for each span to make http happy for now
449 # we reopen the file for each span to make http happy for now
428 f = self.opener(self.datafile)
450 f = self.opener(self.datafile)
429 f.seek(span[0])
451 f.seek(span[0])
430 data = f.read(span[1])
452 data = f.read(span[1])
431
453
432 # divide up the span
454 # divide up the span
433 pos = 0
455 pos = 0
434 for r, l in span[2]:
456 for r, l in span[2]:
435 chunks[r] = decompress(data[pos: pos + l])
457 chunks[r] = decompress(data[pos: pos + l])
436 pos += l
458 pos += l
437
459
438 # helper to reconstruct intermediate versions
460 # helper to reconstruct intermediate versions
439 def construct(text, base, rev):
461 def construct(text, base, rev):
440 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
462 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
441 return mdiff.patches(text, bins)
463 return mdiff.patches(text, bins)
442
464
443 # build deltas
465 # build deltas
444 deltas = []
466 deltas = []
445 for d in xrange(0, len(revs) - 1):
467 for d in xrange(0, len(revs) - 1):
446 a, b = revs[d], revs[d + 1]
468 a, b = revs[d], revs[d + 1]
447 n = self.node(b)
469 n = self.node(b)
448
470
449 # do we need to construct a new delta?
471 # do we need to construct a new delta?
450 if a + 1 != b or self.base(b) == b:
472 if a + 1 != b or self.base(b) == b:
451 if a >= 0:
473 if a >= 0:
452 base = self.base(a)
474 base = self.base(a)
453 ta = chunks[self.base(a)]
475 ta = chunks[self.base(a)]
454 ta = construct(ta, base, a)
476 ta = construct(ta, base, a)
455 else:
477 else:
456 ta = ""
478 ta = ""
457
479
458 base = self.base(b)
480 base = self.base(b)
459 if a > base:
481 if a > base:
460 base = a
482 base = a
461 tb = ta
483 tb = ta
462 else:
484 else:
463 tb = chunks[self.base(b)]
485 tb = chunks[self.base(b)]
464 tb = construct(tb, base, b)
486 tb = construct(tb, base, b)
465 d = self.diff(ta, tb)
487 d = self.diff(ta, tb)
466 else:
488 else:
467 d = chunks[b]
489 d = chunks[b]
468
490
469 p = self.parents(n)
491 p = self.parents(n)
470 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
492 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
471 l = struct.pack(">l", len(meta) + len(d) + 4)
493 l = struct.pack(">l", len(meta) + len(d) + 4)
472 yield l
494 yield l
473 yield meta
495 yield meta
474 yield d
496 yield d
475
497
476 yield struct.pack(">l", 0)
498 yield struct.pack(">l", 0)
477
499
478 def addgroup(self, revs, linkmapper, transaction, unique=0):
500 def addgroup(self, revs, linkmapper, transaction, unique=0):
479 # given a set of deltas, add them to the revision log. the
501 # given a set of deltas, add them to the revision log. the
480 # first delta is against its parent, which should be in our
502 # first delta is against its parent, which should be in our
481 # log, the rest are against the previous delta.
503 # log, the rest are against the previous delta.
482
504
483 # track the base of the current delta log
505 # track the base of the current delta log
484 r = self.count()
506 r = self.count()
485 t = r - 1
507 t = r - 1
486 node = nullid
508 node = nullid
487
509
488 base = prev = -1
510 base = prev = -1
489 start = end = measure = 0
511 start = end = measure = 0
490 if r:
512 if r:
491 start = self.start(self.base(t))
513 start = self.start(self.base(t))
492 end = self.end(t)
514 end = self.end(t)
493 measure = self.length(self.base(t))
515 measure = self.length(self.base(t))
494 base = self.base(t)
516 base = self.base(t)
495 prev = self.tip()
517 prev = self.tip()
496
518
497 transaction.add(self.datafile, end)
519 transaction.add(self.datafile, end)
498 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
520 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
499 dfh = self.opener(self.datafile, "a")
521 dfh = self.opener(self.datafile, "a")
500 ifh = self.opener(self.indexfile, "a")
522 ifh = self.opener(self.indexfile, "a")
501
523
502 # loop through our set of deltas
524 # loop through our set of deltas
503 chain = None
525 chain = None
504 for chunk in revs:
526 for chunk in revs:
505 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
527 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
506 link = linkmapper(cs)
528 link = linkmapper(cs)
507 if node in self.nodemap:
529 if node in self.nodemap:
508 # this can happen if two branches make the same change
530 # this can happen if two branches make the same change
509 if unique:
531 if unique:
510 raise RevlogError("already have %s" % hex(node[:4]))
532 raise RevlogError("already have %s" % hex(node[:4]))
511 chain = node
533 chain = node
512 continue
534 continue
513 delta = chunk[80:]
535 delta = chunk[80:]
514
536
515 if not chain:
537 if not chain:
516 # retrieve the parent revision of the delta chain
538 # retrieve the parent revision of the delta chain
517 chain = p1
539 chain = p1
518 if not chain in self.nodemap:
540 if not chain in self.nodemap:
519 raise RevlogError("unknown base %s" % short(chain[:4]))
541 raise RevlogError("unknown base %s" % short(chain[:4]))
520
542
521 # full versions are inserted when the needed deltas become
543 # full versions are inserted when the needed deltas become
522 # comparable to the uncompressed text or when the previous
544 # comparable to the uncompressed text or when the previous
523 # version is not the one we have a delta against. We use
545 # version is not the one we have a delta against. We use
524 # the size of the previous full rev as a proxy for the
546 # the size of the previous full rev as a proxy for the
525 # current size.
547 # current size.
526
548
527 if chain == prev:
549 if chain == prev:
528 cdelta = compress(delta)
550 cdelta = compress(delta)
529
551
530 if chain != prev or (end - start + len(cdelta)) > measure * 2:
552 if chain != prev or (end - start + len(cdelta)) > measure * 2:
531 # flush our writes here so we can read it in revision
553 # flush our writes here so we can read it in revision
532 dfh.flush()
554 dfh.flush()
533 ifh.flush()
555 ifh.flush()
534 text = self.revision(chain)
556 text = self.revision(chain)
535 text = self.patches(text, [delta])
557 text = self.patches(text, [delta])
536 chk = self.addrevision(text, transaction, link, p1, p2)
558 chk = self.addrevision(text, transaction, link, p1, p2)
537 if chk != node:
559 if chk != node:
538 raise RevlogError("consistency error adding group")
560 raise RevlogError("consistency error adding group")
539 measure = len(text)
561 measure = len(text)
540 else:
562 else:
541 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
563 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
542 self.index.append(e)
564 self.index.append(e)
543 self.nodemap[node] = r
565 self.nodemap[node] = r
544 dfh.write(cdelta)
566 dfh.write(cdelta)
545 ifh.write(struct.pack(indexformat, *e))
567 ifh.write(struct.pack(indexformat, *e))
546
568
547 t, r, chain, prev = r, r + 1, node, node
569 t, r, chain, prev = r, r + 1, node, node
548 start = self.start(self.base(t))
570 start = self.start(self.base(t))
549 end = self.end(t)
571 end = self.end(t)
550
572
551 dfh.close()
573 dfh.close()
552 ifh.close()
574 ifh.close()
553 return node
575 return node
General Comments 0
You need to be logged in to leave comments. Login now