Add some docstrings to revlog.py
mpm@selenic.com
r1083:30974cf7 default
@@ -1,575 +1,650 @@
-# revlog.py - storage back-end for mercurial
-#
-# This provides efficient delta storage with O(1) retrieve and append
-# and O(changes) merge between branches
-#
-# Copyright 2005 Matt Mackall <mpm@selenic.com>
-#
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
+"""
+revlog.py - storage back-end for mercurial
+
+This provides efficient delta storage with O(1) retrieve and append
+and O(changes) merge between branches
+
+Copyright 2005 Matt Mackall <mpm@selenic.com>
+
+This software may be used and distributed according to the terms
+of the GNU General Public License, incorporated herein by reference.
+"""

 import zlib, struct, sha, binascii, heapq
 from mercurial import mdiff

 def hex(node): return binascii.hexlify(node)
 def bin(node): return binascii.unhexlify(node)
 def short(node): return hex(node[:6])

 def compress(text):
+    """ generate a possibly-compressed representation of text """
     if not text: return text
     if len(text) < 44:
         if text[0] == '\0': return text
         return 'u' + text
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return text
         return 'u' + text
     return bin

 def decompress(bin):
+    """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError("unknown compression type %s" % t)

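The first byte of every stored chunk doubles as a type tag: zlib streams happen to begin with 'x', text that already starts with a NUL byte is stored literally, and anything not worth compressing is prefixed with 'u'. A quick round-trip sketch (Python 2, as in the module above; the sample strings are invented):

    # every stored form begins with a tag decompress() can dispatch on:
    # '\0' literal text, 'u' uncompressed, 'x' a zlib stream
    for text in ["", "short text", "\0binary payload", "x" * 200]:
        assert decompress(compress(text)) == text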
 def hash(text, p1, p2):
+    """generate a hash from the given text and its parent hashes
+
+    This hash combines both the current file contents and its history
+    in a manner that makes it easy to distinguish nodes with the same
+    content in the revision graph.
+    """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

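Sorting the parents first makes the nodeid independent of parent order, so the same merge recorded with its parents swapped still yields the same hash. A small sketch (Python 2 sha, as imported above; the contents and parent ids are invented):

    p_null = "\0" * 20             # the null parent (nullid, defined just below)
    p_other = "\x01" * 20          # an invented parent nodeid
    a = hash("contents\n", p_null, p_other)
    b = hash("contents\n", p_other, p_null)
    assert a == b                  # parent order is canonicalized by the sort
    print hex(a)                   # 40-character hex nodeid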
 nullid = "\0" * 20
 indexformat = ">4l20s20s20s"

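indexformat describes one fixed-size index record: four big-endian 32-bit longs followed by three 20-byte strings, 76 bytes in all. The fields, in order, are the data-file offset, the compressed chunk length, the base revision of the delta chain, the linkrev, then the two parent nodeids and the record's own nodeid (see the unpacking in revlog.__init__ below). A hedged decoding sketch, where index_data stands for the raw contents of an index file:

    s = struct.calcsize(indexformat)            # 76 bytes per record
    record = index_data[7 * s:(7 + 1) * s]      # raw bytes of (hypothetical) rev 7
    offset, size, base, linkrev, p1, p2, node = struct.unpack(indexformat, record)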
 class lazyparser:
+    """
+    this class avoids the need to parse the entirety of large indices
+
+    By default we parse and load 1000 entries at a time.
+
+    If no position is specified, we load the whole index, and replace
+    the lazy objects in revlog with the underlying objects for
+    efficiency in cases where we look at most of the nodes.
+    """
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog

     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map

         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1

 class lazyindex:
+    """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def append(self, e):
         self.p.index.append(e)

 class lazymap:
+    """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0: raise KeyError("node " + hex(key))
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val

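lazymap.load has a neat trick for nodes that haven't been parsed yet: it scans the raw index bytes for the 20-byte nodeid with str.find, converts the byte offset into a record number, and parses only the surrounding 1000-entry block. A false match (the 20 bytes straddling a record boundary, say) is harmless, because __getitem__ re-checks the parsed map afterwards and raises KeyError if the node still isn't there. Restated as a sketch, with parser standing for a lazyparser instance and node20 an invented 20-byte nodeid:

    n = parser.data.find(node20)   # byte offset of the first candidate match
    pos = n / parser.s             # record number containing that offset
    parser.load(pos)               # parse just that 1000-entry block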
 class RevlogError(Exception): pass

 class revlog:
+    """
+    the underlying revision storage object
+
+    A revlog consists of two parts, an index and the revision data.
+
+    The index is a file with a fixed record size containing
+    information on each revision, including its nodeid (hash), the
+    nodeids of its parents, the position and offset of its data within
+    the data file, and the revision it's based on. Finally, each entry
+    contains a linkrev entry that can serve as a pointer to external
+    data.
+
+    The revision data itself is a linear collection of data chunks.
+    Each chunk represents a revision and is usually represented as a
+    delta against the previous chunk. To bound lookup time, runs of
+    deltas are limited to about 2 times the length of the original
+    version data. This makes retrieval of a version proportional to
+    its size, or O(1) relative to the number of revisions.
+
+    Both pieces of the revlog are written to in an append-only
+    fashion, which means we never need to rewrite a file to insert or
+    remove data, and can use some simple techniques to avoid the need
+    for locking while reading.
+    """
     def __init__(self, opener, indexfile, datafile):
+        """
+        create a revlog object
+
+        opener is a function that abstracts the file opening operation
+        and can be used to implement COW semantics or the like.
+        """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None

         try:
             i = self.opener(self.indexfile).read()
         except IOError:
             i = ""

         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l

             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1

             self.nodemap = dict(m)
             self.nodemap[nullid] = -1

     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node): return self.nodemap[node]
     def linkrev(self, node): return self.index[self.nodemap[node]][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.nodemap[node]][4:6]

     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]

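These accessors are all constant-time reads of index tuples, and together they are enough to walk the whole graph. A hedged sketch that prints every revision with its parents, assuming log is an opened revlog instance:

    for r in xrange(log.count()):
        n = log.node(r)
        p1, p2 = log.parents(n)
        print r, short(n), short(p1), short(p2)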
     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

     def heads(self, stop=None):
+        """return the list of all nodes that have no children"""
         p = {}
         h = []
         stoprev = 0
         if stop and stop in self.nodemap:
             stoprev = self.rev(stop)

         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             if n == stop:
                 break
             if r < stoprev:
                 break
             for pn in self.parents(n):
                 p[pn] = 1
         return h

     def children(self, node):
+        """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c

     def lookup(self, id):
+        """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise KeyError("Ambiguous identifier")
             if len(c) < 1: raise KeyError("No match found")
             return c[0]

         return None

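lookup accepts either a decimal revision number (negative values count back from the tip) or a hex prefix of a nodeid; an ambiguous prefix or a miss raises KeyError. Hedged usage, again assuming an opened revlog instance log:

    log.lookup("0")          # first revision, by number
    log.lookup("-1")         # the tip, counting backwards
    log.lookup("30974cf7")   # by a unique hex nodeid prefix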
     def diff(self, a, b):
+        """return a delta between two revisions"""
         return mdiff.textdiff(a, b)

     def patches(self, t, pl):
+        """apply a list of patches to a string"""
         return mdiff.patches(t, pl)

     def delta(self, node):
+        """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)

     def revision(self, node):
+        """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]

+        # look up what we need to read
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)

+        # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0

         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)

         if text is None:
             last = self.length(base)
             text = decompress(data[:last])

         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s

         text = mdiff.patches(text, bins)

         if node != hash(text, p1, p2):
             raise IOError("integrity check failed on %s:%d"
                           % (self.datafile, rev))

         self.cache = (node, rev, text)
         return text

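Stripped of the caching and the batched read, revision() is simply: take the full text stored at the chain base, then apply every stored delta from base + 1 through rev in order, and verify the result against the nodeid. A simplified, unoptimized sketch of that core (read_chunk is an invented helper; the real method reads the whole span in one call and reuses the cache):

    def read_chunk(log, rev):
        # raw, possibly compressed bytes for one revision's chunk
        f = log.opener(log.datafile)
        f.seek(log.start(rev))
        return f.read(log.length(rev))

    def revision_slow(log, rev):
        # full text at the chain base, then patch forward to rev
        base = log.base(rev)
        text = decompress(read_chunk(log, base))
        bins = [decompress(read_chunk(log, r))
                for r in xrange(base + 1, rev + 1)]
        return mdiff.patches(text, bins)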
     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
+        """add a revision to the log
+
+        text - the revision data to add
+        transaction - the transaction object used for rollback
+        link - the linkrev data to add
+        p1, p2 - the parent nodeids of the revision
+        d - an optional precomputed delta
+        """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid

         node = hash(text, p1, p2)

         if node in self.nodemap:
             return node

         n = self.count()
         t = n - 1

         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, text)
             data = compress(d)
             dist = end - start + len(data)

         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)

         offset = 0
         if t >= 0:
             offset = self.end(t)

         e = (offset, len(data), base, link, p1, p2, node)

         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)

         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)

         self.cache = (node, n, text)
         return node

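The snapshot heuristic keeps delta chains short: dist is the number of bytes needed to reconstruct the new revision (the full version at the chain base plus all deltas, including the new one), and once that exceeds twice the plain-text size, a full version is stored and a new chain begins. Illustrative (invented) numbers:

    # chain base at offset 0, chain currently ends at 9000, new delta is 2000
    end, start, len_data, len_text = 9000, 0, 2000, 5000
    dist = end - start + len_data    # 11000 bytes to read back for one revision
    dist > len_text * 2              # True: store the 5000-byte full text instead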
     def ancestor(self, a, b):
+        """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1

         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     r = self.rev(n)
                     yield (-d, r, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))

         x = ancestors(a)
         y = ancestors(b)
         lx = x.next()
         ly = y.next()

         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             if lx == ly:
                 return lx[2]
             elif lx < ly:
                 ly = y.next()
             elif lx > ly:
                 lx = x.next()

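Each generator yields (distance, rev, node) tuples in decreasing order, so the loop always advances whichever stream is currently "higher" until both agree. A worked trace on an invented three-revision chain n0 <- n1 <- n2 (distances 1, 2, 3 from root):

    # ancestors(n1) yields (2, 1, n1), (1, 0, n0), (0, -1, nullid)
    # ancestors(n2) yields (3, 2, n2), (2, 1, n1), ...
    # lx = (2, 1, n1) < ly = (3, 2, n2), so y advances to (2, 1, n1);
    # the tuples now match, so ancestor(n1, n2) == n1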
     def group(self, linkmap):
-        # given a list of changeset revs, return a set of deltas and
-        # metadata corresponding to nodes. the first delta is
-        # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
-        # have this parent as it has all history before these
-        # changesets. parent is parent[0]
-
+        """calculate a delta group
+
+        Given a list of changeset revs, return a set of deltas and
+        metadata corresponding to nodes. The first delta is
+        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
+        have this parent, as it has all history before these
+        changesets. parent is parent[0]
+        """
         revs = []
         needed = {}

         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1

         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return

         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))

         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1

         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))

         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])

             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l

         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)

         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)

             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""

                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]

             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d

         yield struct.pack(">l", 0)

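On the wire, each entry the generator yields is length-prefixed: a 4-byte big-endian total length (counting the 4 length bytes themselves), an 80-byte header of four 20-byte nodeids (node, p1, p2, and the changeset node used for the linkrev), then the delta payload; a zero length terminates the group. A hedged reader sketch, mirroring what addgroup below expects from each chunk:

    def read_group(f):
        # parse a delta group stream from a file-like object
        while 1:
            l = struct.unpack(">l", f.read(4))[0]
            if l <= 4: break                  # zero length marks the end
            chunk = f.read(l - 4)
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            yield node, p1, p2, cs, chunk[80:]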
     def addgroup(self, revs, linkmapper, transaction, unique=0):
-        # given a set of deltas, add them to the revision log. the
-        # first delta is against its parent, which should be in our
-        # log, the rest are against the previous delta.
+        """
+        add a delta group
+
+        Given a set of deltas, add them to the revision log. The
+        first delta is against its parent, which should be in our
+        log; the rest are against the previous delta.
+        """

-        # track the base of the current delta log
+        #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid

         base = prev = -1
         start = end = measure = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()

         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")

         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 if unique:
                     raise RevlogError("already have %s" % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]

             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError("unknown base %s" % short(chain[:4]))

             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.

             if chain == prev:
                 cdelta = compress(delta)

             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError("consistency error adding group")
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))

             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)

         dfh.close()
         ifh.close()
         return node
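Note the rollback story implied by the two transaction.add calls above: before anything is appended, the pre-write length of each file is journaled, so an aborted pull can restore the revlog simply by truncating both files back to those offsets. A hypothetical minimal stand-in (not Mercurial's actual transaction class) to illustrate the contract addgroup relies on:

    class transaction_sketch:
        # journal (file, length-before-append) pairs; truncate on abort
        def __init__(self, opener):
            self.opener = opener
            self.entries = []
        def add(self, file, offset):
            self.entries.append((file, offset))
        def abort(self):
            for file, offset in self.entries:
                self.opener(file, "a").truncate(offset)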