revlog: raise informative exception if file is missing.
Bryan O'Sullivan
r1201:59bfbdbc default
@@ -1,646 +1,650 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 import zlib, struct, sha, binascii, heapq
13 import zlib, struct, sha, binascii, heapq
14 import mdiff
14 import mdiff
15 from node import *
15 from node import *
16
16
17 def hash(text, p1, p2):
17 def hash(text, p1, p2):
18 """generate a hash from the given text and its parent hashes
18 """generate a hash from the given text and its parent hashes
19
19
20 This hash combines both the current file contents and its history
20 This hash combines both the current file contents and its history
21 in a manner that makes it easy to distinguish nodes with the same
21 in a manner that makes it easy to distinguish nodes with the same
22 content in the revision graph.
22 content in the revision graph.
23 """
23 """
24 l = [p1, p2]
24 l = [p1, p2]
25 l.sort()
25 l.sort()
26 s = sha.new(l[0])
26 s = sha.new(l[0])
27 s.update(l[1])
27 s.update(l[1])
28 s.update(text)
28 s.update(text)
29 return s.digest()
29 return s.digest()
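
For illustration, a minimal standalone sketch of the node hash computed above, using the same sha module as the file; the text and parent values are hypothetical:

    import sha
    from node import nullid

    p1, p2 = nullid, nullid       # a root revision has two null parents
    l = [p1, p2]
    l.sort()                      # sorting makes the hash parent-order independent
    s = sha.new(l[0])
    s.update(l[1])
    s.update("hello\n")           # the revision text
    node = s.digest()             # the 20-byte nodeid stored in the index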

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text: return text
     if len(text) < 44:
         if text[0] == '\0': return text
         return 'u' + text
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return text
         return 'u' + text
     return bin

 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError("unknown compression type %s" % t)
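
The first byte acts as a tag: '\0' marks literal chunk data, 'x' is the first byte of every default zlib stream, and 'u' flags text stored uncompressed because zlib would not have helped. A hedged round-trip sketch using the two functions above:

    assert decompress(compress("")) == ""
    assert decompress(compress("short text")) == "short text"  # stored as 'u' + text
    big = "a" * 1000
    assert decompress(compress(big)) == big                    # zlib stream, tag byte 'x'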

 indexformat = ">4l20s20s20s"
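
The format string describes one fixed-size index record per revision: four big-endian 32-bit integers (offset, size, base, linkrev) followed by three 20-byte strings (p1, p2, nodeid), 76 bytes in all. A sketch with hypothetical values:

    import struct
    e = (0, 42, 0, 7, "\0" * 20, "\0" * 20, "\xaa" * 20)
    record = struct.pack(indexformat, *e)
    assert len(record) == struct.calcsize(indexformat) == 76
    assert struct.unpack(indexformat, record) == e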

 class lazyparser:
     """
     this class avoids the need to parse the entirety of large indices

     By default we parse and load 1000 entries at a time.

     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog

     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map

         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1

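A sketch of the block arithmetic in load() above: any requested position maps to its enclosing block of 1000 records, so a miss parses at most one block (values hypothetical):

    pos = 2345
    block = pos / 1000          # 2 (integer division in this Python-2-era code)
    i = block * 1000            # first record of the block: 2000
    end = min(2500, i + 1000)   # parse records 2000..2499, clamped to self.l
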
 class lazyindex:
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def append(self, e):
         self.p.index.append(e)

 class lazymap:
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0: raise KeyError("node " + hex(key))
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val

 class RevlogError(Exception): pass

 class revlog:
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None

         try:
             i = self.opener(self.indexfile).read()
         except IOError:
             i = ""

         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l

             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1

             self.nodemap = dict(m)
             self.nodemap[nullid] = -1

     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
-    def rev(self, node): return self.nodemap[node]
-    def linkrev(self, node): return self.index[self.nodemap[node]][3]
+    def rev(self, node):
+        try:
+            return self.nodemap[node]
+        except KeyError:
+            raise KeyError('%s: no node %s' % (self.indexfile, hex(node)))
+    def linkrev(self, node): return self.index[self.rev(node)][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
-        return self.index[self.nodemap[node]][4:6]
+        return self.index[self.rev(node)][4:6]
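
This hunk is the point of the commit: looking up a node missing from a revlog used to escape as a bare KeyError from the nodemap dict; rev() now reports which index file was searched and the hex nodeid, and linkrev() and parents() route through rev() so they inherit the same message. A hypothetical sketch of the new behavior, assuming fl is a revlog whose index file is "data/foo.i":

    try:
        fl.rev("\x01" * 20)       # a nodeid this revlog has never seen
    except KeyError, inst:
        print inst                # "data/foo.i: no node " plus the 40-digit hex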

     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]

     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

     def heads(self, stop=None):
         """return the list of all nodes that have no children"""
         p = {}
         h = []
         stoprev = 0
         if stop and stop in self.nodemap:
             stoprev = self.rev(stop)

         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             if n == stop:
                 break
             if r < stoprev:
                 break
             for pn in self.parents(n):
                 p[pn] = 1
         return h

     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c

     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise KeyError("Ambiguous identifier")
             if len(c) < 1: raise KeyError("No match found")
             return c[0]

         return None

     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)

     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)

     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)

     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]

         # look up what we need to read
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)

         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0

         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)

         if text is None:
             last = self.length(base)
             text = decompress(data[:last])

         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s

         text = mdiff.patches(text, bins)

         if node != hash(text, p1, p2):
             raise IOError("integrity check failed on %s:%d"
                           % (self.datafile, rev))

         self.cache = (node, rev, text)
         return text
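
For illustration, the reconstruction step of revision() factored out against hypothetical pre-sliced chunks, where chunks[base] holds the full snapshot and later entries hold one compressed delta each:

    def reconstruct(chunks, base, rev):
        # decompress the snapshot, then apply every later delta at once
        text = decompress(chunks[base])
        bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
        return mdiff.patches(text, bins)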

     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log

         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid

         node = hash(text, p1, p2)

         if node in self.nodemap:
             return node

         n = self.count()
         t = n - 1

         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, text)
             data = compress(d)
             dist = end - start + len(data)

         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)

         offset = 0
         if t >= 0:
             offset = self.end(t)

         e = (offset, len(data), base, link, p1, p2, node)

         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)

         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)

         self.cache = (node, n, text)
         return node
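
The dist test above is the chain-bounding policy in one line: dist counts the data-file bytes a reader would need to rebuild this revision (base snapshot through the new delta), and once that exceeds twice the plain-text size a fresh snapshot is cheaper to read than the chain. A numeric sketch with hypothetical sizes:

    chain_bytes = 9000    # end - start + len(data): snapshot plus deltas so far
    text_len = 4000       # uncompressed size of the revision being added
    cut_chain = chain_bytes > text_len * 2    # True: store a full version here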

     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1

         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     r = self.rev(n)
                     yield (-d, r, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))

         x = ancestors(a)
         y = ancestors(b)
         lx = x.next()
         ly = y.next()

         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             if lx == ly:
                 return lx[2]
             elif lx < ly:
                 ly = y.next()
             elif lx > ly:
                 lx = x.next()

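The merge loop works because each ancestors() stream yields (distance-from-root, rev, node) tuples in strictly decreasing order, so repeatedly advancing whichever stream is currently greater must land both on the first tuple they share: the least common ancestor. A sketch of that contract with hypothetical tuples:

    x = iter([(3, 5, 'n5'), (2, 2, 'n2'), (1, 0, 'n0')])   # ancestors of a
    y = iter([(2, 2, 'n2'), (1, 0, 'n0')])                 # ancestors of b
    lx, ly = x.next(), y.next()
    while 1:
        if lx == ly:
            break          # (2, 2, 'n2') is the common ancestor
        elif lx < ly:
            ly = y.next()
        else:
            lx = x.next()
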
     def group(self, linkmap):
         """calculate a delta group

         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. The first delta is
         parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
         have this parent, as it has all history before these
         changesets. parent is parent[0].
         """
         revs = []
         needed = {}

         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1

         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return

         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))

         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1

         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))

         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])

             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l

         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)

         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)

             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""

                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]

             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d

         yield struct.pack(">l", 0)
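
Each entry placed on the wire is a 4-byte big-endian length that counts itself, an 80-byte header of four 20-byte ids (node, p1, p2, changeset link), then the delta; a zero length ends the group, mirroring what addgroup() below unpacks. A sketch of reading one entry from a hypothetical file-like object f:

    def readchunk(f):
        # one (node, p1, p2, cs, delta) entry, or None at end of group
        l = struct.unpack(">l", f.read(4))[0]
        if l == 0:
            return None
        chunk = f.read(l - 4)           # the length prefix counted itself
        node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
        return node, p1, p2, cs, chunk[80:]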

     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group

         Given a set of deltas, add them to the revision log. The
         first delta is against its parent, which should be in our
         log; the rest are against the previous delta.
         """

         # track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid

         base = prev = -1
         start = end = measure = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()

         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")

         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 if unique:
                     raise RevlogError("already have %s" % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]

             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError("unknown base %s" % short(chain[:4]))

             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.

             if chain == prev:
                 cdelta = compress(delta)

             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError("consistency error adding group")
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))

             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)

         dfh.close()
         ifh.close()
         return node