Fix traceback on bad revlog.lookup
Matt Mackall
r1393:67779d34 default
@@ -1,674 +1,674
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from demandload import demandload
14 from demandload import demandload
15 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
15 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
16
16
17 def hash(text, p1, p2):
17 def hash(text, p1, p2):
18 """generate a hash from the given text and its parent hashes
18 """generate a hash from the given text and its parent hashes
19
19
20 This hash combines both the current file contents and its history
20 This hash combines both the current file contents and its history
21 in a manner that makes it easy to distinguish nodes with the same
21 in a manner that makes it easy to distinguish nodes with the same
22 content in the revision graph.
22 content in the revision graph.
23 """
23 """
24 l = [p1, p2]
24 l = [p1, p2]
25 l.sort()
25 l.sort()
26 s = sha.new(l[0])
26 s = sha.new(l[0])
27 s.update(l[1])
27 s.update(l[1])
28 s.update(text)
28 s.update(text)
29 return s.digest()
29 return s.digest()
30
30
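Because the two parents are sorted before hashing, the nodeid is independent of parent order. A minimal sketch of that invariant, exercising the hash() above (illustrative only, not part of revlog.py):

# swapping p1 and p2 yields the same nodeid, since hash() sorts them first
assert hash("contents", "a" * 20, "b" * 20) == hash("contents", "b" * 20, "a" * 20)
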
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text: return text
    if len(text) < 44:
        if text[0] == '\0': return text
        return 'u' + text
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return text
        return 'u' + text
    return bin

def decompress(bin):
    """ decompress the given input """
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise RevlogError("unknown compression type %s" % t)

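The 'u' marker tags text stored uncompressed, and a leading NUL byte passes through untouched, so decompress() can dispatch on the first byte alone. A quick round-trip sketch over the pair above (illustrative, not part of the file):

# every branch of compress() survives a round trip through decompress()
for t in ["", "short", "x" * 1000, "\0starts with NUL"]:
    assert decompress(compress(t)) == t
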
indexformat = ">4l20s20s20s"

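Each index record packs four 32-bit big-endian integers (offset, size, base, linkrev) followed by three 20-byte SHA-1 values (p1, p2, nodeid), 76 bytes in all. A sketch of the arithmetic (illustrative):

# 4 longs (4 bytes each) + three 20-byte hashes = 16 + 60 = 76 bytes per entry
assert struct.calcsize(indexformat) == 76
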
class lazyparser:
    """
    this class avoids the need to parse the entirety of large indices

    By default we parse and load 1000 entries at a time.

    If no position is specified, we load the whole index, and replace
    the lazy objects in revlog with the underlying objects for
    efficiency in cases where we look at most of the nodes.
    """
    def __init__(self, data, revlog):
        self.data = data
        self.s = struct.calcsize(indexformat)
        self.l = len(data)/self.s
        self.index = [None] * self.l
        self.map = {nullid: -1}
        self.all = 0
        self.revlog = revlog

    def load(self, pos=None):
        if self.all: return
        if pos is not None:
            block = pos / 1000
            i = block * 1000
            end = min(self.l, i + 1000)
        else:
            self.all = 1
            i = 0
            end = self.l
            self.revlog.index = self.index
            self.revlog.nodemap = self.map

        while i < end:
            d = self.data[i * self.s: (i + 1) * self.s]
            e = struct.unpack(indexformat, d)
            self.index[i] = e
            self.map[e[6]] = i
            i += 1

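load(pos) rounds the requested position down to a 1000-entry block, so a random lookup parses at most 1000 records rather than the whole index. The block arithmetic, traced for a hypothetical 5000-entry index:

pos = 2500
block = pos / 1000            # 2
i = block * 1000              # first entry parsed: 2000
end = min(5000, i + 1000)     # one past the last entry parsed: 3000
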
class lazyindex:
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        self.p.load(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return self.p.index[pos] or self.load(pos)
    def append(self, e):
        self.p.index.append(e)

class lazymap:
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        if self.p.all: return
        n = self.p.data.find(key)
        if n < 0:
            raise KeyError(key)
        pos = n / self.p.s
        self.p.load(pos)
    def __contains__(self, key):
        self.p.load()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            try:
                yield self.p.index[i][6]
            except:
                self.p.load(i)
                yield self.p.index[i][6]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val

class RevlogError(Exception): pass

class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, datafile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener
        self.cache = None

        try:
            i = self.opener(self.indexfile).read()
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            i = ""

        if len(i) > 10000:
            # big index, let's parse it on demand
            parser = lazyparser(i, self)
            self.index = lazyindex(parser)
            self.nodemap = lazymap(parser)
        else:
            s = struct.calcsize(indexformat)
            l = len(i) / s
            self.index = [None] * l
            m = [None] * l

            n = 0
            for f in xrange(0, len(i), s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                m[n] = (e[6], n)
                self.index[n] = e
                n += 1

            self.nodemap = dict(m)
            self.nodemap[nullid] = -1

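The opener passed to the constructor only has to behave like open() relative to some store directory; the repository layer supplies the real one. A minimal sketch of the contract, with hypothetical paths (not part of revlog.py):

import os

def makeopener(base):
    # hypothetical stand-in for the repository's opener
    def o(path, mode="r"):
        return open(os.path.join(base, path), mode)
    return o

# r = revlog(makeopener("/repo/.hg"), "00changelog.i", "00changelog.d")
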
    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise RevlogError('%s: no node %s' % (self.indexfile, hex(node)))
    def linkrev(self, node): return self.index[self.rev(node)][3]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        return self.index[self.rev(node)][4:6]

    def start(self, rev): return self.index[rev][0]
    def length(self, rev): return self.index[rev][1]
    def end(self, rev): return self.start(rev) + self.length(rev)
    def base(self, rev): return self.index[rev][2]

    def reachable(self, rev, stop=None):
        reachable = {}
        visit = [rev]
        reachable[rev] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def heads(self, stop=None):
        """return the list of all nodes that have no children"""
        p = {}
        h = []
        stoprev = 0
        if stop and stop in self.nodemap:
            stoprev = self.rev(stop)

        for r in range(self.count() - 1, -1, -1):
            n = self.node(r)
            if n not in p:
                h.append(n)
            if n == stop:
                break
            if r < stoprev:
                break
            for pn in self.parents(n):
                p[pn] = 1
        return h

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            n = self.node(r)
            for pn in self.parents(n):
                if pn == node:
                    c.append(n)
                    continue
                elif pn == nullid:
                    continue
        return c

    def lookup(self, id):
        """locate a node based on revision number or subset of hex nodeid"""
        try:
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            c = []
            for n in self.nodemap:
                if hex(n).startswith(id):
                    c.append(n)
-            if len(c) > 1: raise KeyError("Ambiguous identifier")
-            if len(c) < 1: raise KeyError("No match found")
+            if len(c) > 1: raise RevlogError("Ambiguous identifier")
+            if len(c) < 1: raise RevlogError("No match found")
            return c[0]

        return None

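This is the substance of the fix: an ambiguous or unmatched identifier used to escape lookup() as a raw KeyError and print a traceback, while raising RevlogError lets callers that already guard revlog operations report the failure cleanly. A hypothetical caller (names assumed for illustration):

try:
    n = r.lookup(userid)          # userid is an arbitrary user-supplied string
except RevlogError, inst:
    print "abort: %s" % inst      # "Ambiguous identifier" / "No match found"
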
    def diff(self, a, b):
        """return a delta between two revisions"""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """apply a list of patches to a string"""
        return mdiff.patches(t, pl)

    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        b = self.base(r)
        if r == b:
            return self.diff(self.revision(self.node(r - 1)),
                             self.revision(node))
        else:
            f = self.opener(self.datafile)
            f.seek(self.start(r))
            data = f.read(self.length(r))
            return decompress(data)

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        start, length, base, link, p1, p2, node = self.index[rev]
        end = start + length
        if base != rev: start = self.start(base)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        if text is None:
            last = self.length(base)
            text = decompress(data[:last])

        bins = []
        for r in xrange(base + 1, rev + 1):
            s = self.length(r)
            bins.append(decompress(data[last:last + s]))
            last = last + s

        text = mdiff.patches(text, bins)

        if node != hash(text, p1, p2):
            raise RevlogError("integrity check failed on %s:%d"
                              % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

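revision() always reads one snapshot plus a contiguous run of deltas. A tiny helper showing which revisions that read touches (hypothetical, not part of the file):

def deltachain(r, rev):
    # full text of `rev` = decompress(base) patched with each later delta
    return range(r.base(rev), rev + 1)
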
    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                prev = self.revision(self.tip())
                d = self.diff(prev, text)
            data = compress(d)
            dist = end - start + len(data)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

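The dist > len(text) * 2 test is what keeps delta chains short: once the bytes between the last snapshot and the chain tip exceed twice the size of the new text, a fresh full version is stored. The decision in isolation, with made-up numbers (illustrative only):

def storesfull(dist, textlen):
    # mirrors the check in addrevision()
    return dist > textlen * 2

assert storesfull(300, 120)        # chain grown past 2x: write a snapshot
assert not storesfull(200, 120)    # still cheap: append another delta
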
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""
        # calculate the distance of every node from root
        dist = {nullid: 0}
        for i in xrange(self.count()):
            n = self.node(i)
            p1, p2 = self.parents(n)
            dist[n] = max(dist[p1], dist[p2]) + 1

        # traverse ancestors in order of decreasing distance from root
        def ancestors(node):
            # we store negative distances because heap returns smallest member
            h = [(-dist[node], node)]
            seen = {}
            earliest = self.count()
            while h:
                d, n = heapq.heappop(h)
                if n not in seen:
                    seen[n] = 1
                    r = self.rev(n)
                    yield (-d, n)
                    for p in self.parents(n):
                        heapq.heappush(h, (-dist[p], p))

        def generations(node):
            sg, s = None, {}
            for g,n in ancestors(node):
                if g != sg:
                    if sg:
                        yield sg, s
                    sg, s = g, {n:1}
                else:
                    s[n] = 1
            yield sg, s

        x = generations(a)
        y = generations(b)
        gx = x.next()
        gy = y.next()

        # increment each ancestor list until it is closer to root than
        # the other, or they match
        while 1:
            #print "ancestor gen %s %s" % (gx[0], gy[0])
            if gx[0] == gy[0]:
                # find the intersection
                i = [ n for n in gx[1] if n in gy[1] ]
                if i:
                    return i[0]
                else:
                    #print "next"
                    gy = y.next()
                    gx = x.next()
            elif gx[0] < gy[0]:
                #print "next y"
                gy = y.next()
            else:
                #print "next x"
                gx = x.next()

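The generation walk above descends both ancestor sets in lockstep by depth and stops at the first generation where they intersect. What it computes, checked on a toy graph (hypothetical revs; this sketch shows the goal, not the lockstep algorithm):

#   0 <- 1 <- 2
#         \__ 3       ancestor(2, 3) == 1
parents = {0: [], 1: [0], 2: [1], 3: [1]}

def ancs(n):
    s, stack = {n: 1}, [n]
    while stack:
        for p in parents[stack.pop()]:
            if p not in s:
                s[p] = 1
                stack.append(p)
    return s

assert max([n for n in ancs(2) if n in ancs(3)]) == 1
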
    def group(self, linkmap):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0].
        """
        revs = []
        needed = {}

        # find file nodes/revs that match changeset revs
        for i in xrange(0, self.count()):
            if self.index[i][3] in linkmap:
                revs.append(i)
                needed[i] = 1

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield struct.pack(">l", 0)
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # for each delta that isn't contiguous in the log, we need to
        # reconstruct the base, reconstruct the result, and then
        # calculate the delta. We also need to do this where we've
        # stored a full version and not a delta
        for i in xrange(0, len(revs) - 1):
            a, b = revs[i], revs[i + 1]
            if a + 1 != b or self.base(b) == b:
                for j in xrange(self.base(a), a + 1):
                    needed[j] = 1
                for j in xrange(self.base(b), b + 1):
                    needed[j] = 1

        # calculate spans to retrieve from datafile
        needed = needed.keys()
        needed.sort()
        spans = []
        oo = -1
        ol = 0
        for n in needed:
            if n < 0: continue
            o = self.start(n)
            l = self.length(n)
            if oo + ol == o: # can we merge with the previous?
                nl = spans[-1][2]
                nl.append((n, l))
                ol += l
                spans[-1] = (oo, ol, nl)
            else:
                oo = o
                ol = l
                spans.append((oo, ol, [(n, l)]))

        # read spans in, divide up chunks
        chunks = {}
        for span in spans:
            # we reopen the file for each span to make http happy for now
            f = self.opener(self.datafile)
            f.seek(span[0])
            data = f.read(span[1])

            # divide up the span
            pos = 0
            for r, l in span[2]:
                chunks[r] = decompress(data[pos: pos + l])
                pos += l

        # helper to reconstruct intermediate versions
        def construct(text, base, rev):
            bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
            return mdiff.patches(text, bins)

        # build deltas
        deltas = []
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            n = self.node(b)

            # do we need to construct a new delta?
            if a + 1 != b or self.base(b) == b:
                if a >= 0:
                    base = self.base(a)
                    ta = chunks[self.base(a)]
                    ta = construct(ta, base, a)
                else:
                    ta = ""

                base = self.base(b)
                if a > base:
                    base = a
                    tb = ta
                else:
                    tb = chunks[self.base(b)]
                tb = construct(tb, base, b)
                d = self.diff(ta, tb)
            else:
                d = chunks[b]

            p = self.parents(n)
            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
            l = struct.pack(">l", len(meta) + len(d) + 4)
            yield l
            yield meta
            yield d

        yield struct.pack(">l", 0)

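On the wire, each chunk is a 4-byte big-endian length (which counts itself), an 80-byte header of node, p1, p2 and changeset id, then the delta; a zero length ends the stream. A hypothetical reader for just the framing (not part of the file):

def readchunks(f):
    # consumes the framing produced by group(); addgroup() splits the
    # 80-byte header off each chunk itself
    while 1:
        l = struct.unpack(">l", f.read(4))[0]
        if l == 0:
            return
        yield f.read(l - 4)
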
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = nullid

        base = prev = -1
        start = end = measure = 0
        if r:
            start = self.start(self.base(t))
            end = self.end(t)
            measure = self.length(self.base(t))
            base = self.base(t)
            prev = self.tip()

        transaction.add(self.datafile, end)
        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
        dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError("already have %s" % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise RevlogError("unknown base %s" % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)

            if chain != prev or (end - start + len(cdelta)) > measure * 2:
                # flush our writes here so we can read it in revision
                dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise RevlogError("consistency error adding group")
                measure = len(text)
            else:
                e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                self.index.append(e)
                self.nodemap[node] = r
                dfh.write(cdelta)
                ifh.write(struct.pack(indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            start = self.start(self.base(t))
            end = self.end(t)

        dfh.close()
        ifh.close()
        return node