Make revlog constructor more discerning in its treatment of errors.
Bryan O'Sullivan
r1322:b3d44e9b default
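Before this change, the constructor caught every IOError while reading the index and silently fell back to an empty revlog. It now lets urllib2.URLError propagate (in Python 2, URLError is a subclass of IOError, so it must be caught and re-raised ahead of the IOError handler) and re-raises any IOError whose errno is not ENOENT; only a genuinely missing index file is still treated as a new, empty log. A minimal sketch of the new control flow, using a hypothetical read_index helper in place of the constructor body:

import errno, urllib2

def read_index(opener, indexfile):
    # read_index and opener are illustrative names, not revlog API
    try:
        return opener(indexfile).read()
    except urllib2.URLError:
        # a network failure from an HTTP opener never means "no index yet";
        # URLError subclasses IOError, so re-raise it before the handler below
        raise
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            # permission or disk errors are real failures
            raise
        # missing file: a brand-new revlog starts with an empty index
        return ""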
@@ -1,651 +1,655 b''
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 import zlib, struct, sha, binascii, heapq
14 import mdiff
15 from node import *
13 from node import *
14 from demandload import demandload
15 demandload(globals(), "binascii errno heapq mdiff sha struct urllib2 zlib")
16
16
17 def hash(text, p1, p2):
17 def hash(text, p1, p2):
18 """generate a hash from the given text and its parent hashes
18 """generate a hash from the given text and its parent hashes
19
19
20 This hash combines both the current file contents and its history
20 This hash combines both the current file contents and its history
21 in a manner that makes it easy to distinguish nodes with the same
21 in a manner that makes it easy to distinguish nodes with the same
22 content in the revision graph.
22 content in the revision graph.
23 """
23 """
24 l = [p1, p2]
24 l = [p1, p2]
25 l.sort()
25 l.sort()
26 s = sha.new(l[0])
26 s = sha.new(l[0])
27 s.update(l[1])
27 s.update(l[1])
28 s.update(text)
28 s.update(text)
29 return s.digest()
29 return s.digest()
30
30
31 def compress(text):
31 def compress(text):
32 """ generate a possibly-compressed representation of text """
32 """ generate a possibly-compressed representation of text """
33 if not text: return text
33 if not text: return text
34 if len(text) < 44:
34 if len(text) < 44:
35 if text[0] == '\0': return text
35 if text[0] == '\0': return text
36 return 'u' + text
36 return 'u' + text
37 bin = zlib.compress(text)
37 bin = zlib.compress(text)
38 if len(bin) > len(text):
38 if len(bin) > len(text):
39 if text[0] == '\0': return text
39 if text[0] == '\0': return text
40 return 'u' + text
40 return 'u' + text
41 return bin
41 return bin
42
42
43 def decompress(bin):
43 def decompress(bin):
44 """ decompress the given input """
44 """ decompress the given input """
45 if not bin: return bin
45 if not bin: return bin
46 t = bin[0]
46 t = bin[0]
47 if t == '\0': return bin
47 if t == '\0': return bin
48 if t == 'x': return zlib.decompress(bin)
48 if t == 'x': return zlib.decompress(bin)
49 if t == 'u': return bin[1:]
49 if t == 'u': return bin[1:]
50 raise RevlogError("unknown compression type %s" % t)
50 raise RevlogError("unknown compression type %s" % t)
51
51
52 indexformat = ">4l20s20s20s"
52 indexformat = ">4l20s20s20s"
53
53
54 class lazyparser:
54 class lazyparser:
55 """
55 """
56 this class avoids the need to parse the entirety of large indices
56 this class avoids the need to parse the entirety of large indices
57
57
58 By default we parse and load 1000 entries at a time.
58 By default we parse and load 1000 entries at a time.
59
59
60 If no position is specified, we load the whole index, and replace
60 If no position is specified, we load the whole index, and replace
61 the lazy objects in revlog with the underlying objects for
61 the lazy objects in revlog with the underlying objects for
62 efficiency in cases where we look at most of the nodes.
62 efficiency in cases where we look at most of the nodes.
63 """
63 """
64 def __init__(self, data, revlog):
64 def __init__(self, data, revlog):
65 self.data = data
65 self.data = data
66 self.s = struct.calcsize(indexformat)
66 self.s = struct.calcsize(indexformat)
67 self.l = len(data)/self.s
67 self.l = len(data)/self.s
68 self.index = [None] * self.l
68 self.index = [None] * self.l
69 self.map = {nullid: -1}
69 self.map = {nullid: -1}
70 self.all = 0
70 self.all = 0
71 self.revlog = revlog
71 self.revlog = revlog
72
72
73 def load(self, pos=None):
73 def load(self, pos=None):
74 if self.all: return
74 if self.all: return
75 if pos is not None:
75 if pos is not None:
76 block = pos / 1000
76 block = pos / 1000
77 i = block * 1000
77 i = block * 1000
78 end = min(self.l, i + 1000)
78 end = min(self.l, i + 1000)
79 else:
79 else:
80 self.all = 1
80 self.all = 1
81 i = 0
81 i = 0
82 end = self.l
82 end = self.l
83 self.revlog.index = self.index
83 self.revlog.index = self.index
84 self.revlog.nodemap = self.map
84 self.revlog.nodemap = self.map
85
85
86 while i < end:
86 while i < end:
87 d = self.data[i * self.s: (i + 1) * self.s]
87 d = self.data[i * self.s: (i + 1) * self.s]
88 e = struct.unpack(indexformat, d)
88 e = struct.unpack(indexformat, d)
89 self.index[i] = e
89 self.index[i] = e
90 self.map[e[6]] = i
90 self.map[e[6]] = i
91 i += 1
91 i += 1
92
92
93 class lazyindex:
93 class lazyindex:
94 """a lazy version of the index array"""
94 """a lazy version of the index array"""
95 def __init__(self, parser):
95 def __init__(self, parser):
96 self.p = parser
96 self.p = parser
97 def __len__(self):
97 def __len__(self):
98 return len(self.p.index)
98 return len(self.p.index)
99 def load(self, pos):
99 def load(self, pos):
100 self.p.load(pos)
100 self.p.load(pos)
101 return self.p.index[pos]
101 return self.p.index[pos]
102 def __getitem__(self, pos):
102 def __getitem__(self, pos):
103 return self.p.index[pos] or self.load(pos)
103 return self.p.index[pos] or self.load(pos)
104 def append(self, e):
104 def append(self, e):
105 self.p.index.append(e)
105 self.p.index.append(e)
106
106
107 class lazymap:
107 class lazymap:
108 """a lazy version of the node map"""
108 """a lazy version of the node map"""
109 def __init__(self, parser):
109 def __init__(self, parser):
110 self.p = parser
110 self.p = parser
111 def load(self, key):
111 def load(self, key):
112 if self.p.all: return
112 if self.p.all: return
113 n = self.p.data.find(key)
113 n = self.p.data.find(key)
114 if n < 0:
114 if n < 0:
115 raise KeyError(key)
115 raise KeyError(key)
116 pos = n / self.p.s
116 pos = n / self.p.s
117 self.p.load(pos)
117 self.p.load(pos)
118 def __contains__(self, key):
118 def __contains__(self, key):
119 self.p.load()
119 self.p.load()
120 return key in self.p.map
120 return key in self.p.map
121 def __iter__(self):
121 def __iter__(self):
122 yield nullid
122 yield nullid
123 for i in xrange(self.p.l):
123 for i in xrange(self.p.l):
124 try:
124 try:
125 yield self.p.index[i][6]
125 yield self.p.index[i][6]
126 except:
126 except:
127 self.p.load(i)
127 self.p.load(i)
128 yield self.p.index[i][6]
128 yield self.p.index[i][6]
129 def __getitem__(self, key):
129 def __getitem__(self, key):
130 try:
130 try:
131 return self.p.map[key]
131 return self.p.map[key]
132 except KeyError:
132 except KeyError:
133 try:
133 try:
134 self.load(key)
134 self.load(key)
135 return self.p.map[key]
135 return self.p.map[key]
136 except KeyError:
136 except KeyError:
137 raise KeyError("node " + hex(key))
137 raise KeyError("node " + hex(key))
138 def __setitem__(self, key, val):
138 def __setitem__(self, key, val):
139 self.p.map[key] = val
139 self.p.map[key] = val
140
140
141 class RevlogError(Exception): pass
141 class RevlogError(Exception): pass
142
142
143 class revlog:
143 class revlog:
144 """
144 """
145 the underlying revision storage object
145 the underlying revision storage object
146
146
147 A revlog consists of two parts, an index and the revision data.
147 A revlog consists of two parts, an index and the revision data.
148
148
149 The index is a file with a fixed record size containing
149 The index is a file with a fixed record size containing
150 information on each revision, includings its nodeid (hash), the
150 information on each revision, includings its nodeid (hash), the
151 nodeids of its parents, the position and offset of its data within
151 nodeids of its parents, the position and offset of its data within
152 the data file, and the revision it's based on. Finally, each entry
152 the data file, and the revision it's based on. Finally, each entry
153 contains a linkrev entry that can serve as a pointer to external
153 contains a linkrev entry that can serve as a pointer to external
154 data.
154 data.
155
155
156 The revision data itself is a linear collection of data chunks.
156 The revision data itself is a linear collection of data chunks.
157 Each chunk represents a revision and is usually represented as a
157 Each chunk represents a revision and is usually represented as a
158 delta against the previous chunk. To bound lookup time, runs of
158 delta against the previous chunk. To bound lookup time, runs of
159 deltas are limited to about 2 times the length of the original
159 deltas are limited to about 2 times the length of the original
160 version data. This makes retrieval of a version proportional to
160 version data. This makes retrieval of a version proportional to
161 its size, or O(1) relative to the number of revisions.
161 its size, or O(1) relative to the number of revisions.
162
162
163 Both pieces of the revlog are written to in an append-only
163 Both pieces of the revlog are written to in an append-only
164 fashion, which means we never need to rewrite a file to insert or
164 fashion, which means we never need to rewrite a file to insert or
165 remove data, and can use some simple techniques to avoid the need
165 remove data, and can use some simple techniques to avoid the need
166 for locking while reading.
166 for locking while reading.
167 """
167 """
168 def __init__(self, opener, indexfile, datafile):
168 def __init__(self, opener, indexfile, datafile):
169 """
169 """
170 create a revlog object
170 create a revlog object
171
171
172 opener is a function that abstracts the file opening operation
172 opener is a function that abstracts the file opening operation
173 and can be used to implement COW semantics or the like.
173 and can be used to implement COW semantics or the like.
174 """
174 """
175 self.indexfile = indexfile
175 self.indexfile = indexfile
176 self.datafile = datafile
176 self.datafile = datafile
177 self.opener = opener
177 self.opener = opener
178 self.cache = None
178 self.cache = None
179
179
180 try:
180 try:
181 i = self.opener(self.indexfile).read()
181 i = self.opener(self.indexfile).read()
182 except IOError:
182 except urllib2.URLError:
183 raise
184 except IOError, inst:
185 if inst.errno != errno.ENOENT:
186 raise
183 i = ""
187 i = ""
184
188
185 if len(i) > 10000:
189 if len(i) > 10000:
186 # big index, let's parse it on demand
190 # big index, let's parse it on demand
187 parser = lazyparser(i, self)
191 parser = lazyparser(i, self)
188 self.index = lazyindex(parser)
192 self.index = lazyindex(parser)
189 self.nodemap = lazymap(parser)
193 self.nodemap = lazymap(parser)
190 else:
194 else:
191 s = struct.calcsize(indexformat)
195 s = struct.calcsize(indexformat)
192 l = len(i) / s
196 l = len(i) / s
193 self.index = [None] * l
197 self.index = [None] * l
194 m = [None] * l
198 m = [None] * l
195
199
196 n = 0
200 n = 0
197 for f in xrange(0, len(i), s):
201 for f in xrange(0, len(i), s):
198 # offset, size, base, linkrev, p1, p2, nodeid
202 # offset, size, base, linkrev, p1, p2, nodeid
199 e = struct.unpack(indexformat, i[f:f + s])
203 e = struct.unpack(indexformat, i[f:f + s])
200 m[n] = (e[6], n)
204 m[n] = (e[6], n)
201 self.index[n] = e
205 self.index[n] = e
202 n += 1
206 n += 1
203
207
204 self.nodemap = dict(m)
208 self.nodemap = dict(m)
205 self.nodemap[nullid] = -1
209 self.nodemap[nullid] = -1
206
210
207 def tip(self): return self.node(len(self.index) - 1)
211 def tip(self): return self.node(len(self.index) - 1)
208 def count(self): return len(self.index)
212 def count(self): return len(self.index)
209 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
213 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
210 def rev(self, node):
214 def rev(self, node):
211 try:
215 try:
212 return self.nodemap[node]
216 return self.nodemap[node]
213 except KeyError:
217 except KeyError:
214 raise RevlogError('%s: no node %s' % (self.indexfile, hex(node)))
218 raise RevlogError('%s: no node %s' % (self.indexfile, hex(node)))
215 def linkrev(self, node): return self.index[self.rev(node)][3]
219 def linkrev(self, node): return self.index[self.rev(node)][3]
216 def parents(self, node):
220 def parents(self, node):
217 if node == nullid: return (nullid, nullid)
221 if node == nullid: return (nullid, nullid)
218 return self.index[self.rev(node)][4:6]
222 return self.index[self.rev(node)][4:6]
219
223
220 def start(self, rev): return self.index[rev][0]
224 def start(self, rev): return self.index[rev][0]
221 def length(self, rev): return self.index[rev][1]
225 def length(self, rev): return self.index[rev][1]
222 def end(self, rev): return self.start(rev) + self.length(rev)
226 def end(self, rev): return self.start(rev) + self.length(rev)
223 def base(self, rev): return self.index[rev][2]
227 def base(self, rev): return self.index[rev][2]
224
228
225 def reachable(self, rev, stop=None):
229 def reachable(self, rev, stop=None):
226 reachable = {}
230 reachable = {}
227 visit = [rev]
231 visit = [rev]
228 reachable[rev] = 1
232 reachable[rev] = 1
229 if stop:
233 if stop:
230 stopn = self.rev(stop)
234 stopn = self.rev(stop)
231 else:
235 else:
232 stopn = 0
236 stopn = 0
233 while visit:
237 while visit:
234 n = visit.pop(0)
238 n = visit.pop(0)
235 if n == stop:
239 if n == stop:
236 continue
240 continue
237 if n == nullid:
241 if n == nullid:
238 continue
242 continue
239 for p in self.parents(n):
243 for p in self.parents(n):
240 if self.rev(p) < stopn:
244 if self.rev(p) < stopn:
241 continue
245 continue
242 if p not in reachable:
246 if p not in reachable:
243 reachable[p] = 1
247 reachable[p] = 1
244 visit.append(p)
248 visit.append(p)
245 return reachable
249 return reachable
246
250
247 def heads(self, stop=None):
251 def heads(self, stop=None):
248 """return the list of all nodes that have no children"""
252 """return the list of all nodes that have no children"""
249 p = {}
253 p = {}
250 h = []
254 h = []
251 stoprev = 0
255 stoprev = 0
252 if stop and stop in self.nodemap:
256 if stop and stop in self.nodemap:
253 stoprev = self.rev(stop)
257 stoprev = self.rev(stop)
254
258
255 for r in range(self.count() - 1, -1, -1):
259 for r in range(self.count() - 1, -1, -1):
256 n = self.node(r)
260 n = self.node(r)
257 if n not in p:
261 if n not in p:
258 h.append(n)
262 h.append(n)
259 if n == stop:
263 if n == stop:
260 break
264 break
261 if r < stoprev:
265 if r < stoprev:
262 break
266 break
263 for pn in self.parents(n):
267 for pn in self.parents(n):
264 p[pn] = 1
268 p[pn] = 1
265 return h
269 return h
266
270
267 def children(self, node):
271 def children(self, node):
268 """find the children of a given node"""
272 """find the children of a given node"""
269 c = []
273 c = []
270 p = self.rev(node)
274 p = self.rev(node)
271 for r in range(p + 1, self.count()):
275 for r in range(p + 1, self.count()):
272 n = self.node(r)
276 n = self.node(r)
273 for pn in self.parents(n):
277 for pn in self.parents(n):
274 if pn == node:
278 if pn == node:
275 c.append(n)
279 c.append(n)
276 continue
280 continue
277 elif pn == nullid:
281 elif pn == nullid:
278 continue
282 continue
279 return c
283 return c
280
284
281 def lookup(self, id):
285 def lookup(self, id):
282 """locate a node based on revision number or subset of hex nodeid"""
286 """locate a node based on revision number or subset of hex nodeid"""
283 try:
287 try:
284 rev = int(id)
288 rev = int(id)
285 if str(rev) != id: raise ValueError
289 if str(rev) != id: raise ValueError
286 if rev < 0: rev = self.count() + rev
290 if rev < 0: rev = self.count() + rev
287 if rev < 0 or rev >= self.count(): raise ValueError
291 if rev < 0 or rev >= self.count(): raise ValueError
288 return self.node(rev)
292 return self.node(rev)
289 except (ValueError, OverflowError):
293 except (ValueError, OverflowError):
290 c = []
294 c = []
291 for n in self.nodemap:
295 for n in self.nodemap:
292 if hex(n).startswith(id):
296 if hex(n).startswith(id):
293 c.append(n)
297 c.append(n)
294 if len(c) > 1: raise KeyError("Ambiguous identifier")
298 if len(c) > 1: raise KeyError("Ambiguous identifier")
295 if len(c) < 1: raise KeyError("No match found")
299 if len(c) < 1: raise KeyError("No match found")
296 return c[0]
300 return c[0]
297
301
298 return None
302 return None
299
303
300 def diff(self, a, b):
304 def diff(self, a, b):
301 """return a delta between two revisions"""
305 """return a delta between two revisions"""
302 return mdiff.textdiff(a, b)
306 return mdiff.textdiff(a, b)
303
307
304 def patches(self, t, pl):
308 def patches(self, t, pl):
305 """apply a list of patches to a string"""
309 """apply a list of patches to a string"""
306 return mdiff.patches(t, pl)
310 return mdiff.patches(t, pl)
307
311
308 def delta(self, node):
312 def delta(self, node):
309 """return or calculate a delta between a node and its predecessor"""
313 """return or calculate a delta between a node and its predecessor"""
310 r = self.rev(node)
314 r = self.rev(node)
311 b = self.base(r)
315 b = self.base(r)
312 if r == b:
316 if r == b:
313 return self.diff(self.revision(self.node(r - 1)),
317 return self.diff(self.revision(self.node(r - 1)),
314 self.revision(node))
318 self.revision(node))
315 else:
319 else:
316 f = self.opener(self.datafile)
320 f = self.opener(self.datafile)
317 f.seek(self.start(r))
321 f.seek(self.start(r))
318 data = f.read(self.length(r))
322 data = f.read(self.length(r))
319 return decompress(data)
323 return decompress(data)
320
324
321 def revision(self, node):
325 def revision(self, node):
322 """return an uncompressed revision of a given"""
326 """return an uncompressed revision of a given"""
323 if node == nullid: return ""
327 if node == nullid: return ""
324 if self.cache and self.cache[0] == node: return self.cache[2]
328 if self.cache and self.cache[0] == node: return self.cache[2]
325
329
326 # look up what we need to read
330 # look up what we need to read
327 text = None
331 text = None
328 rev = self.rev(node)
332 rev = self.rev(node)
329 start, length, base, link, p1, p2, node = self.index[rev]
333 start, length, base, link, p1, p2, node = self.index[rev]
330 end = start + length
334 end = start + length
331 if base != rev: start = self.start(base)
335 if base != rev: start = self.start(base)
332
336
333 # do we have useful data cached?
337 # do we have useful data cached?
334 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
338 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
335 base = self.cache[1]
339 base = self.cache[1]
336 start = self.start(base + 1)
340 start = self.start(base + 1)
337 text = self.cache[2]
341 text = self.cache[2]
338 last = 0
342 last = 0
339
343
340 f = self.opener(self.datafile)
344 f = self.opener(self.datafile)
341 f.seek(start)
345 f.seek(start)
342 data = f.read(end - start)
346 data = f.read(end - start)
343
347
344 if text is None:
348 if text is None:
345 last = self.length(base)
349 last = self.length(base)
346 text = decompress(data[:last])
350 text = decompress(data[:last])
347
351
348 bins = []
352 bins = []
349 for r in xrange(base + 1, rev + 1):
353 for r in xrange(base + 1, rev + 1):
350 s = self.length(r)
354 s = self.length(r)
351 bins.append(decompress(data[last:last + s]))
355 bins.append(decompress(data[last:last + s]))
352 last = last + s
356 last = last + s
353
357
354 text = mdiff.patches(text, bins)
358 text = mdiff.patches(text, bins)
355
359
356 if node != hash(text, p1, p2):
360 if node != hash(text, p1, p2):
357 raise RevlogError("integrity check failed on %s:%d"
361 raise RevlogError("integrity check failed on %s:%d"
358 % (self.datafile, rev))
362 % (self.datafile, rev))
359
363
360 self.cache = (node, rev, text)
364 self.cache = (node, rev, text)
361 return text
365 return text
362
366
363 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
367 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
364 """add a revision to the log
368 """add a revision to the log
365
369
366 text - the revision data to add
370 text - the revision data to add
367 transaction - the transaction object used for rollback
371 transaction - the transaction object used for rollback
368 link - the linkrev data to add
372 link - the linkrev data to add
369 p1, p2 - the parent nodeids of the revision
373 p1, p2 - the parent nodeids of the revision
370 d - an optional precomputed delta
374 d - an optional precomputed delta
371 """
375 """
372 if text is None: text = ""
376 if text is None: text = ""
373 if p1 is None: p1 = self.tip()
377 if p1 is None: p1 = self.tip()
374 if p2 is None: p2 = nullid
378 if p2 is None: p2 = nullid
375
379
376 node = hash(text, p1, p2)
380 node = hash(text, p1, p2)
377
381
378 if node in self.nodemap:
382 if node in self.nodemap:
379 return node
383 return node
380
384
381 n = self.count()
385 n = self.count()
382 t = n - 1
386 t = n - 1
383
387
384 if n:
388 if n:
385 base = self.base(t)
389 base = self.base(t)
386 start = self.start(base)
390 start = self.start(base)
387 end = self.end(t)
391 end = self.end(t)
388 if not d:
392 if not d:
389 prev = self.revision(self.tip())
393 prev = self.revision(self.tip())
390 d = self.diff(prev, text)
394 d = self.diff(prev, text)
391 data = compress(d)
395 data = compress(d)
392 dist = end - start + len(data)
396 dist = end - start + len(data)
393
397
394 # full versions are inserted when the needed deltas
398 # full versions are inserted when the needed deltas
395 # become comparable to the uncompressed text
399 # become comparable to the uncompressed text
396 if not n or dist > len(text) * 2:
400 if not n or dist > len(text) * 2:
397 data = compress(text)
401 data = compress(text)
398 base = n
402 base = n
399 else:
403 else:
400 base = self.base(t)
404 base = self.base(t)
401
405
402 offset = 0
406 offset = 0
403 if t >= 0:
407 if t >= 0:
404 offset = self.end(t)
408 offset = self.end(t)
405
409
406 e = (offset, len(data), base, link, p1, p2, node)
410 e = (offset, len(data), base, link, p1, p2, node)
407
411
408 self.index.append(e)
412 self.index.append(e)
409 self.nodemap[node] = n
413 self.nodemap[node] = n
410 entry = struct.pack(indexformat, *e)
414 entry = struct.pack(indexformat, *e)
411
415
412 transaction.add(self.datafile, e[0])
416 transaction.add(self.datafile, e[0])
413 self.opener(self.datafile, "a").write(data)
417 self.opener(self.datafile, "a").write(data)
414 transaction.add(self.indexfile, n * len(entry))
418 transaction.add(self.indexfile, n * len(entry))
415 self.opener(self.indexfile, "a").write(entry)
419 self.opener(self.indexfile, "a").write(entry)
416
420
417 self.cache = (node, n, text)
421 self.cache = (node, n, text)
418 return node
422 return node
419
423
420 def ancestor(self, a, b):
424 def ancestor(self, a, b):
421 """calculate the least common ancestor of nodes a and b"""
425 """calculate the least common ancestor of nodes a and b"""
422 # calculate the distance of every node from root
426 # calculate the distance of every node from root
423 dist = {nullid: 0}
427 dist = {nullid: 0}
424 for i in xrange(self.count()):
428 for i in xrange(self.count()):
425 n = self.node(i)
429 n = self.node(i)
426 p1, p2 = self.parents(n)
430 p1, p2 = self.parents(n)
427 dist[n] = max(dist[p1], dist[p2]) + 1
431 dist[n] = max(dist[p1], dist[p2]) + 1
428
432
429 # traverse ancestors in order of decreasing distance from root
433 # traverse ancestors in order of decreasing distance from root
430 def ancestors(node):
434 def ancestors(node):
431 # we store negative distances because heap returns smallest member
435 # we store negative distances because heap returns smallest member
432 h = [(-dist[node], node)]
436 h = [(-dist[node], node)]
433 seen = {}
437 seen = {}
434 earliest = self.count()
438 earliest = self.count()
435 while h:
439 while h:
436 d, n = heapq.heappop(h)
440 d, n = heapq.heappop(h)
437 if n not in seen:
441 if n not in seen:
438 seen[n] = 1
442 seen[n] = 1
439 r = self.rev(n)
443 r = self.rev(n)
440 yield (-d, r, n)
444 yield (-d, r, n)
441 for p in self.parents(n):
445 for p in self.parents(n):
442 heapq.heappush(h, (-dist[p], p))
446 heapq.heappush(h, (-dist[p], p))
443
447
444 x = ancestors(a)
448 x = ancestors(a)
445 y = ancestors(b)
449 y = ancestors(b)
446 lx = x.next()
450 lx = x.next()
447 ly = y.next()
451 ly = y.next()
448
452
449 # increment each ancestor list until it is closer to root than
453 # increment each ancestor list until it is closer to root than
450 # the other, or they match
454 # the other, or they match
451 while 1:
455 while 1:
452 if lx == ly:
456 if lx == ly:
453 return lx[2]
457 return lx[2]
454 elif lx < ly:
458 elif lx < ly:
455 ly = y.next()
459 ly = y.next()
456 elif lx > ly:
460 elif lx > ly:
457 lx = x.next()
461 lx = x.next()
458
462
459 def group(self, linkmap):
463 def group(self, linkmap):
460 """calculate a delta group
464 """calculate a delta group
461
465
462 Given a list of changeset revs, return a set of deltas and
466 Given a list of changeset revs, return a set of deltas and
463 metadata corresponding to nodes. the first delta is
467 metadata corresponding to nodes. the first delta is
464 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
468 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
465 have this parent as it has all history before these
469 have this parent as it has all history before these
466 changesets. parent is parent[0]
470 changesets. parent is parent[0]
467 """
471 """
468 revs = []
472 revs = []
469 needed = {}
473 needed = {}
470
474
471 # find file nodes/revs that match changeset revs
475 # find file nodes/revs that match changeset revs
472 for i in xrange(0, self.count()):
476 for i in xrange(0, self.count()):
473 if self.index[i][3] in linkmap:
477 if self.index[i][3] in linkmap:
474 revs.append(i)
478 revs.append(i)
475 needed[i] = 1
479 needed[i] = 1
476
480
477 # if we don't have any revisions touched by these changesets, bail
481 # if we don't have any revisions touched by these changesets, bail
478 if not revs:
482 if not revs:
479 yield struct.pack(">l", 0)
483 yield struct.pack(">l", 0)
480 return
484 return
481
485
482 # add the parent of the first rev
486 # add the parent of the first rev
483 p = self.parents(self.node(revs[0]))[0]
487 p = self.parents(self.node(revs[0]))[0]
484 revs.insert(0, self.rev(p))
488 revs.insert(0, self.rev(p))
485
489
486 # for each delta that isn't contiguous in the log, we need to
490 # for each delta that isn't contiguous in the log, we need to
487 # reconstruct the base, reconstruct the result, and then
491 # reconstruct the base, reconstruct the result, and then
488 # calculate the delta. We also need to do this where we've
492 # calculate the delta. We also need to do this where we've
489 # stored a full version and not a delta
493 # stored a full version and not a delta
490 for i in xrange(0, len(revs) - 1):
494 for i in xrange(0, len(revs) - 1):
491 a, b = revs[i], revs[i + 1]
495 a, b = revs[i], revs[i + 1]
492 if a + 1 != b or self.base(b) == b:
496 if a + 1 != b or self.base(b) == b:
493 for j in xrange(self.base(a), a + 1):
497 for j in xrange(self.base(a), a + 1):
494 needed[j] = 1
498 needed[j] = 1
495 for j in xrange(self.base(b), b + 1):
499 for j in xrange(self.base(b), b + 1):
496 needed[j] = 1
500 needed[j] = 1
497
501
498 # calculate spans to retrieve from datafile
502 # calculate spans to retrieve from datafile
499 needed = needed.keys()
503 needed = needed.keys()
500 needed.sort()
504 needed.sort()
501 spans = []
505 spans = []
502 oo = -1
506 oo = -1
503 ol = 0
507 ol = 0
504 for n in needed:
508 for n in needed:
505 if n < 0: continue
509 if n < 0: continue
506 o = self.start(n)
510 o = self.start(n)
507 l = self.length(n)
511 l = self.length(n)
508 if oo + ol == o: # can we merge with the previous?
512 if oo + ol == o: # can we merge with the previous?
509 nl = spans[-1][2]
513 nl = spans[-1][2]
510 nl.append((n, l))
514 nl.append((n, l))
511 ol += l
515 ol += l
512 spans[-1] = (oo, ol, nl)
516 spans[-1] = (oo, ol, nl)
513 else:
517 else:
514 oo = o
518 oo = o
515 ol = l
519 ol = l
516 spans.append((oo, ol, [(n, l)]))
520 spans.append((oo, ol, [(n, l)]))
517
521
518 # read spans in, divide up chunks
522 # read spans in, divide up chunks
519 chunks = {}
523 chunks = {}
520 for span in spans:
524 for span in spans:
521 # we reopen the file for each span to make http happy for now
525 # we reopen the file for each span to make http happy for now
522 f = self.opener(self.datafile)
526 f = self.opener(self.datafile)
523 f.seek(span[0])
527 f.seek(span[0])
524 data = f.read(span[1])
528 data = f.read(span[1])
525
529
526 # divide up the span
530 # divide up the span
527 pos = 0
531 pos = 0
528 for r, l in span[2]:
532 for r, l in span[2]:
529 chunks[r] = decompress(data[pos: pos + l])
533 chunks[r] = decompress(data[pos: pos + l])
530 pos += l
534 pos += l
531
535
532 # helper to reconstruct intermediate versions
536 # helper to reconstruct intermediate versions
533 def construct(text, base, rev):
537 def construct(text, base, rev):
534 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
538 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
535 return mdiff.patches(text, bins)
539 return mdiff.patches(text, bins)
536
540
537 # build deltas
541 # build deltas
538 deltas = []
542 deltas = []
539 for d in xrange(0, len(revs) - 1):
543 for d in xrange(0, len(revs) - 1):
540 a, b = revs[d], revs[d + 1]
544 a, b = revs[d], revs[d + 1]
541 n = self.node(b)
545 n = self.node(b)
542
546
543 # do we need to construct a new delta?
547 # do we need to construct a new delta?
544 if a + 1 != b or self.base(b) == b:
548 if a + 1 != b or self.base(b) == b:
545 if a >= 0:
549 if a >= 0:
546 base = self.base(a)
550 base = self.base(a)
547 ta = chunks[self.base(a)]
551 ta = chunks[self.base(a)]
548 ta = construct(ta, base, a)
552 ta = construct(ta, base, a)
549 else:
553 else:
550 ta = ""
554 ta = ""
551
555
552 base = self.base(b)
556 base = self.base(b)
553 if a > base:
557 if a > base:
554 base = a
558 base = a
555 tb = ta
559 tb = ta
556 else:
560 else:
557 tb = chunks[self.base(b)]
561 tb = chunks[self.base(b)]
558 tb = construct(tb, base, b)
562 tb = construct(tb, base, b)
559 d = self.diff(ta, tb)
563 d = self.diff(ta, tb)
560 else:
564 else:
561 d = chunks[b]
565 d = chunks[b]
562
566
563 p = self.parents(n)
567 p = self.parents(n)
564 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
568 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
565 l = struct.pack(">l", len(meta) + len(d) + 4)
569 l = struct.pack(">l", len(meta) + len(d) + 4)
566 yield l
570 yield l
567 yield meta
571 yield meta
568 yield d
572 yield d
569
573
570 yield struct.pack(">l", 0)
574 yield struct.pack(">l", 0)
571
575
572 def addgroup(self, revs, linkmapper, transaction, unique=0):
576 def addgroup(self, revs, linkmapper, transaction, unique=0):
573 """
577 """
574 add a delta group
578 add a delta group
575
579
576 given a set of deltas, add them to the revision log. the
580 given a set of deltas, add them to the revision log. the
577 first delta is against its parent, which should be in our
581 first delta is against its parent, which should be in our
578 log, the rest are against the previous delta.
582 log, the rest are against the previous delta.
579 """
583 """
580
584
581 #track the base of the current delta log
585 #track the base of the current delta log
582 r = self.count()
586 r = self.count()
583 t = r - 1
587 t = r - 1
584 node = nullid
588 node = nullid
585
589
586 base = prev = -1
590 base = prev = -1
587 start = end = measure = 0
591 start = end = measure = 0
588 if r:
592 if r:
589 start = self.start(self.base(t))
593 start = self.start(self.base(t))
590 end = self.end(t)
594 end = self.end(t)
591 measure = self.length(self.base(t))
595 measure = self.length(self.base(t))
592 base = self.base(t)
596 base = self.base(t)
593 prev = self.tip()
597 prev = self.tip()
594
598
595 transaction.add(self.datafile, end)
599 transaction.add(self.datafile, end)
596 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
600 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
597 dfh = self.opener(self.datafile, "a")
601 dfh = self.opener(self.datafile, "a")
598 ifh = self.opener(self.indexfile, "a")
602 ifh = self.opener(self.indexfile, "a")
599
603
600 # loop through our set of deltas
604 # loop through our set of deltas
601 chain = None
605 chain = None
602 for chunk in revs:
606 for chunk in revs:
603 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
607 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
604 link = linkmapper(cs)
608 link = linkmapper(cs)
605 if node in self.nodemap:
609 if node in self.nodemap:
606 # this can happen if two branches make the same change
610 # this can happen if two branches make the same change
607 # if unique:
611 # if unique:
608 # raise RevlogError("already have %s" % hex(node[:4]))
612 # raise RevlogError("already have %s" % hex(node[:4]))
609 chain = node
613 chain = node
610 continue
614 continue
611 delta = chunk[80:]
615 delta = chunk[80:]
612
616
613 if not chain:
617 if not chain:
614 # retrieve the parent revision of the delta chain
618 # retrieve the parent revision of the delta chain
615 chain = p1
619 chain = p1
616 if not chain in self.nodemap:
620 if not chain in self.nodemap:
617 raise RevlogError("unknown base %s" % short(chain[:4]))
621 raise RevlogError("unknown base %s" % short(chain[:4]))
618
622
619 # full versions are inserted when the needed deltas become
623 # full versions are inserted when the needed deltas become
620 # comparable to the uncompressed text or when the previous
624 # comparable to the uncompressed text or when the previous
621 # version is not the one we have a delta against. We use
625 # version is not the one we have a delta against. We use
622 # the size of the previous full rev as a proxy for the
626 # the size of the previous full rev as a proxy for the
623 # current size.
627 # current size.
624
628
625 if chain == prev:
629 if chain == prev:
626 cdelta = compress(delta)
630 cdelta = compress(delta)
627
631
628 if chain != prev or (end - start + len(cdelta)) > measure * 2:
632 if chain != prev or (end - start + len(cdelta)) > measure * 2:
629 # flush our writes here so we can read it in revision
633 # flush our writes here so we can read it in revision
630 dfh.flush()
634 dfh.flush()
631 ifh.flush()
635 ifh.flush()
632 text = self.revision(chain)
636 text = self.revision(chain)
633 text = self.patches(text, [delta])
637 text = self.patches(text, [delta])
634 chk = self.addrevision(text, transaction, link, p1, p2)
638 chk = self.addrevision(text, transaction, link, p1, p2)
635 if chk != node:
639 if chk != node:
636 raise RevlogError("consistency error adding group")
640 raise RevlogError("consistency error adding group")
637 measure = len(text)
641 measure = len(text)
638 else:
642 else:
639 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
643 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
640 self.index.append(e)
644 self.index.append(e)
641 self.nodemap[node] = r
645 self.nodemap[node] = r
642 dfh.write(cdelta)
646 dfh.write(cdelta)
643 ifh.write(struct.pack(indexformat, *e))
647 ifh.write(struct.pack(indexformat, *e))
644
648
645 t, r, chain, prev = r, r + 1, node, node
649 t, r, chain, prev = r, r + 1, node, node
646 start = self.start(self.base(t))
650 start = self.start(self.base(t))
647 end = self.end(t)
651 end = self.end(t)
648
652
649 dfh.close()
653 dfh.close()
650 ifh.close()
654 ifh.close()
651 return node
655 return node
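The import change at the top of the diff is related housekeeping: instead of importing its helper modules eagerly, revlog.py now routes them through Mercurial's demandload, with errno and urllib2 added to the list to support the new error handling. demandload defers the real import of each named module until it is first touched. A rough sketch of the idea, as an illustration of the technique rather than Mercurial's actual implementation:

class _demandmod:
    """import the named module only when an attribute is requested"""
    def __init__(self, name):
        self._name = name
        self._module = None
    def __getattr__(self, attr):
        if self._module is None:
            self._module = __import__(self._name)
        return getattr(self._module, attr)

def demandload(scope, modules):
    # scope is typically globals(); modules is a space-separated string
    for name in modules.split():
        scope[name] = _demandmod(name)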
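For readers following the index-parsing code: each index record is a fixed 76-byte struct described by indexformat (">4l20s20s20s"), that is, four big-endian 32-bit integers followed by three 20-byte binary SHA-1 nodeids. A small self-contained example of packing and unpacking one record with made-up values; the field order follows the "# offset, size, base, linkrev, p1, p2, nodeid" comment in the constructor:

import struct

indexformat = ">4l20s20s20s"
reclen = struct.calcsize(indexformat)   # 76 bytes per entry

# sample values only: a full-text revision 0 with null parents
record = struct.pack(indexformat, 0, 33, 0, 0,
                     "\0" * 20, "\0" * 20, "\x12" * 20)
assert len(record) == reclen
offset, size, base, linkrev, p1, p2, nodeid = \
    struct.unpack(indexformat, record)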