lazyindex fix, make load handle negative indexes properly.
Eric Hopper
r1403:bc3e66ed default
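
The bug: lazyparser.load() assumes a non-negative position when it computes which 1000-entry block of the index to parse. In Python 2, integer division floors toward negative infinity, so a negative position picks a negative block and the parser slices the wrong bytes out of self.data, recording whatever it finds under negative positions. A minimal sketch of the block math, not part of the commit (the index size 675 is illustrative):

    # Hedged sketch: what lazyparser.load() computes for a negative position.
    pos = -1
    block = pos // 1000            # floor division gives -1, not 0
    i, end = block * 1000, min(675, block * 1000 + 1000)
    assert (i, end) == (-1000, 0)  # the parse loop then walks i = -1000 .. -1,
                                   # slicing self.data at bogus negative offsets

    # The fix: translate to the equivalent non-negative position first,
    # the way Python sequence indexing would.
    if pos < 0:
        pos += 675                 # -1 becomes 674, the last entry
    assert pos == 674
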
@@ -1,675 +1,677 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import gettext as _
14 from i18n import gettext as _
15 from demandload import demandload
15 from demandload import demandload
16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
17
17
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()
 
 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text: return text
     if len(text) < 44:
         if text[0] == '\0': return text
         return 'u' + text
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return text
         return 'u' + text
     return bin
 
 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError(_("unknown compression type %s") % t)
 
 indexformat = ">4l20s20s20s"
 
 class lazyparser:
     """
     this class avoids the need to parse the entirety of large indices
 
     By default we parse and load 1000 entries at a time.
 
     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog
 
     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map
 
         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1
 
 class lazyindex:
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
+        if pos < 0:
+            pos += len(self.p.index)
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def append(self, e):
         self.p.index.append(e)
 
 class lazymap:
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0:
             raise KeyError(key)
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
 
 class RevlogError(Exception): pass
 
 class revlog:
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None
 
         try:
             i = self.opener(self.indexfile).read()
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""
 
         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l
 
             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1
 
             self.nodemap = dict(m)
             self.nodemap[nullid] = -1
 
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def linkrev(self, node): return self.index[self.rev(node)][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.rev(node)][4:6]
 
     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]
 
     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable
 
     def heads(self, stop=None):
         """return the list of all nodes that have no children"""
         p = {}
         h = []
         stoprev = 0
         if stop and stop in self.nodemap:
             stoprev = self.rev(stop)
 
         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             if n == stop:
                 break
             if r < stoprev:
                 break
             for pn in self.parents(n):
                 p[pn] = 1
         return h
 
     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c
 
     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
             if len(c) < 1: raise RevlogError(_("No match found"))
             return c[0]
 
         return None
 
     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)
 
     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)
 
     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)
 
     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         # look up what we need to read
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)
 
         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0
 
         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)
 
         if text is None:
             last = self.length(base)
             text = decompress(data[:last])
 
         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s
 
         text = mdiff.patches(text, bins)
 
         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))
 
         self.cache = (node, rev, text)
         return text
 
     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log
 
         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid
 
         node = hash(text, p1, p2)
 
         if node in self.nodemap:
             return node
 
         n = self.count()
         t = n - 1
 
         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, text)
             data = compress(d)
             dist = end - start + len(data)
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)
 
         offset = 0
         if t >= 0:
             offset = self.end(t)
 
         e = (offset, len(data), base, link, p1, p2, node)
 
         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)
 
         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)
 
         self.cache = (node, n, text)
         return node
 
     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1
 
         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     r = self.rev(n)
                     yield (-d, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))
 
         def generations(node):
             sg, s = None, {}
             for g,n in ancestors(node):
                 if g != sg:
                     if sg:
                         yield sg, s
                     sg, s = g, {n:1}
                 else:
                     s[n] = 1
             yield sg, s
 
         x = generations(a)
         y = generations(b)
         gx = x.next()
         gy = y.next()
 
         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             #print "ancestor gen %s %s" % (gx[0], gy[0])
             if gx[0] == gy[0]:
                 # find the intersection
                 i = [ n for n in gx[1] if n in gy[1] ]
                 if i:
                     return i[0]
                 else:
                     #print "next"
                     gy = y.next()
                     gx = x.next()
             elif gx[0] < gy[0]:
                 #print "next y"
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()
 
     def group(self, linkmap):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. the first delta is
         parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = []
         needed = {}
 
         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1
 
         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))
 
         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])
 
             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""
 
                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]
 
             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d
 
         yield struct.pack(">l", 0)
 
     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group
 
         given a set of deltas, add them to the revision log. the
         first delta is against its parent, which should be in our
         log, the rest are against the previous delta.
         """
 
         #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid
 
         base = prev = -1
         start = end = measure = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()
 
         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")
 
         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 # if unique:
                 #    raise RevlogError(_("already have %s") % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]
 
             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError(_("unknown base %s") % short(chain[:4]))
 
             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.
 
             if chain == prev:
                 cdelta = compress(delta)
 
             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))
 
             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)
 
         dfh.close()
         ifh.close()
         return node
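
The effect of the two added lines can be exercised in isolation. Below is a minimal sketch against a stub parser; stubparser and its ("entry", i) records are hypothetical stand-ins for lazyparser, not revlog API:

    class stubparser:
        # hypothetical stand-in for lazyparser: entries are parsed in
        # blocks of 1000, mirroring lazyparser.load()'s block math
        def __init__(self, n):
            self.index = [None] * n
            self.l = n
        def load(self, pos=None):
            block = pos // 1000
            for i in xrange(block * 1000, min(self.l, block * 1000 + 1000)):
                self.index[i] = ("entry", i)

    class lazyindex:
        """the class as patched above, trimmed to the methods used here"""
        def __init__(self, parser):
            self.p = parser
        def __len__(self):
            return len(self.p.index)
        def load(self, pos):
            if pos < 0:
                pos += len(self.p.index)   # the two lines added by this changeset
            self.p.load(pos)
            return self.p.index[pos]
        def __getitem__(self, pos):
            return self.p.index[pos] or self.load(pos)

    ix = lazyindex(stubparser(2500))
    assert ix[-1] == ("entry", 2499)   # -1 now resolves inside the right block
    assert ix.p.index[2000] is not None and ix.p.index[0] is None  # still lazy

The last assertion shows the design point: normalizing the position keeps the on-demand behavior intact, since only the block containing the requested entry gets parsed.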
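
For reference when reading lazyparser and revlog.__init__ above: each index record is one indexformat struct. A small sketch of packing and unpacking a record; the field values here are made up:

    import struct

    indexformat = ">4l20s20s20s"   # as defined in revlog.py above

    node = "\x12" * 20             # hypothetical 20-byte ids
    p1 = p2 = "\x00" * 20
    # field order per the comment in revlog.__init__:
    # offset, size, base, linkrev, p1, p2, nodeid
    record = struct.pack(indexformat, 0, 44, 0, 0, p1, p2, node)
    assert len(record) == struct.calcsize(indexformat)   # 4*4 + 3*20 = 76 bytes
    e = struct.unpack(indexformat, record)
    assert e[6] == node   # e[6] is the nodeid that lazyparser maps back to a rev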