Add revlog.reachable to find a graph of ancestors for a given rev
Author: mason@suse.com
Changeset: r1074:55bf5cfd (branch: default)
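For context, here is a minimal usage sketch of the method this changeset adds. The `log` name is hypothetical (any revlog instance works, e.g. repo.changelog), and note that despite the parameter name `rev`, both the starting point and the optional `stop` cutoff are binary node ids, since the walk feeds them to self.parents() and self.rev():

    # a minimal sketch, assuming "log" is an existing revlog instance
    # such as repo.changelog; none of these names are part of this change
    node = log.tip()             # start the walk at the tip node
    stop = log.node(0)           # do not walk back past revision 0

    # reachable() walks parent pointers breadth-first and returns a
    # dict mapping every ancestor node it reaches (start included) to 1
    ancestors = log.reachable(node, stop=stop)

    if log.node(3) in ancestors:
        print "rev 3 is an ancestor of the tip"

Returning a dict rather than a list keeps the membership test cheap, which matters when callers probe large ancestor sets.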
@@ -1,553 +1,575 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, binascii, heapq
12 12 from mercurial import mdiff
13 13
14 14 def hex(node): return binascii.hexlify(node)
15 15 def bin(node): return binascii.unhexlify(node)
16 16 def short(node): return hex(node[:6])
17 17
18 18 def compress(text):
19 19 if not text: return text
20 20 if len(text) < 44:
21 21 if text[0] == '\0': return text
22 22 return 'u' + text
23 23 bin = zlib.compress(text)
24 24 if len(bin) > len(text):
25 25 if text[0] == '\0': return text
26 26 return 'u' + text
27 27 return bin
28 28
29 29 def decompress(bin):
30 30 if not bin: return bin
31 31 t = bin[0]
32 32 if t == '\0': return bin
33 33 if t == 'x': return zlib.decompress(bin)
34 34 if t == 'u': return bin[1:]
35 35 raise RevlogError("unknown compression type %s" % t)
36 36
37 37 def hash(text, p1, p2):
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
45 45 nullid = "\0" * 20
46 46 indexformat = ">4l20s20s20s"
47 47
48 48 class lazyparser:
49 49 def __init__(self, data, revlog):
50 50 self.data = data
51 51 self.s = struct.calcsize(indexformat)
52 52 self.l = len(data)/self.s
53 53 self.index = [None] * self.l
54 54 self.map = {nullid: -1}
55 55 self.all = 0
56 56 self.revlog = revlog
57 57
58 58 def load(self, pos=None):
59 59 if self.all: return
60 60 if pos is not None:
61 61 block = pos / 1000
62 62 i = block * 1000
63 63 end = min(self.l, i + 1000)
64 64 else:
65 65 self.all = 1
66 66 i = 0
67 67 end = self.l
68 68 self.revlog.index = self.index
69 69 self.revlog.nodemap = self.map
70 70
71 71 while i < end:
72 72 d = self.data[i * self.s: (i + 1) * self.s]
73 73 e = struct.unpack(indexformat, d)
74 74 self.index[i] = e
75 75 self.map[e[6]] = i
76 76 i += 1
77 77
78 78 class lazyindex:
79 79 def __init__(self, parser):
80 80 self.p = parser
81 81 def __len__(self):
82 82 return len(self.p.index)
83 83 def load(self, pos):
84 84 self.p.load(pos)
85 85 return self.p.index[pos]
86 86 def __getitem__(self, pos):
87 87 return self.p.index[pos] or self.load(pos)
88 88 def append(self, e):
89 89 self.p.index.append(e)
90 90
91 91 class lazymap:
92 92 def __init__(self, parser):
93 93 self.p = parser
94 94 def load(self, key):
95 95 if self.p.all: return
96 96 n = self.p.data.find(key)
97 97 if n < 0: raise KeyError("node " + hex(key))
98 98 pos = n / self.p.s
99 99 self.p.load(pos)
100 100 def __contains__(self, key):
101 101 self.p.load()
102 102 return key in self.p.map
103 103 def __iter__(self):
104 104 yield nullid
105 105 for i in xrange(self.p.l):
106 106 try:
107 107 yield self.p.index[i][6]
108 108 except:
109 109 self.p.load(i)
110 110 yield self.p.index[i][6]
111 111 def __getitem__(self, key):
112 112 try:
113 113 return self.p.map[key]
114 114 except KeyError:
115 115 try:
116 116 self.load(key)
117 117 return self.p.map[key]
118 118 except KeyError:
119 119 raise KeyError("node " + hex(key))
120 120 def __setitem__(self, key, val):
121 121 self.p.map[key] = val
122 122
123 123 class RevlogError(Exception): pass
124 124
125 125 class revlog:
126 126 def __init__(self, opener, indexfile, datafile):
127 127 self.indexfile = indexfile
128 128 self.datafile = datafile
129 129 self.opener = opener
130 130 self.cache = None
131 131
132 132 try:
133 133 i = self.opener(self.indexfile).read()
134 134 except IOError:
135 135 i = ""
136 136
137 137 if len(i) > 10000:
138 138 # big index, let's parse it on demand
139 139 parser = lazyparser(i, self)
140 140 self.index = lazyindex(parser)
141 141 self.nodemap = lazymap(parser)
142 142 else:
143 143 s = struct.calcsize(indexformat)
144 144 l = len(i) / s
145 145 self.index = [None] * l
146 146 m = [None] * l
147 147
148 148 n = 0
149 149 for f in xrange(0, len(i), s):
150 150 # offset, size, base, linkrev, p1, p2, nodeid
151 151 e = struct.unpack(indexformat, i[f:f + s])
152 152 m[n] = (e[6], n)
153 153 self.index[n] = e
154 154 n += 1
155 155
156 156 self.nodemap = dict(m)
157 157 self.nodemap[nullid] = -1
158 158
159 159 def tip(self): return self.node(len(self.index) - 1)
160 160 def count(self): return len(self.index)
161 161 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
162 162 def rev(self, node): return self.nodemap[node]
163 163 def linkrev(self, node): return self.index[self.nodemap[node]][3]
164 164 def parents(self, node):
165 165 if node == nullid: return (nullid, nullid)
166 166 return self.index[self.nodemap[node]][4:6]
167 167
168 168 def start(self, rev): return self.index[rev][0]
169 169 def length(self, rev): return self.index[rev][1]
170 170 def end(self, rev): return self.start(rev) + self.length(rev)
171 171 def base(self, rev): return self.index[rev][2]
172 172
173 def reachable(self, rev, stop=None):
174 reachable = {}
175 visit = [rev]
176 reachable[rev] = 1
177 if stop:
178 stopn = self.rev(stop)
179 else:
180 stopn = 0
181 while visit:
182 n = visit.pop(0)
183 if n == stop:
184 continue
185 if n == nullid:
186 continue
187 for p in self.parents(n):
188 if self.rev(p) < stopn:
189 continue
190 if p not in reachable:
191 reachable[p] = 1
192 visit.append(p)
193 return reachable
194
173 195 def heads(self, stop=None):
174 196 p = {}
175 197 h = []
176 198 stoprev = 0
177 199 if stop and stop in self.nodemap:
178 200 stoprev = self.rev(stop)
179 201
180 202 for r in range(self.count() - 1, -1, -1):
181 203 n = self.node(r)
182 204 if n not in p:
183 205 h.append(n)
184 206 if n == stop:
185 207 break
186 208 if r < stoprev:
187 209 break
188 210 for pn in self.parents(n):
189 211 p[pn] = 1
190 212 return h
191 213
192 214 def children(self, node):
193 215 c = []
194 216 p = self.rev(node)
195 217 for r in range(p + 1, self.count()):
196 218 n = self.node(r)
197 219 for pn in self.parents(n):
198 220 if pn == node:
199 221 c.append(n)
200 222 continue
201 223 elif pn == nullid:
202 224 continue
203 225 return c
204 226
205 227 def lookup(self, id):
206 228 try:
207 229 rev = int(id)
208 230 if str(rev) != id: raise ValueError
209 231 if rev < 0: rev = self.count() + rev
210 232 if rev < 0 or rev >= self.count(): raise ValueError
211 233 return self.node(rev)
212 234 except (ValueError, OverflowError):
213 235 c = []
214 236 for n in self.nodemap:
215 237 if hex(n).startswith(id):
216 238 c.append(n)
217 239 if len(c) > 1: raise KeyError("Ambiguous identifier")
218 240 if len(c) < 1: raise KeyError("No match found")
219 241 return c[0]
220 242
221 243 return None
222 244
223 245 def diff(self, a, b):
224 246 return mdiff.textdiff(a, b)
225 247
226 248 def patches(self, t, pl):
227 249 return mdiff.patches(t, pl)
228 250
229 251 def delta(self, node):
230 252 r = self.rev(node)
231 253 b = self.base(r)
232 254 if r == b:
233 255 return self.diff(self.revision(self.node(r - 1)),
234 256 self.revision(node))
235 257 else:
236 258 f = self.opener(self.datafile)
237 259 f.seek(self.start(r))
238 260 data = f.read(self.length(r))
239 261 return decompress(data)
240 262
241 263 def revision(self, node):
242 264 if node == nullid: return ""
243 265 if self.cache and self.cache[0] == node: return self.cache[2]
244 266
245 267 text = None
246 268 rev = self.rev(node)
247 269 start, length, base, link, p1, p2, node = self.index[rev]
248 270 end = start + length
249 271 if base != rev: start = self.start(base)
250 272
251 273 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
252 274 base = self.cache[1]
253 275 start = self.start(base + 1)
254 276 text = self.cache[2]
255 277 last = 0
256 278
257 279 f = self.opener(self.datafile)
258 280 f.seek(start)
259 281 data = f.read(end - start)
260 282
261 283 if text is None:
262 284 last = self.length(base)
263 285 text = decompress(data[:last])
264 286
265 287 bins = []
266 288 for r in xrange(base + 1, rev + 1):
267 289 s = self.length(r)
268 290 bins.append(decompress(data[last:last + s]))
269 291 last = last + s
270 292
271 293 text = mdiff.patches(text, bins)
272 294
273 295 if node != hash(text, p1, p2):
274 296 raise IOError("integrity check failed on %s:%d"
275 297 % (self.datafile, rev))
276 298
277 299 self.cache = (node, rev, text)
278 300 return text
279 301
280 302 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
281 303 if text is None: text = ""
282 304 if p1 is None: p1 = self.tip()
283 305 if p2 is None: p2 = nullid
284 306
285 307 node = hash(text, p1, p2)
286 308
287 309 if node in self.nodemap:
288 310 return node
289 311
290 312 n = self.count()
291 313 t = n - 1
292 314
293 315 if n:
294 316 base = self.base(t)
295 317 start = self.start(base)
296 318 end = self.end(t)
297 319 if not d:
298 320 prev = self.revision(self.tip())
299 321 d = self.diff(prev, text)
300 322 data = compress(d)
301 323 dist = end - start + len(data)
302 324
303 325 # full versions are inserted when the needed deltas
304 326 # become comparable to the uncompressed text
305 327 if not n or dist > len(text) * 2:
306 328 data = compress(text)
307 329 base = n
308 330 else:
309 331 base = self.base(t)
310 332
311 333 offset = 0
312 334 if t >= 0:
313 335 offset = self.end(t)
314 336
315 337 e = (offset, len(data), base, link, p1, p2, node)
316 338
317 339 self.index.append(e)
318 340 self.nodemap[node] = n
319 341 entry = struct.pack(indexformat, *e)
320 342
321 343 transaction.add(self.datafile, e[0])
322 344 self.opener(self.datafile, "a").write(data)
323 345 transaction.add(self.indexfile, n * len(entry))
324 346 self.opener(self.indexfile, "a").write(entry)
325 347
326 348 self.cache = (node, n, text)
327 349 return node
328 350
329 351 def ancestor(self, a, b):
330 352 # calculate the distance of every node from root
331 353 dist = {nullid: 0}
332 354 for i in xrange(self.count()):
333 355 n = self.node(i)
334 356 p1, p2 = self.parents(n)
335 357 dist[n] = max(dist[p1], dist[p2]) + 1
336 358
337 359 # traverse ancestors in order of decreasing distance from root
338 360 def ancestors(node):
339 361 # we store negative distances because heap returns smallest member
340 362 h = [(-dist[node], node)]
341 363 seen = {}
342 364 earliest = self.count()
343 365 while h:
344 366 d, n = heapq.heappop(h)
345 367 if n not in seen:
346 368 seen[n] = 1
347 369 r = self.rev(n)
348 370 yield (-d, r, n)
349 371 for p in self.parents(n):
350 372 heapq.heappush(h, (-dist[p], p))
351 373
352 374 x = ancestors(a)
353 375 y = ancestors(b)
354 376 lx = x.next()
355 377 ly = y.next()
356 378
357 379 # increment each ancestor list until it is closer to root than
358 380 # the other, or they match
359 381 while 1:
360 382 if lx == ly:
361 383 return lx[2]
362 384 elif lx < ly:
363 385 ly = y.next()
364 386 elif lx > ly:
365 387 lx = x.next()
366 388
367 389 def group(self, linkmap):
368 390 # given a list of changeset revs, return a set of deltas and
369 391 # metadata corresponding to nodes. the first delta is
370 392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
371 393 # have this parent as it has all history before these
372 394 # changesets. parent is parent[0]
373 395
374 396 revs = []
375 397 needed = {}
376 398
377 399 # find file nodes/revs that match changeset revs
378 400 for i in xrange(0, self.count()):
379 401 if self.index[i][3] in linkmap:
380 402 revs.append(i)
381 403 needed[i] = 1
382 404
383 405 # if we don't have any revisions touched by these changesets, bail
384 406 if not revs:
385 407 yield struct.pack(">l", 0)
386 408 return
387 409
388 410 # add the parent of the first rev
389 411 p = self.parents(self.node(revs[0]))[0]
390 412 revs.insert(0, self.rev(p))
391 413
392 414 # for each delta that isn't contiguous in the log, we need to
393 415 # reconstruct the base, reconstruct the result, and then
394 416 # calculate the delta. We also need to do this where we've
395 417 # stored a full version and not a delta
396 418 for i in xrange(0, len(revs) - 1):
397 419 a, b = revs[i], revs[i + 1]
398 420 if a + 1 != b or self.base(b) == b:
399 421 for j in xrange(self.base(a), a + 1):
400 422 needed[j] = 1
401 423 for j in xrange(self.base(b), b + 1):
402 424 needed[j] = 1
403 425
404 426 # calculate spans to retrieve from datafile
405 427 needed = needed.keys()
406 428 needed.sort()
407 429 spans = []
408 430 oo = -1
409 431 ol = 0
410 432 for n in needed:
411 433 if n < 0: continue
412 434 o = self.start(n)
413 435 l = self.length(n)
414 436 if oo + ol == o: # can we merge with the previous?
415 437 nl = spans[-1][2]
416 438 nl.append((n, l))
417 439 ol += l
418 440 spans[-1] = (oo, ol, nl)
419 441 else:
420 442 oo = o
421 443 ol = l
422 444 spans.append((oo, ol, [(n, l)]))
423 445
424 446 # read spans in, divide up chunks
425 447 chunks = {}
426 448 for span in spans:
427 449 # we reopen the file for each span to make http happy for now
428 450 f = self.opener(self.datafile)
429 451 f.seek(span[0])
430 452 data = f.read(span[1])
431 453
432 454 # divide up the span
433 455 pos = 0
434 456 for r, l in span[2]:
435 457 chunks[r] = decompress(data[pos: pos + l])
436 458 pos += l
437 459
438 460 # helper to reconstruct intermediate versions
439 461 def construct(text, base, rev):
440 462 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
441 463 return mdiff.patches(text, bins)
442 464
443 465 # build deltas
444 466 deltas = []
445 467 for d in xrange(0, len(revs) - 1):
446 468 a, b = revs[d], revs[d + 1]
447 469 n = self.node(b)
448 470
449 471 # do we need to construct a new delta?
450 472 if a + 1 != b or self.base(b) == b:
451 473 if a >= 0:
452 474 base = self.base(a)
453 475 ta = chunks[self.base(a)]
454 476 ta = construct(ta, base, a)
455 477 else:
456 478 ta = ""
457 479
458 480 base = self.base(b)
459 481 if a > base:
460 482 base = a
461 483 tb = ta
462 484 else:
463 485 tb = chunks[self.base(b)]
464 486 tb = construct(tb, base, b)
465 487 d = self.diff(ta, tb)
466 488 else:
467 489 d = chunks[b]
468 490
469 491 p = self.parents(n)
470 492 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
471 493 l = struct.pack(">l", len(meta) + len(d) + 4)
472 494 yield l
473 495 yield meta
474 496 yield d
475 497
476 498 yield struct.pack(">l", 0)
477 499
478 500 def addgroup(self, revs, linkmapper, transaction, unique=0):
479 501 # given a set of deltas, add them to the revision log. the
480 502 # first delta is against its parent, which should be in our
481 503 # log, the rest are against the previous delta.
482 504
483 505 # track the base of the current delta log
484 506 r = self.count()
485 507 t = r - 1
486 508 node = nullid
487 509
488 510 base = prev = -1
489 511 start = end = measure = 0
490 512 if r:
491 513 start = self.start(self.base(t))
492 514 end = self.end(t)
493 515 measure = self.length(self.base(t))
494 516 base = self.base(t)
495 517 prev = self.tip()
496 518
497 519 transaction.add(self.datafile, end)
498 520 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
499 521 dfh = self.opener(self.datafile, "a")
500 522 ifh = self.opener(self.indexfile, "a")
501 523
502 524 # loop through our set of deltas
503 525 chain = None
504 526 for chunk in revs:
505 527 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
506 528 link = linkmapper(cs)
507 529 if node in self.nodemap:
508 530 # this can happen if two branches make the same change
509 531 if unique:
510 532 raise RevlogError("already have %s" % hex(node[:4]))
511 533 chain = node
512 534 continue
513 535 delta = chunk[80:]
514 536
515 537 if not chain:
516 538 # retrieve the parent revision of the delta chain
517 539 chain = p1
518 540 if not chain in self.nodemap:
519 541 raise RevlogError("unknown base %s" % short(chain[:4]))
520 542
521 543 # full versions are inserted when the needed deltas become
522 544 # comparable to the uncompressed text or when the previous
523 545 # version is not the one we have a delta against. We use
524 546 # the size of the previous full rev as a proxy for the
525 547 # current size.
526 548
527 549 if chain == prev:
528 550 cdelta = compress(delta)
529 551
530 552 if chain != prev or (end - start + len(cdelta)) > measure * 2:
531 553 # flush our writes here so we can read it in revision
532 554 dfh.flush()
533 555 ifh.flush()
534 556 text = self.revision(chain)
535 557 text = self.patches(text, [delta])
536 558 chk = self.addrevision(text, transaction, link, p1, p2)
537 559 if chk != node:
538 560 raise RevlogError("consistency error adding group")
539 561 measure = len(text)
540 562 else:
541 563 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
542 564 self.index.append(e)
543 565 self.nodemap[node] = r
544 566 dfh.write(cdelta)
545 567 ifh.write(struct.pack(indexformat, *e))
546 568
547 569 t, r, chain, prev = r, r + 1, node, node
548 570 start = self.start(self.base(t))
549 571 end = self.end(t)
550 572
551 573 dfh.close()
552 574 ifh.close()
553 575 return node
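A side note on the index layout this file relies on throughout: each entry is packed with indexformat (">4l20s20s20s", four big-endian longs plus three 20-byte strings) holding offset, size, base, linkrev, p1, p2, and nodeid, as the comment in the parser loop says. A self-contained round-trip sketch with made-up values:

    import struct

    indexformat = ">4l20s20s20s"
    nullid = "\0" * 20

    # made-up values, purely illustrative
    offset, size, base, linkrev = 0, 118, 0, 0
    p1, p2, nodeid = nullid, nullid, "\x55" * 20

    entry = struct.pack(indexformat, offset, size, base, linkrev,
                        p1, p2, nodeid)
    assert len(entry) == struct.calcsize(indexformat)  # 4*4 + 3*20 = 76

    # unpack() recovers the 7-tuple that lazyparser stores in self.index;
    # e[6] is the nodeid, which is why the parser maps e[6] -> position
    e = struct.unpack(indexformat, entry)
    print e[6] == nodeid     # True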