Speedup revlog.ancestors for the linear case...
Chris Mason
r2081:416d8b2a default
@@ -1,1211 +1,1229 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25
26 26 def flagstr(flag):
27 27 if flag == "inline":
28 28 return REVLOGNGINLINEDATA
29 29 raise RevlogError(_("unknown revlog flag %s" % flag))
30 30
31 31 def hash(text, p1, p2):
32 32 """generate a hash from the given text and its parent hashes
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
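# Illustrative sketch (added here for clarity, not part of the original
# file): two revisions with identical text but different parents get
# different nodeids, which is why the sorted parent hashes are mixed into
# the digest above.
#
#   >>> n1 = hash("same text", nullid, nullid)
#   >>> n2 = hash("same text", n1, nullid)
#   >>> n1 == n2
#   False
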
45 45 def compress(text):
46 46 """ generate a possibly-compressed representation of text """
47 47 if not text: return ("", text)
48 48 if len(text) < 44:
49 49 if text[0] == '\0': return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 return ("", bin)
56 56
57 57 def decompress(bin):
58 58 """ decompress the given input """
59 59 if not bin: return bin
60 60 t = bin[0]
61 61 if t == '\0': return bin
62 62 if t == 'x': return zlib.decompress(bin)
63 63 if t == 'u': return bin[1:]
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
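# Illustrative sketch (added here for clarity, not part of the original
# file): compress() returns a (prefix, data) pair so callers can write the
# two pieces separately, and decompress() dispatches on the first byte:
# '\0' means raw data, 'x' is bare zlib output, 'u' marks text stored
# uncompressed behind a one-byte marker.
#
#   >>> text = "some revision text" * 10
#   >>> decompress("".join(compress(text))) == text
#   True
#   >>> compress("hi")          # too short to be worth compressing
#   ('u', 'hi')
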
66 66 indexformatv0 = ">4l20s20s20s"
67 67 v0shaoffset = 56
68 68 # index ng:
69 69 # 6 bytes offset
70 70 # 2 bytes flags
71 71 # 4 bytes compressed length
72 72 # 4 bytes uncompressed length
73 73 # 4 bytes: base rev
74 74 # 4 bytes link rev
75 75 # 4 bytes parent 1 rev
76 76 # 4 bytes parent 2 rev
77 77 # 32 bytes: nodeid
78 78 indexformatng = ">Qiiiiii20s12x"
79 79 ngshaoffset = 32
80 80 versionformat = ">i"
81 81
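# Illustrative sketch (added here for clarity, not part of the original
# file): a revlogng index entry packs to 64 bytes (8 + 6*4 + 20 + 12 bytes
# of padding).  The first field combines the data offset and the flags via
# offset_type() below; parents and the link are stored as revision numbers.
#
#   >>> e = (0, 11, 25, 0, 0, -1, -1, "\0" * 20)
#   >>> len(struct.pack(indexformatng, *e))
#   64
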
82 82 class lazyparser(object):
83 83 """
84 84 this class avoids the need to parse the entirety of large indices
85 85 """
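# Added summary comment (not in the original source): the index starts out
# as a list of None placeholders.  loadindex()/loadblock() fill small
# slices of it on demand, loadmap() reads just the nodeid column to build
# the full node->rev map (needed e.g. for duplicate detection on commit),
# and findnode() scans the file backwards for a single node, falling back
# to a full loadmap() after a handful of misses.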
86 86 def __init__(self, dataf, size, indexformat, shaoffset):
87 87 self.dataf = dataf
88 88 self.format = indexformat
89 89 self.s = struct.calcsize(indexformat)
90 90 self.indexformat = indexformat
91 91 self.datasize = size
92 92 self.l = size/self.s
93 93 self.index = [None] * self.l
94 94 self.map = {nullid: -1}
95 95 self.allmap = 0
96 96 self.all = 0
97 97 self.mapfind_count = 0
98 98 self.shaoffset = shaoffset
99 99
100 100 def loadmap(self):
101 101 """
102 102 during a commit, we need to make sure the rev being added is
103 103 not a duplicate. This requires loading the entire index,
104 104 which is fairly slow. loadmap can load up just the node map,
105 105 which takes much less time.
106 106 """
107 107 if self.allmap: return
108 108 start = 0
109 109 end = self.datasize
110 110 self.allmap = 1
111 111 cur = 0
112 112 count = 0
113 113 blocksize = self.s * 256
114 114 self.dataf.seek(0)
115 115 while cur < end:
116 116 data = self.dataf.read(blocksize)
117 117 off = 0
118 118 for x in xrange(256):
119 119 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 120 self.map[n] = count
121 121 count += 1
122 122 if count >= self.l:
123 123 break
124 124 off += self.s
125 125 cur += blocksize
126 126
127 127 def loadblock(self, blockstart, blocksize, data=None):
128 128 if self.all: return
129 129 if data is None:
130 130 self.dataf.seek(blockstart)
131 131 data = self.dataf.read(blocksize)
132 132 lend = len(data) / self.s
133 133 i = blockstart / self.s
134 134 off = 0
135 135 for x in xrange(lend):
136 136 if self.index[i + x] == None:
137 137 b = data[off : off + self.s]
138 138 self.index[i + x] = b
139 139 n = b[self.shaoffset:self.shaoffset + 20]
140 140 self.map[n] = i + x
141 141 off += self.s
142 142
143 143 def findnode(self, node):
144 144 """search backwards through the index file for a specific node"""
145 145 if self.allmap: return None
146 146
147 147 # hg log will cause many many searches for the manifest
148 148 # nodes. After we get called a few times, just load the whole
149 149 # thing.
150 150 if self.mapfind_count > 8:
151 151 self.loadmap()
152 152 if node in self.map:
153 153 return node
154 154 return None
155 155 self.mapfind_count += 1
156 156 last = self.l - 1
157 157 while self.index[last] != None:
158 158 if last == 0:
159 159 self.all = 1
160 160 self.allmap = 1
161 161 return None
162 162 last -= 1
163 163 end = (last + 1) * self.s
164 164 blocksize = self.s * 256
165 165 while end >= 0:
166 166 start = max(end - blocksize, 0)
167 167 self.dataf.seek(start)
168 168 data = self.dataf.read(end - start)
169 169 findend = end - start
170 170 while True:
171 171 # we're searching backwards, so we have to make sure
172 172 # we don't find a changeset where this node is a parent
173 173 off = data.rfind(node, 0, findend)
174 174 findend = off
175 175 if off >= 0:
176 176 i = off / self.s
177 177 off = i * self.s
178 178 n = data[off + self.shaoffset:off + self.shaoffset + 20]
179 179 if n == node:
180 180 self.map[n] = i + start / self.s
181 181 return node
182 182 else:
183 183 break
184 184 end -= blocksize
185 185 return None
186 186
187 187 def loadindex(self, i=None, end=None):
188 188 if self.all: return
189 189 all = False
190 190 if i == None:
191 191 blockstart = 0
192 192 blocksize = (512 / self.s) * self.s
193 193 end = self.datasize
194 194 all = True
195 195 else:
196 196 if end:
197 197 blockstart = i * self.s
198 198 end = end * self.s
199 199 blocksize = end - blockstart
200 200 else:
201 201 blockstart = (i & ~(32)) * self.s
202 202 blocksize = self.s * 64
203 203 end = blockstart + blocksize
204 204 while blockstart < end:
205 205 self.loadblock(blockstart, blocksize)
206 206 blockstart += blocksize
207 207 if all: self.all = True
208 208
209 209 class lazyindex(object):
210 210 """a lazy version of the index array"""
211 211 def __init__(self, parser):
212 212 self.p = parser
213 213 def __len__(self):
214 214 return len(self.p.index)
215 215 def load(self, pos):
216 216 if pos < 0:
217 217 pos += len(self.p.index)
218 218 self.p.loadindex(pos)
219 219 return self.p.index[pos]
220 220 def __getitem__(self, pos):
221 221 ret = self.p.index[pos] or self.load(pos)
222 222 if isinstance(ret, str):
223 223 ret = struct.unpack(self.p.indexformat, ret)
224 224 return ret
225 225 def __setitem__(self, pos, item):
226 226 self.p.index[pos] = item
227 227 def __delitem__(self, pos):
228 228 del self.p.index[pos]
229 229 def append(self, e):
230 230 self.p.index.append(e)
231 231
232 232 class lazymap(object):
233 233 """a lazy version of the node map"""
234 234 def __init__(self, parser):
235 235 self.p = parser
236 236 def load(self, key):
237 237 n = self.p.findnode(key)
238 238 if n == None:
239 239 raise KeyError(key)
240 240 def __contains__(self, key):
241 241 if key in self.p.map:
242 242 return True
243 243 self.p.loadmap()
244 244 return key in self.p.map
245 245 def __iter__(self):
246 246 yield nullid
247 247 for i in xrange(self.p.l):
248 248 ret = self.p.index[i]
249 249 if not ret:
250 250 self.p.loadindex(i)
251 251 ret = self.p.index[i]
252 252 if isinstance(ret, str):
253 253 ret = struct.unpack(self.p.indexformat, ret)
254 254 yield ret[-1]
255 255 def __getitem__(self, key):
256 256 try:
257 257 return self.p.map[key]
258 258 except KeyError:
259 259 try:
260 260 self.load(key)
261 261 return self.p.map[key]
262 262 except KeyError:
263 263 raise KeyError("node " + hex(key))
264 264 def __setitem__(self, key, val):
265 265 self.p.map[key] = val
266 266 def __delitem__(self, key):
267 267 del self.p.map[key]
268 268
269 269 class RevlogError(Exception): pass
270 270
271 271 class revlog(object):
272 272 """
273 273 the underlying revision storage object
274 274
275 275 A revlog consists of two parts, an index and the revision data.
276 276
277 277 The index is a file with a fixed record size containing
278 278 information on each revision, including its nodeid (hash), the
279 279 nodeids of its parents, the position and offset of its data within
280 280 the data file, and the revision it's based on. Finally, each entry
281 281 contains a linkrev entry that can serve as a pointer to external
282 282 data.
283 283
284 284 The revision data itself is a linear collection of data chunks.
285 285 Each chunk represents a revision and is usually represented as a
286 286 delta against the previous chunk. To bound lookup time, runs of
287 287 deltas are limited to about 2 times the length of the original
288 288 version data. This makes retrieval of a version proportional to
289 289 its size, or O(1) relative to the number of revisions.
290 290
291 291 Both pieces of the revlog are written to in an append-only
292 292 fashion, which means we never need to rewrite a file to insert or
293 293 remove data, and can use some simple techniques to avoid the need
294 294 for locking while reading.
295 295 """
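# Added note (not in the original source): the "2 times" bound above is
# enforced in addrevision()/addgroup() below.  Once the accumulated delta
# chain would exceed roughly twice the size of the current text, a new
# full version is stored and becomes the new base, so reconstructing any
# revision reads O(len(text)) bytes of the data file regardless of how
# long the history is.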
296 296 def __init__(self, opener, indexfile, datafile, defversion=0):
297 297 """
298 298 create a revlog object
299 299
300 300 opener is a function that abstracts the file opening operation
301 301 and can be used to implement COW semantics or the like.
302 302 """
303 303 self.indexfile = indexfile
304 304 self.datafile = datafile
305 305 self.opener = opener
306 306
307 307 self.indexstat = None
308 308 self.cache = None
309 309 self.chunkcache = None
310 310 self.defversion = defversion
311 311 self.load()
312 312
313 313 def load(self):
314 314 v = self.defversion
315 315 try:
316 316 f = self.opener(self.indexfile)
317 317 i = f.read(4)
318 318 f.seek(0)
319 319 except IOError, inst:
320 320 if inst.errno != errno.ENOENT:
321 321 raise
322 322 i = ""
323 323 else:
324 324 try:
325 325 st = os.fstat(f.fileno())
326 326 except AttributeError, inst:
327 327 st = None
328 328 else:
329 329 oldst = self.indexstat
330 330 if (oldst and st.st_dev == oldst.st_dev
331 331 and st.st_ino == oldst.st_ino
332 332 and st.st_mtime == oldst.st_mtime
333 333 and st.st_ctime == oldst.st_ctime):
334 334 return
335 335 self.indexstat = st
336 336 if len(i) > 0:
337 337 v = struct.unpack(versionformat, i)[0]
338 338 flags = v & ~0xFFFF
339 339 fmt = v & 0xFFFF
340 340 if fmt == 0:
341 341 if flags:
342 342 raise RevlogError(_("index %s invalid flags %x for format v0" %
343 343 (self.indexfile, flags)))
344 344 elif fmt == REVLOGNG:
345 345 if flags & ~REVLOGNGINLINEDATA:
346 346 raise RevlogError(_("index %s invalid flags %x for revlogng" %
347 347 (self.indexfile, flags)))
348 348 else:
349 349 raise RevlogError(_("index %s invalid format %d" %
350 350 (self.indexfile, fmt)))
351 351 self.version = v
352 352 if v == 0:
353 353 self.indexformat = indexformatv0
354 354 shaoffset = v0shaoffset
355 355 else:
356 356 self.indexformat = indexformatng
357 357 shaoffset = ngshaoffset
358 358
359 359 if i:
360 360 if not self.inlinedata() and st and st.st_size > 10000:
361 361 # big index, let's parse it on demand
362 362 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
363 363 self.index = lazyindex(parser)
364 364 self.nodemap = lazymap(parser)
365 365 else:
366 366 i = f.read()
367 367 self.parseindex(i)
368 368 if self.inlinedata():
369 369 # we've already got the entire data file read in, save it
370 370 # in the chunk data
371 371 self.chunkcache = (0, i)
372 372 if self.version != 0:
373 373 e = list(self.index[0])
374 374 type = self.ngtype(e[0])
375 375 e[0] = self.offset_type(0, type)
376 376 self.index[0] = e
377 377 else:
378 378 self.nodemap = { nullid: -1}
379 379 self.index = []
380 380
381 381
382 382 def parseindex(self, data):
383 383 s = struct.calcsize(self.indexformat)
384 384 l = len(data)
385 385 self.index = []
386 386 self.nodemap = {nullid: -1}
387 387 inline = self.inlinedata()
388 388 off = 0
389 389 n = 0
390 390 while off < l:
391 391 e = struct.unpack(self.indexformat, data[off:off + s])
392 392 self.index.append(e)
393 393 self.nodemap[e[-1]] = n
394 394 n += 1
395 395 off += s
396 396 if inline:
397 397 off += e[1]
398 398
399 399 def ngoffset(self, q):
400 400 if q & 0xFFFF:
401 401 raise RevlogError(_('%s: incompatible revision flag %x') %
402 402 (self.indexfile, type))
403 403 return long(q >> 16)
404 404
405 405 def ngtype(self, q):
406 406 return int(q & 0xFFFF)
407 407
408 408 def offset_type(self, offset, type):
409 409 return long(long(offset) << 16 | type)
410 410
411 411 def loadindex(self, start, end):
412 412 """load a block of indexes all at once from the lazy parser"""
413 413 if isinstance(self.index, lazyindex):
414 414 self.index.p.loadindex(start, end)
415 415
416 416 def loadindexmap(self):
417 417 """loads both the map and the index from the lazy parser"""
418 418 if isinstance(self.index, lazyindex):
419 419 p = self.index.p
420 420 p.loadindex()
421 421 self.nodemap = p.map
422 422
423 423 def loadmap(self):
424 424 """loads the map from the lazy parser"""
425 425 if isinstance(self.nodemap, lazymap):
426 426 self.nodemap.p.loadmap()
427 427 self.nodemap = self.nodemap.p.map
428 428
429 429 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
430 430 def tip(self): return self.node(len(self.index) - 1)
431 431 def count(self): return len(self.index)
432 432 def node(self, rev):
433 433 return (rev < 0) and nullid or self.index[rev][-1]
434 434 def rev(self, node):
435 435 try:
436 436 return self.nodemap[node]
437 437 except KeyError:
438 438 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
439 439 def linkrev(self, node): return self.index[self.rev(node)][-4]
440 440 def parents(self, node):
441 441 if node == nullid: return (nullid, nullid)
442 442 r = self.rev(node)
443 443 d = self.index[r][-3:-1]
444 444 if self.version == 0:
445 445 return d
446 446 return [ self.node(x) for x in d ]
447 447 def start(self, rev):
448 448 if rev < 0:
449 449 return -1
450 450 if self.version != 0:
451 451 return self.ngoffset(self.index[rev][0])
452 452 return self.index[rev][0]
453 453
454 454 def end(self, rev): return self.start(rev) + self.length(rev)
455 455
456 456 def size(self, rev):
457 457 """return the length of the uncompressed text for a given revision"""
458 458 l = -1
459 459 if self.version != 0:
460 460 l = self.index[rev][2]
461 461 if l >= 0:
462 462 return l
463 463
464 464 t = self.revision(self.node(rev))
465 465 return len(t)
466 466
467 467 # alternate implementation. The advantage of this code is that it
468 468 # will be faster for a single revision. But, the results are not
469 469 # cached, so finding the size of every revision will be slower.
470 470 """
471 471 if self.cache and self.cache[1] == rev:
472 472 return len(self.cache[2])
473 473
474 474 base = self.base(rev)
475 475 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
476 476 base = self.cache[1]
477 477 text = self.cache[2]
478 478 else:
479 479 text = self.revision(self.node(base))
480 480
481 481 l = len(text)
482 482 for x in xrange(base + 1, rev + 1):
483 483 l = mdiff.patchedsize(l, self.chunk(x))
484 484 return l
485 485 """
486 486
487 487 def length(self, rev):
488 488 if rev < 0:
489 489 return 0
490 490 else:
491 491 return self.index[rev][1]
492 492 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
493 493
494 494 def reachable(self, rev, stop=None):
495 495 reachable = {}
496 496 visit = [rev]
497 497 reachable[rev] = 1
498 498 if stop:
499 499 stopn = self.rev(stop)
500 500 else:
501 501 stopn = 0
502 502 while visit:
503 503 n = visit.pop(0)
504 504 if n == stop:
505 505 continue
506 506 if n == nullid:
507 507 continue
508 508 for p in self.parents(n):
509 509 if self.rev(p) < stopn:
510 510 continue
511 511 if p not in reachable:
512 512 reachable[p] = 1
513 513 visit.append(p)
514 514 return reachable
515 515
516 516 def nodesbetween(self, roots=None, heads=None):
517 517 """Return a tuple containing three elements. Elements 1 and 2 contain
518 518 a final list of bases and heads after all the unreachable ones have been
519 519 pruned. Element 0 contains a topologically sorted list of all
520 520
521 521 nodes that satisfy these constraints:
522 522 1. All nodes must be descended from a node in roots (the nodes on
523 523 roots are considered descended from themselves).
524 524 2. All nodes must also be ancestors of a node in heads (the nodes in
525 525 heads are considered to be their own ancestors).
526 526
527 527 If roots is unspecified, nullid is assumed as the only root.
528 528 If heads is unspecified, it is taken to be the output of the
529 529 heads method (i.e. a list of all nodes in the repository that
530 530 have no children)."""
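# Worked toy example (added for illustration, not in the original source):
# given the graph
#
#     0 --- 1 --- 2 --- 3
#            \
#             4
#
# nodesbetween(roots=[node(1)], heads=[node(3)]) yields
# ([node(1), node(2), node(3)], [node(1)], [node(3)]): rev 0 is dropped
# because it is not descended from a requested root, and rev 4 because it
# is not an ancestor of any requested head.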
531 531 nonodes = ([], [], [])
532 532 if roots is not None:
533 533 roots = list(roots)
534 534 if not roots:
535 535 return nonodes
536 536 lowestrev = min([self.rev(n) for n in roots])
537 537 else:
538 538 roots = [nullid] # Everybody's a descendent of nullid
539 539 lowestrev = -1
540 540 if (lowestrev == -1) and (heads is None):
541 541 # We want _all_ the nodes!
542 542 return ([self.node(r) for r in xrange(0, self.count())],
543 543 [nullid], list(self.heads()))
544 544 if heads is None:
545 545 # All nodes are ancestors, so the latest ancestor is the last
546 546 # node.
547 547 highestrev = self.count() - 1
548 548 # Set ancestors to None to signal that every node is an ancestor.
549 549 ancestors = None
550 550 # Set heads to an empty dictionary for later discovery of heads
551 551 heads = {}
552 552 else:
553 553 heads = list(heads)
554 554 if not heads:
555 555 return nonodes
556 556 ancestors = {}
557 557 # Start at the top and keep marking parents until we're done.
558 558 nodestotag = heads[:]
559 559 # Turn heads into a dictionary so we can remove 'fake' heads.
560 560 # Also, later we will be using it to filter out the heads we can't
561 561 # find from roots.
562 562 heads = dict.fromkeys(heads, 0)
563 563 # Remember where the top was so we can use it as a limit later.
564 564 highestrev = max([self.rev(n) for n in nodestotag])
565 565 while nodestotag:
566 566 # grab a node to tag
567 567 n = nodestotag.pop()
568 568 # Never tag nullid
569 569 if n == nullid:
570 570 continue
571 571 # A node's revision number represents its place in a
572 572 # topologically sorted list of nodes.
573 573 r = self.rev(n)
574 574 if r >= lowestrev:
575 575 if n not in ancestors:
576 576 # If we are possibly a descendent of one of the roots
577 577 # and we haven't already been marked as an ancestor
578 578 ancestors[n] = 1 # Mark as ancestor
579 579 # Add non-nullid parents to list of nodes to tag.
580 580 nodestotag.extend([p for p in self.parents(n) if
581 581 p != nullid])
582 582 elif n in heads: # We've seen it before, is it a fake head?
583 583 # So it is, real heads should not be the ancestors of
584 584 # any other heads.
585 585 heads.pop(n)
586 586 if not ancestors:
587 587 return nonodes
588 588 # Now that we have our set of ancestors, we want to remove any
589 589 # roots that are not ancestors.
590 590
591 591 # If one of the roots was nullid, everything is included anyway.
592 592 if lowestrev > -1:
593 593 # But, since we weren't, let's recompute the lowest rev to not
594 594 # include roots that aren't ancestors.
595 595
596 596 # Filter out roots that aren't ancestors of heads
597 597 roots = [n for n in roots if n in ancestors]
598 598 # Recompute the lowest revision
599 599 if roots:
600 600 lowestrev = min([self.rev(n) for n in roots])
601 601 else:
602 602 # No more roots? Return empty list
603 603 return nonodes
604 604 else:
605 605 # We are descending from nullid, and don't need to care about
606 606 # any other roots.
607 607 lowestrev = -1
608 608 roots = [nullid]
609 609 # Transform our roots list into a 'set' (i.e. a dictionary where the
610 610 # values don't matter).
611 611 descendents = dict.fromkeys(roots, 1)
612 612 # Also, keep the original roots so we can filter out roots that aren't
613 613 # 'real' roots (i.e. are descended from other roots).
614 614 roots = descendents.copy()
615 615 # Our topologically sorted list of output nodes.
616 616 orderedout = []
617 617 # Don't start at nullid since we don't want nullid in our output list,
618 618 # and if nullid shows up in descedents, empty parents will look like
619 619 # they're descendents.
620 620 for r in xrange(max(lowestrev, 0), highestrev + 1):
621 621 n = self.node(r)
622 622 isdescendent = False
623 623 if lowestrev == -1: # Everybody is a descendent of nullid
624 624 isdescendent = True
625 625 elif n in descendents:
626 626 # n is already a descendent
627 627 isdescendent = True
628 628 # This check only needs to be done here because all the roots
629 629 # will start being marked as descendents before the loop.
630 630 if n in roots:
631 631 # If n was a root, check if it's a 'real' root.
632 632 p = tuple(self.parents(n))
633 633 # If any of its parents are descendents, it's not a root.
634 634 if (p[0] in descendents) or (p[1] in descendents):
635 635 roots.pop(n)
636 636 else:
637 637 p = tuple(self.parents(n))
638 638 # A node is a descendent if either of its parents are
639 639 # descendents. (We seeded the descendents list with the roots
640 640 # up there, remember?)
641 641 if (p[0] in descendents) or (p[1] in descendents):
642 642 descendents[n] = 1
643 643 isdescendent = True
644 644 if isdescendent and ((ancestors is None) or (n in ancestors)):
645 645 # Only include nodes that are both descendents and ancestors.
646 646 orderedout.append(n)
647 647 if (ancestors is not None) and (n in heads):
648 648 # We're trying to figure out which heads are reachable
649 649 # from roots.
650 650 # Mark this head as having been reached
651 651 heads[n] = 1
652 652 elif ancestors is None:
653 653 # Otherwise, we're trying to discover the heads.
654 654 # Assume this is a head because if it isn't, the next step
655 655 # will eventually remove it.
656 656 heads[n] = 1
657 657 # But, obviously its parents aren't.
658 658 for p in self.parents(n):
659 659 heads.pop(p, None)
660 660 heads = [n for n in heads.iterkeys() if heads[n] != 0]
661 661 roots = roots.keys()
662 662 assert orderedout
663 663 assert roots
664 664 assert heads
665 665 return (orderedout, roots, heads)
666 666
667 667 def heads(self, start=None):
668 668 """return the list of all nodes that have no children
669 669
670 670 if start is specified, only heads that are descendants of
671 671 start will be returned
672 672
673 673 """
674 674 if start is None:
675 675 start = nullid
676 676 reachable = {start: 1}
677 677 heads = {start: 1}
678 678 startrev = self.rev(start)
679 679
680 680 for r in xrange(startrev + 1, self.count()):
681 681 n = self.node(r)
682 682 for pn in self.parents(n):
683 683 if pn in reachable:
684 684 reachable[n] = 1
685 685 heads[n] = 1
686 686 if pn in heads:
687 687 del heads[pn]
688 688 return heads.keys()
689 689
690 690 def children(self, node):
691 691 """find the children of a given node"""
692 692 c = []
693 693 p = self.rev(node)
694 694 for r in range(p + 1, self.count()):
695 695 n = self.node(r)
696 696 for pn in self.parents(n):
697 697 if pn == node:
698 698 c.append(n)
699 699 continue
700 700 elif pn == nullid:
701 701 continue
702 702 return c
703 703
704 704 def lookup(self, id):
705 705 """locate a node based on revision number or subset of hex nodeid"""
706 706 try:
707 707 rev = int(id)
708 708 if str(rev) != id: raise ValueError
709 709 if rev < 0: rev = self.count() + rev
710 710 if rev < 0 or rev >= self.count(): raise ValueError
711 711 return self.node(rev)
712 712 except (ValueError, OverflowError):
713 713 c = []
714 714 for n in self.nodemap:
715 715 if hex(n).startswith(id):
716 716 c.append(n)
717 717 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
718 718 if len(c) < 1: raise RevlogError(_("No match found"))
719 719 return c[0]
720 720
721 721 return None
722 722
723 723 def diff(self, a, b):
724 724 """return a delta between two revisions"""
725 725 return mdiff.textdiff(a, b)
726 726
727 727 def patches(self, t, pl):
728 728 """apply a list of patches to a string"""
729 729 return mdiff.patches(t, pl)
730 730
731 731 def chunk(self, rev, df=None, cachelen=4096):
732 732 start, length = self.start(rev), self.length(rev)
733 733 inline = self.inlinedata()
734 734 if inline:
735 735 start += (rev + 1) * struct.calcsize(self.indexformat)
736 736 end = start + length
737 737 def loadcache(df):
738 738 cache_length = max(cachelen, length) # 4k
739 739 if not df:
740 740 if inline:
741 741 df = self.opener(self.indexfile)
742 742 else:
743 743 df = self.opener(self.datafile)
744 744 df.seek(start)
745 745 self.chunkcache = (start, df.read(cache_length))
746 746
747 747 if not self.chunkcache:
748 748 loadcache(df)
749 749
750 750 cache_start = self.chunkcache[0]
751 751 cache_end = cache_start + len(self.chunkcache[1])
752 752 if start >= cache_start and end <= cache_end:
753 753 # it is cached
754 754 offset = start - cache_start
755 755 else:
756 756 loadcache(df)
757 757 offset = 0
758 758
759 759 #def checkchunk():
760 760 # df = self.opener(self.datafile)
761 761 # df.seek(start)
762 762 # return df.read(length)
763 763 #assert s == checkchunk()
764 764 return decompress(self.chunkcache[1][offset:offset + length])
765 765
766 766 def delta(self, node):
767 767 """return or calculate a delta between a node and its predecessor"""
768 768 r = self.rev(node)
769 769 return self.revdiff(r - 1, r)
770 770
771 771 def revdiff(self, rev1, rev2):
772 772 """return or calculate a delta between two revisions"""
773 773 b1 = self.base(rev1)
774 774 b2 = self.base(rev2)
775 775 if b1 == b2 and rev1 + 1 == rev2:
776 776 return self.chunk(rev2)
777 777 else:
778 778 return self.diff(self.revision(self.node(rev1)),
779 779 self.revision(self.node(rev2)))
780 780
781 781 def revision(self, node):
782 782 """return an uncompressed revision of a given"""
783 783 if node == nullid: return ""
784 784 if self.cache and self.cache[0] == node: return self.cache[2]
785 785
786 786 # look up what we need to read
787 787 text = None
788 788 rev = self.rev(node)
789 789 base = self.base(rev)
790 790
791 791 if self.inlinedata():
792 792 # we probably have the whole chunk cached
793 793 df = None
794 794 else:
795 795 df = self.opener(self.datafile)
796 796
797 797 # do we have useful data cached?
798 798 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
799 799 base = self.cache[1]
800 800 text = self.cache[2]
801 801 self.loadindex(base, rev + 1)
802 802 else:
803 803 self.loadindex(base, rev + 1)
804 804 text = self.chunk(base, df=df)
805 805
806 806 bins = []
807 807 for r in xrange(base + 1, rev + 1):
808 808 bins.append(self.chunk(r, df=df))
809 809
810 810 text = self.patches(text, bins)
811 811
812 812 p1, p2 = self.parents(node)
813 813 if node != hash(text, p1, p2):
814 814 raise RevlogError(_("integrity check failed on %s:%d")
815 815 % (self.datafile, rev))
816 816
817 817 self.cache = (node, rev, text)
818 818 return text
819 819
820 820 def checkinlinesize(self, tr, fp=None):
821 821 if not self.inlinedata():
822 822 return
823 823 if not fp:
824 824 fp = self.opener(self.indexfile, 'r')
825 825 size = fp.tell()
826 826 if size < 131072:
827 827 return
828 828 tr.add(self.datafile, 0)
829 829 df = self.opener(self.datafile, 'w')
830 830 calc = struct.calcsize(self.indexformat)
831 831 for r in xrange(self.count()):
832 832 start = self.start(r) + (r + 1) * calc
833 833 length = self.length(r)
834 834 fp.seek(start)
835 835 d = fp.read(length)
836 836 df.write(d)
837 837 fp.close()
838 838 df.close()
839 839 fp = self.opener(self.indexfile, 'w', atomictemp=True)
840 840 self.version &= ~(REVLOGNGINLINEDATA)
841 841 if self.count():
842 842 x = self.index[0]
843 843 e = struct.pack(self.indexformat, *x)[4:]
844 844 l = struct.pack(versionformat, self.version)
845 845 fp.write(l)
846 846 fp.write(e)
847 847
848 848 for i in xrange(1, self.count()):
849 849 x = self.index[i]
850 850 e = struct.pack(self.indexformat, *x)
851 851 fp.write(e)
852 852
853 853 # if we don't call rename, the temp file will never replace the
854 854 # real index
855 855 fp.rename()
856 856 self.chunkcache = None
857 857
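# Added note (not in the original source): an inline revlog interleaves
# each revision's data right after its index entry in the index file,
# which avoids a second file for small revlogs.  checkinlinesize() above
# moves the data out into the separate data file once the index file grows
# past 128K (131072 bytes) and rewrites the index without the
# REVLOGNGINLINEDATA flag.
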
858 858 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
859 859 """add a revision to the log
860 860
861 861 text - the revision data to add
862 862 transaction - the transaction object used for rollback
863 863 link - the linkrev data to add
864 864 p1, p2 - the parent nodeids of the revision
865 865 d - an optional precomputed delta
866 866 """
867 867 if text is None: text = ""
868 868 if p1 is None: p1 = self.tip()
869 869 if p2 is None: p2 = nullid
870 870
871 871 node = hash(text, p1, p2)
872 872
873 873 if node in self.nodemap:
874 874 return node
875 875
876 876 n = self.count()
877 877 t = n - 1
878 878
879 879 if n:
880 880 base = self.base(t)
881 881 start = self.start(base)
882 882 end = self.end(t)
883 883 if not d:
884 884 prev = self.revision(self.tip())
885 885 d = self.diff(prev, str(text))
886 886 data = compress(d)
887 887 l = len(data[1]) + len(data[0])
888 888 dist = end - start + l
889 889
890 890 # full versions are inserted when the needed deltas
891 891 # become comparable to the uncompressed text
892 892 if not n or dist > len(text) * 2:
893 893 data = compress(text)
894 894 l = len(data[1]) + len(data[0])
895 895 base = n
896 896 else:
897 897 base = self.base(t)
898 898
899 899 offset = 0
900 900 if t >= 0:
901 901 offset = self.end(t)
902 902
903 903 if self.version == 0:
904 904 e = (offset, l, base, link, p1, p2, node)
905 905 else:
906 906 e = (self.offset_type(offset, 0), l, len(text),
907 907 base, link, self.rev(p1), self.rev(p2), node)
908 908
909 909 self.index.append(e)
910 910 self.nodemap[node] = n
911 911 entry = struct.pack(self.indexformat, *e)
912 912
913 913 if not self.inlinedata():
914 914 transaction.add(self.datafile, offset)
915 915 transaction.add(self.indexfile, n * len(entry))
916 916 f = self.opener(self.datafile, "a")
917 917 if data[0]:
918 918 f.write(data[0])
919 919 f.write(data[1])
920 920 f = self.opener(self.indexfile, "a")
921 921 else:
922 922 f = self.opener(self.indexfile, "a+")
923 923 f.seek(0, 2)
924 924 transaction.add(self.indexfile, f.tell())
925 925
926 926 if len(self.index) == 1 and self.version != 0:
927 927 l = struct.pack(versionformat, self.version)
928 928 f.write(l)
929 929 entry = entry[4:]
930 930
931 931 f.write(entry)
932 932
933 933 if self.inlinedata():
934 934 f.write(data[0])
935 935 f.write(data[1])
936 936 self.checkinlinesize(transaction, f)
937 937
938 938 self.cache = (node, n, text)
939 939 return node
940 940
941 941 def ancestor(self, a, b):
942 942 """calculate the least common ancestor of nodes a and b"""
943
944 # start with some short cuts for the linear cases
945 if a == b:
946 return a
947 ra = self.rev(a)
948 rb = self.rev(b)
949 if ra < rb:
950 last = b
951 first = a
952 else:
953 last = a
954 first = b
955
956 # reachable won't include stop in the list, so we have to use a parent
957 reachable = self.reachable(last, stop=self.parents(first)[0])
958 if first in reachable:
959 return first
960
943 961 # calculate the distance of every node from root
944 962 dist = {nullid: 0}
945 963 for i in xrange(self.count()):
946 964 n = self.node(i)
947 965 p1, p2 = self.parents(n)
948 966 dist[n] = max(dist[p1], dist[p2]) + 1
949 967
950 968 # traverse ancestors in order of decreasing distance from root
951 969 def ancestors(node):
952 970 # we store negative distances because heap returns smallest member
953 971 h = [(-dist[node], node)]
954 972 seen = {}
955 973 while h:
956 974 d, n = heapq.heappop(h)
957 975 if n not in seen:
958 976 seen[n] = 1
959 977 yield (-d, n)
960 978 for p in self.parents(n):
961 979 heapq.heappush(h, (-dist[p], p))
962 980
963 981 def generations(node):
964 982 sg, s = None, {}
965 983 for g,n in ancestors(node):
966 984 if g != sg:
967 985 if sg:
968 986 yield sg, s
969 987 sg, s = g, {n:1}
970 988 else:
971 989 s[n] = 1
972 990 yield sg, s
973 991
974 992 x = generations(a)
975 993 y = generations(b)
976 994 gx = x.next()
977 995 gy = y.next()
978 996
979 997 # increment each ancestor list until it is closer to root than
980 998 # the other, or they match
981 999 while 1:
982 1000 #print "ancestor gen %s %s" % (gx[0], gy[0])
983 1001 if gx[0] == gy[0]:
984 1002 # find the intersection
985 1003 i = [ n for n in gx[1] if n in gy[1] ]
986 1004 if i:
987 1005 return i[0]
988 1006 else:
989 1007 #print "next"
990 1008 gy = y.next()
991 1009 gx = x.next()
992 1010 elif gx[0] < gy[0]:
993 1011 #print "next y"
994 1012 gy = y.next()
995 1013 else:
996 1014 #print "next x"
997 1015 gx = x.next()
998 1016
999 1017 def group(self, nodelist, lookup, infocollect=None):
1000 1018 """calculate a delta group
1001 1019
1002 1020 Given a list of changeset revs, return a set of deltas and
1003 1021 metadata corresponding to nodes. The first delta is
1004 1022 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1005 1023 have this parent as it has all history before these
1006 1024 changesets. parent is parent[0]
1007 1025 """
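# Added sketch of each chunk's payload (matching how addgroup() below
# unpacks it, not a separate specification):
#
#   bytes  0-19   node
#   bytes 20-39   p1
#   bytes 40-59   p2
#   bytes 60-79   changeset node returned by lookup(nb)
#   bytes 80-     delta against the previous revision in the group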
1008 1026 revs = [self.rev(n) for n in nodelist]
1009 1027
1010 1028 # if we don't have any revisions touched by these changesets, bail
1011 1029 if not revs:
1012 1030 yield changegroup.closechunk()
1013 1031 return
1014 1032
1015 1033 # add the parent of the first rev
1016 1034 p = self.parents(self.node(revs[0]))[0]
1017 1035 revs.insert(0, self.rev(p))
1018 1036
1019 1037 # build deltas
1020 1038 for d in xrange(0, len(revs) - 1):
1021 1039 a, b = revs[d], revs[d + 1]
1022 1040 nb = self.node(b)
1023 1041
1024 1042 if infocollect is not None:
1025 1043 infocollect(nb)
1026 1044
1027 1045 d = self.revdiff(a, b)
1028 1046 p = self.parents(nb)
1029 1047 meta = nb + p[0] + p[1] + lookup(nb)
1030 1048 yield changegroup.genchunk("%s%s" % (meta, d))
1031 1049
1032 1050 yield changegroup.closechunk()
1033 1051
1034 1052 def addgroup(self, revs, linkmapper, transaction, unique=0):
1035 1053 """
1036 1054 add a delta group
1037 1055
1038 1056 given a set of deltas, add them to the revision log. the
1039 1057 first delta is against its parent, which should be in our
1040 1058 log; the rest are against the previous delta.
1041 1059 """
1042 1060
1043 1061 #track the base of the current delta log
1044 1062 r = self.count()
1045 1063 t = r - 1
1046 1064 node = None
1047 1065
1048 1066 base = prev = -1
1049 1067 start = end = textlen = 0
1050 1068 if r:
1051 1069 end = self.end(t)
1052 1070
1053 1071 ifh = self.opener(self.indexfile, "a+")
1054 1072 ifh.seek(0, 2)
1055 1073 transaction.add(self.indexfile, ifh.tell())
1056 1074 if self.inlinedata():
1057 1075 dfh = None
1058 1076 else:
1059 1077 transaction.add(self.datafile, end)
1060 1078 dfh = self.opener(self.datafile, "a")
1061 1079
1062 1080 # loop through our set of deltas
1063 1081 chain = None
1064 1082 for chunk in revs:
1065 1083 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1066 1084 link = linkmapper(cs)
1067 1085 if node in self.nodemap:
1068 1086 # this can happen if two branches make the same change
1069 1087 # if unique:
1070 1088 # raise RevlogError(_("already have %s") % hex(node[:4]))
1071 1089 chain = node
1072 1090 continue
1073 1091 delta = chunk[80:]
1074 1092
1075 1093 for p in (p1, p2):
1076 1094 if not p in self.nodemap:
1077 1095 raise RevlogError(_("unknown parent %s") % short(p))
1078 1096
1079 1097 if not chain:
1080 1098 # retrieve the parent revision of the delta chain
1081 1099 chain = p1
1082 1100 if not chain in self.nodemap:
1083 1101 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1084 1102
1085 1103 # full versions are inserted when the needed deltas become
1086 1104 # comparable to the uncompressed text or when the previous
1087 1105 # version is not the one we have a delta against. We use
1088 1106 # the size of the previous full rev as a proxy for the
1089 1107 # current size.
1090 1108
1091 1109 if chain == prev:
1092 1110 tempd = compress(delta)
1093 1111 cdelta = tempd[0] + tempd[1]
1094 1112 textlen = mdiff.patchedsize(textlen, delta)
1095 1113
1096 1114 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1097 1115 # flush our writes here so we can read it in revision
1098 1116 if dfh:
1099 1117 dfh.flush()
1100 1118 ifh.flush()
1101 1119 text = self.revision(chain)
1102 1120 text = self.patches(text, [delta])
1103 1121 chk = self.addrevision(text, transaction, link, p1, p2)
1104 1122 if chk != node:
1105 1123 raise RevlogError(_("consistency error adding group"))
1106 1124 textlen = len(text)
1107 1125 else:
1108 1126 if self.version == 0:
1109 1127 e = (end, len(cdelta), base, link, p1, p2, node)
1110 1128 else:
1111 1129 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1112 1130 link, self.rev(p1), self.rev(p2), node)
1113 1131 self.index.append(e)
1114 1132 self.nodemap[node] = r
1115 1133 if self.inlinedata():
1116 1134 ifh.write(struct.pack(self.indexformat, *e))
1117 1135 ifh.write(cdelta)
1118 1136 self.checkinlinesize(transaction, ifh)
1119 1137 if not self.inlinedata():
1120 1138 dfh = self.opener(self.datafile, "a")
1121 1139 ifh = self.opener(self.indexfile, "a")
1122 1140 else:
1123 1141 if not dfh:
1124 1142 # addrevision switched from inline to conventional
1125 1143 # reopen the index
1126 1144 dfh = self.opener(self.datafile, "a")
1127 1145 ifh = self.opener(self.indexfile, "a")
1128 1146 dfh.write(cdelta)
1129 1147 ifh.write(struct.pack(self.indexformat, *e))
1130 1148
1131 1149 t, r, chain, prev = r, r + 1, node, node
1132 1150 base = self.base(t)
1133 1151 start = self.start(base)
1134 1152 end = self.end(t)
1135 1153
1136 1154 if node is None:
1137 1155 raise RevlogError(_("group to be added is empty"))
1138 1156 return node
1139 1157
1140 1158 def strip(self, rev, minlink):
1141 1159 if self.count() == 0 or rev >= self.count():
1142 1160 return
1143 1161
1144 1162 if isinstance(self.index, lazyindex):
1145 1163 self.loadindexmap()
1146 1164
1147 1165 # When stripping away a revision, we need to make sure it
1148 1166 # does not actually belong to an older changeset.
1149 1167 # The minlink parameter defines the oldest revision
1150 1168 # we're allowed to strip away.
1151 1169 while minlink > self.index[rev][-4]:
1152 1170 rev += 1
1153 1171 if rev >= self.count():
1154 1172 return
1155 1173
1156 1174 # first truncate the files on disk
1157 1175 end = self.start(rev)
1158 1176 if not self.inlinedata():
1159 1177 df = self.opener(self.datafile, "a")
1160 1178 df.truncate(end)
1161 1179 end = rev * struct.calcsize(self.indexformat)
1162 1180 else:
1163 1181 end += rev * struct.calcsize(self.indexformat)
1164 1182
1165 1183 indexf = self.opener(self.indexfile, "a")
1166 1184 indexf.truncate(end)
1167 1185
1168 1186 # then reset internal state in memory to forget those revisions
1169 1187 self.cache = None
1170 1188 self.chunkcache = None
1171 1189 for x in xrange(rev, self.count()):
1172 1190 del self.nodemap[self.node(x)]
1173 1191
1174 1192 del self.index[rev:]
1175 1193
1176 1194 def checksize(self):
1177 1195 expected = 0
1178 1196 if self.count():
1179 1197 expected = self.end(self.count() - 1)
1180 1198
1181 1199 try:
1182 1200 f = self.opener(self.datafile)
1183 1201 f.seek(0, 2)
1184 1202 actual = f.tell()
1185 1203 dd = actual - expected
1186 1204 except IOError, inst:
1187 1205 if inst.errno != errno.ENOENT:
1188 1206 raise
1189 1207 dd = 0
1190 1208
1191 1209 try:
1192 1210 f = self.opener(self.indexfile)
1193 1211 f.seek(0, 2)
1194 1212 actual = f.tell()
1195 1213 s = struct.calcsize(self.indexformat)
1196 1214 i = actual / s
1197 1215 di = actual - (i * s)
1198 1216 if self.inlinedata():
1199 1217 databytes = 0
1200 1218 for r in xrange(self.count()):
1201 1219 databytes += self.length(r)
1202 1220 dd = 0
1203 1221 di = actual - self.count() * s - databytes
1204 1222 except IOError, inst:
1205 1223 if inst.errno != errno.ENOENT:
1206 1224 raise
1207 1225 di = 0
1208 1226
1209 1227 return (dd, di)
1210 1228
1211 1229