revlog: implement a fast path for heads
Matt Mackall
r4991:9c8c42bc default
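This changeset adds a fast path to revlog.heads() for the common call with neither a start nor a stop argument (new lines 736-745 in heads() below): instead of building reachability dictionaries keyed by revision, it makes a single pass over the index, clearing an "is a head" flag for every revision that appears as a parent, and returns the revisions whose flag survives. The following is a minimal standalone sketch of that idea, not part of the patch; fast_heads and its parentrevs argument are illustrative names only.

    def fast_heads(parentrevs):
        # parentrevs: one (p1, p2) pair of revision numbers per revision,
        # with -1 meaning "no parent" -- the same information the real code
        # reads from fields 5 and 6 of each revlog index entry.
        count = len(parentrevs)
        ishead = [1] * (count + 1)   # extra slot absorbs writes to rev -1
        for p1, p2 in parentrevs:
            ishead[p1] = ishead[p2] = 0
        return [r for r in xrange(count) if ishead[r]]

    # A linear history 0 <- 1 <- 2 with a branch 1 <- 3 has heads 2 and 3:
    # fast_heads([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]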
@@ -1,1252 +1,1263 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 if q & 0xFFFF:
33 33 raise RevlogError(_('incompatible revision flag %x') % q)
34 34 return int(q >> 16)
35 35
36 36 def gettype(q):
37 37 return int(q & 0xFFFF)
38 38
39 39 def offset_type(offset, type):
40 40 return long(long(offset) << 16 | type)
41 41
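# Editorial note, not part of the patch: the first field of every revlog
# index entry packs the data-file offset into the high 48 bits and a 16-bit
# type/flag value into the low 16 bits; getoffset() above rejects any value
# whose low (flag) bits are set ("incompatible revision flag").  A quick
# illustration:

assert offset_type(1024, 0) == 1024 << 16
assert getoffset(offset_type(1024, 0)) == 1024
assert gettype(offset_type(1024, 0)) == 0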
42 42 def hash(text, p1, p2):
43 43 """generate a hash from the given text and its parent hashes
44 44
45 45 This hash combines both the current file contents and its history
46 46 in a manner that makes it easy to distinguish nodes with the same
47 47 content in the revision graph.
48 48 """
49 49 l = [p1, p2]
50 50 l.sort()
51 51 s = sha.new(l[0])
52 52 s.update(l[1])
53 53 s.update(text)
54 54 return s.digest()
55 55
56 56 def compress(text):
57 57 """ generate a possibly-compressed representation of text """
58 58 if not text:
59 59 return ("", text)
60 60 if len(text) < 44:
61 61 if text[0] == '\0':
62 62 return ("", text)
63 63 return ('u', text)
64 64 bin = zlib.compress(text)
65 65 if len(bin) > len(text):
66 66 if text[0] == '\0':
67 67 return ("", text)
68 68 return ('u', text)
69 69 return ("", bin)
70 70
71 71 def decompress(bin):
72 72 """ decompress the given input """
73 73 if not bin:
74 74 return bin
75 75 t = bin[0]
76 76 if t == '\0':
77 77 return bin
78 78 if t == 'x':
79 79 return zlib.decompress(bin)
80 80 if t == 'u':
81 81 return bin[1:]
82 82 raise RevlogError(_("unknown compression type %r") % t)
83 83
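# Editorial note, not part of the patch: chunks carry a one-byte tag --
# 'u' for text stored uncompressed, 'x' (the usual first byte of a zlib
# stream) for zlib data, and '\0' for short or empty texts stored as-is --
# and decompress() above dispatches on that first byte.  Round trip:

_text = "some revision text" * 10
_hdr, _body = compress(_text)
assert decompress(_hdr + _body) == _text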
84 84 class lazyparser(object):
85 85 """
86 86 this class avoids the need to parse the entirety of large indices
87 87 """
88 88
89 89 # lazyparser is not safe to use on windows if the win32 extensions are not
90 90 # available. it keeps a file handle open, which makes it impossible
91 91 # to break hardlinks on local cloned repos.
92 92 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
93 93 hasattr(util, 'win32api'))
94 94
95 95 def __init__(self, dataf, size):
96 96 self.dataf = dataf
97 97 self.s = struct.calcsize(indexformatng)
98 98 self.datasize = size
99 99 self.l = size/self.s
100 100 self.index = [None] * self.l
101 101 self.map = {nullid: nullrev}
102 102 self.allmap = 0
103 103 self.all = 0
104 104 self.mapfind_count = 0
105 105
106 106 def loadmap(self):
107 107 """
108 108 during a commit, we need to make sure the rev being added is
109 109 not a duplicate. This requires loading the entire index,
110 110 which is fairly slow. loadmap can load up just the node map,
111 111 which takes much less time.
112 112 """
113 113 if self.allmap:
114 114 return
115 115 end = self.datasize
116 116 self.allmap = 1
117 117 cur = 0
118 118 count = 0
119 119 blocksize = self.s * 256
120 120 self.dataf.seek(0)
121 121 while cur < end:
122 122 data = self.dataf.read(blocksize)
123 123 off = 0
124 124 for x in xrange(256):
125 125 n = data[off + ngshaoffset:off + ngshaoffset + 20]
126 126 self.map[n] = count
127 127 count += 1
128 128 if count >= self.l:
129 129 break
130 130 off += self.s
131 131 cur += blocksize
132 132
133 133 def loadblock(self, blockstart, blocksize, data=None):
134 134 if self.all:
135 135 return
136 136 if data is None:
137 137 self.dataf.seek(blockstart)
138 138 if blockstart + blocksize > self.datasize:
139 139 # the revlog may have grown since we've started running,
140 140 # but we don't have space in self.index for more entries.
141 141 # limit blocksize so that we don't get too much data.
142 142 blocksize = max(self.datasize - blockstart, 0)
143 143 data = self.dataf.read(blocksize)
144 144 lend = len(data) / self.s
145 145 i = blockstart / self.s
146 146 off = 0
147 147 # lazyindex supports __delitem__
148 148 if lend > len(self.index) - i:
149 149 lend = len(self.index) - i
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[ngshaoffset:ngshaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap:
161 161 return None
162 162
163 163 # hg log will cause many many searches for the manifest
164 164 # nodes. After we get called a few times, just load the whole
165 165 # thing.
166 166 if self.mapfind_count > 8:
167 167 self.loadmap()
168 168 if node in self.map:
169 169 return node
170 170 return None
171 171 self.mapfind_count += 1
172 172 last = self.l - 1
173 173 while self.index[last] != None:
174 174 if last == 0:
175 175 self.all = 1
176 176 self.allmap = 1
177 177 return None
178 178 last -= 1
179 179 end = (last + 1) * self.s
180 180 blocksize = self.s * 256
181 181 while end >= 0:
182 182 start = max(end - blocksize, 0)
183 183 self.dataf.seek(start)
184 184 data = self.dataf.read(end - start)
185 185 findend = end - start
186 186 while True:
187 187 # we're searching backwards, so we have to make sure
188 188 # we don't find a changeset where this node is a parent
189 189 off = data.rfind(node, 0, findend)
190 190 findend = off
191 191 if off >= 0:
192 192 i = off / self.s
193 193 off = i * self.s
194 194 n = data[off + ngshaoffset:off + ngshaoffset + 20]
195 195 if n == node:
196 196 self.map[n] = i + start / self.s
197 197 return node
198 198 else:
199 199 break
200 200 end -= blocksize
201 201 return None
202 202
203 203 def loadindex(self, i=None, end=None):
204 204 if self.all:
205 205 return
206 206 all = False
207 207 if i == None:
208 208 blockstart = 0
209 209 blocksize = (512 / self.s) * self.s
210 210 end = self.datasize
211 211 all = True
212 212 else:
213 213 if end:
214 214 blockstart = i * self.s
215 215 end = end * self.s
216 216 blocksize = end - blockstart
217 217 else:
218 218 blockstart = (i & ~63) * self.s
219 219 blocksize = self.s * 64
220 220 end = blockstart + blocksize
221 221 while blockstart < end:
222 222 self.loadblock(blockstart, blocksize)
223 223 blockstart += blocksize
224 224 if all:
225 225 self.all = True
226 226
227 227 class lazyindex(object):
228 228 """a lazy version of the index array"""
229 229 def __init__(self, parser):
230 230 self.p = parser
231 231 def __len__(self):
232 232 return len(self.p.index)
233 233 def load(self, pos):
234 234 if pos < 0:
235 235 pos += len(self.p.index)
236 236 self.p.loadindex(pos)
237 237 return self.p.index[pos]
238 238 def __getitem__(self, pos):
239 239 ret = self.p.index[pos] or self.load(pos)
240 240 if isinstance(ret, str):
241 241 ret = struct.unpack(indexformatng, ret)
242 242 return ret
243 243 def __setitem__(self, pos, item):
244 244 self.p.index[pos] = item
245 245 def __delitem__(self, pos):
246 246 del self.p.index[pos]
247 247 def insert(self, pos, e):
248 248 self.p.index.insert(pos, e)
249 249 def append(self, e):
250 250 self.p.index.append(e)
251 251
252 252 class lazymap(object):
253 253 """a lazy version of the node map"""
254 254 def __init__(self, parser):
255 255 self.p = parser
256 256 def load(self, key):
257 257 n = self.p.findnode(key)
258 258 if n == None:
259 259 raise KeyError(key)
260 260 def __contains__(self, key):
261 261 if key in self.p.map:
262 262 return True
263 263 self.p.loadmap()
264 264 return key in self.p.map
265 265 def __iter__(self):
266 266 yield nullid
267 267 for i in xrange(self.p.l):
268 268 ret = self.p.index[i]
269 269 if not ret:
270 270 self.p.loadindex(i)
271 271 ret = self.p.index[i]
272 272 if isinstance(ret, str):
273 273 ret = struct.unpack(indexformatng, ret)
274 274 yield ret[7]
275 275 def __getitem__(self, key):
276 276 try:
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 try:
280 280 self.load(key)
281 281 return self.p.map[key]
282 282 except KeyError:
283 283 raise KeyError("node " + hex(key))
284 284 def __setitem__(self, key, val):
285 285 self.p.map[key] = val
286 286 def __delitem__(self, key):
287 287 del self.p.map[key]
288 288
289 289 indexformatv0 = ">4l20s20s20s"
290 290 v0shaoffset = 56
291 291
292 292 class revlogoldio(object):
293 293 def __init__(self):
294 294 self.size = struct.calcsize(indexformatv0)
295 295
296 296 def parseindex(self, fp, inline):
297 297 s = self.size
298 298 index = []
299 299 nodemap = {nullid: nullrev}
300 300 n = off = 0
301 301 data = fp.read()
302 302 l = len(data)
303 303 while off + s <= l:
304 304 cur = data[off:off + s]
305 305 off += s
306 306 e = struct.unpack(indexformatv0, cur)
307 307 # transform to revlogv1 format
308 308 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
309 309 nodemap[e[4]], nodemap[e[5]], e[6])
310 310 index.append(e2)
311 311 nodemap[e[6]] = n
312 312 n += 1
313 313
314 314 return index, nodemap, None
315 315
316 316 def packentry(self, entry, node, version):
317 317 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
318 318 node(entry[5]), node(entry[6]), entry[7])
319 319 return struct.pack(indexformatv0, *e2)
320 320
321 321 # index ng:
322 322 # 6 bytes offset
323 323 # 2 bytes flags
324 324 # 4 bytes compressed length
325 325 # 4 bytes uncompressed length
326 326 # 4 bytes: base rev
327 327 # 4 bytes link rev
328 328 # 4 bytes parent 1 rev
329 329 # 4 bytes parent 2 rev
330 330 # 32 bytes: nodeid
331 331 indexformatng = ">Qiiiiii20s12x"
332 332 ngshaoffset = 32
333 333 versionformat = ">I"
334 334
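# Editorial illustration, not part of the patch: with the layout above each
# index-ng entry is 64 bytes, and the 20-byte nodeid starts at byte 32
# (ngshaoffset).  Packing a made-up entry shows the mapping; every value
# below is invented for the example.

_e = (offset_type(0, 0),   # 6-byte offset + 2-byte flags, stored as one Q
      11, 23,              # compressed and uncompressed lengths
      0, 0,                # base rev, link rev
      -1, -1,              # parent revs (nullrev)
      "\x11" * 20)         # nodeid, padded to 32 bytes by the trailing 12x
_packed = struct.pack(indexformatng, *_e)
assert len(_packed) == struct.calcsize(indexformatng) == 64
assert _packed[ngshaoffset:ngshaoffset + 20] == "\x11" * 20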
335 335 class revlogio(object):
336 336 def __init__(self):
337 337 self.size = struct.calcsize(indexformatng)
338 338
339 339 def parseindex(self, fp, inline):
340 340 try:
341 341 size = util.fstat(fp).st_size
342 342 except AttributeError:
343 343 size = 0
344 344
345 345 if lazyparser.safe_to_use and not inline and size > 1000000:
346 346 # big index, let's parse it on demand
347 347 parser = lazyparser(fp, size)
348 348 index = lazyindex(parser)
349 349 nodemap = lazymap(parser)
350 350 e = list(index[0])
351 351 type = gettype(e[0])
352 352 e[0] = offset_type(0, type)
353 353 index[0] = e
354 354 return index, nodemap, None
355 355
356 356 s = self.size
357 357 cache = None
358 358 index = []
359 359 nodemap = {nullid: nullrev}
360 360 n = off = 0
361 361 # if we're not using lazymap, always read the whole index
362 362 data = fp.read()
363 363 l = len(data) - s
364 364 unpack = struct.unpack
365 365 append = index.append
366 366 if inline:
367 367 cache = (0, data)
368 368 while off <= l:
369 369 e = unpack(indexformatng, data[off:off + s])
370 370 nodemap[e[7]] = n
371 371 append(e)
372 372 n += 1
373 373 if e[1] < 0:
374 374 break
375 375 off += e[1] + s
376 376 else:
377 377 while off <= l:
378 378 e = unpack(indexformatng, data[off:off + s])
379 379 nodemap[e[7]] = n
380 380 append(e)
381 381 n += 1
382 382 off += s
383 383
384 384 e = list(index[0])
385 385 type = gettype(e[0])
386 386 e[0] = offset_type(0, type)
387 387 index[0] = e
388 388
389 389 return index, nodemap, cache
390 390
391 391 def packentry(self, entry, node, version):
392 392 p = struct.pack(indexformatng, *entry)
393 393 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
394 394 p = struct.pack(versionformat, version) + p[4:]
395 395 return p
396 396
397 397 class revlog(object):
398 398 """
399 399 the underlying revision storage object
400 400
401 401 A revlog consists of two parts, an index and the revision data.
402 402
403 403 The index is a file with a fixed record size containing
404 404 information on each revision, including its nodeid (hash), the
405 405 nodeids of its parents, the position and offset of its data within
406 406 the data file, and the revision it's based on. Finally, each entry
407 407 contains a linkrev entry that can serve as a pointer to external
408 408 data.
409 409
410 410 The revision data itself is a linear collection of data chunks.
411 411 Each chunk represents a revision and is usually represented as a
412 412 delta against the previous chunk. To bound lookup time, runs of
413 413 deltas are limited to about 2 times the length of the original
414 414 version data. This makes retrieval of a version proportional to
415 415 its size, or O(1) relative to the number of revisions.
416 416
417 417 Both pieces of the revlog are written to in an append-only
418 418 fashion, which means we never need to rewrite a file to insert or
419 419 remove data, and can use some simple techniques to avoid the need
420 420 for locking while reading.
421 421 """
422 422 def __init__(self, opener, indexfile):
423 423 """
424 424 create a revlog object
425 425
426 426 opener is a function that abstracts the file opening operation
427 427 and can be used to implement COW semantics or the like.
428 428 """
429 429 self.indexfile = indexfile
430 430 self.datafile = indexfile[:-2] + ".d"
431 431 self.opener = opener
432 432 self._cache = None
433 433 self._chunkcache = None
434 434 self.nodemap = {nullid: nullrev}
435 435 self.index = []
436 436
437 437 v = REVLOG_DEFAULT_VERSION
438 438 if hasattr(opener, "defversion"):
439 439 v = opener.defversion
440 440 if v & REVLOGNG:
441 441 v |= REVLOGNGINLINEDATA
442 442
443 443 i = ""
444 444 try:
445 445 f = self.opener(self.indexfile)
446 446 i = f.read(4)
447 447 f.seek(0)
448 448 if len(i) > 0:
449 449 v = struct.unpack(versionformat, i)[0]
450 450 except IOError, inst:
451 451 if inst.errno != errno.ENOENT:
452 452 raise
453 453
454 454 self.version = v
455 455 self._inline = v & REVLOGNGINLINEDATA
456 456 flags = v & ~0xFFFF
457 457 fmt = v & 0xFFFF
458 458 if fmt == REVLOGV0 and flags:
459 459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 460 % (self.indexfile, flags >> 16))
461 461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 463 % (self.indexfile, flags >> 16))
464 464 elif fmt > REVLOGNG:
465 465 raise RevlogError(_("index %s unknown format %d")
466 466 % (self.indexfile, fmt))
467 467
468 468 self._io = revlogio()
469 469 if self.version == REVLOGV0:
470 470 self._io = revlogoldio()
471 471 if i:
472 472 d = self._io.parseindex(f, self._inline)
473 473 self.index, self.nodemap, self._chunkcache = d
474 474
475 475 # add the magic null revision at -1
476 476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477 477
478 478 def _loadindex(self, start, end):
479 479 """load a block of indexes all at once from the lazy parser"""
480 480 if isinstance(self.index, lazyindex):
481 481 self.index.p.loadindex(start, end)
482 482
483 483 def _loadindexmap(self):
484 484 """loads both the map and the index from the lazy parser"""
485 485 if isinstance(self.index, lazyindex):
486 486 p = self.index.p
487 487 p.loadindex()
488 488 self.nodemap = p.map
489 489
490 490 def _loadmap(self):
491 491 """loads the map from the lazy parser"""
492 492 if isinstance(self.nodemap, lazymap):
493 493 self.nodemap.p.loadmap()
494 494 self.nodemap = self.nodemap.p.map
495 495
496 496 def tip(self):
497 497 return self.node(len(self.index) - 2)
498 498 def count(self):
499 499 return len(self.index) - 1
500 500
501 501 def rev(self, node):
502 502 try:
503 503 return self.nodemap[node]
504 504 except KeyError:
505 505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 506 def node(self, rev):
507 507 return self.index[rev][7]
508 508 def linkrev(self, node):
509 509 return self.index[self.rev(node)][4]
510 510 def parents(self, node):
511 511 d = self.index[self.rev(node)][5:7]
512 512 return (self.node(d[0]), self.node(d[1]))
513 513 def parentrevs(self, rev):
514 514 return self.index[rev][5:7]
515 515 def start(self, rev):
516 516 return getoffset(self.index[rev][0])
517 517 def end(self, rev):
518 518 return self.start(rev) + self.length(rev)
519 519 def length(self, rev):
520 520 return self.index[rev][1]
521 521 def base(self, rev):
522 522 return self.index[rev][3]
523 523
524 524 def size(self, rev):
525 525 """return the length of the uncompressed text for a given revision"""
526 526 l = self.index[rev][2]
527 527 if l >= 0:
528 528 return l
529 529
530 530 t = self.revision(self.node(rev))
531 531 return len(t)
532 532
533 533 # alternate implementation. The advantage of this code is that it
534 534 # will be faster for a single revision. But, the results are not
535 535 # cached, so finding the size of every revision will be slower.
536 536 """
537 537 if self.cache and self.cache[1] == rev:
538 538 return len(self.cache[2])
539 539
540 540 base = self.base(rev)
541 541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 542 base = self.cache[1]
543 543 text = self.cache[2]
544 544 else:
545 545 text = self.revision(self.node(base))
546 546
547 547 l = len(text)
548 548 for x in xrange(base + 1, rev + 1):
549 549 l = mdiff.patchedsize(l, self.chunk(x))
550 550 return l
551 551 """
552 552
553 553 def reachable(self, node, stop=None):
554 554 """return a hash of all nodes ancestral to a given node, including
555 555 the node itself, stopping when stop is matched"""
556 556 reachable = {}
557 557 visit = [node]
558 558 reachable[node] = 1
559 559 if stop:
560 560 stopn = self.rev(stop)
561 561 else:
562 562 stopn = 0
563 563 while visit:
564 564 n = visit.pop(0)
565 565 if n == stop:
566 566 continue
567 567 if n == nullid:
568 568 continue
569 569 for p in self.parents(n):
570 570 if self.rev(p) < stopn:
571 571 continue
572 572 if p not in reachable:
573 573 reachable[p] = 1
574 574 visit.append(p)
575 575 return reachable
576 576
577 577 def nodesbetween(self, roots=None, heads=None):
578 578 """Return a tuple containing three elements. Elements 1 and 2 contain
579 579 a final list of bases and heads after all the unreachable ones have been
580 580 pruned. Element 0 contains a topologically sorted list of all
581 581
582 582 nodes that satisfy these constraints:
583 583 1. All nodes must be descended from a node in roots (the nodes on
584 584 roots are considered descended from themselves).
585 585 2. All nodes must also be ancestors of a node in heads (the nodes in
586 586 heads are considered to be their own ancestors).
587 587
588 588 If roots is unspecified, nullid is assumed as the only root.
589 589 If heads is unspecified, it is taken to be the output of the
590 590 heads method (i.e. a list of all nodes in the repository that
591 591 have no children)."""
592 592 nonodes = ([], [], [])
593 593 if roots is not None:
594 594 roots = list(roots)
595 595 if not roots:
596 596 return nonodes
597 597 lowestrev = min([self.rev(n) for n in roots])
598 598 else:
599 599 roots = [nullid] # Everybody's a descendent of nullid
600 600 lowestrev = nullrev
601 601 if (lowestrev == nullrev) and (heads is None):
602 602 # We want _all_ the nodes!
603 603 return ([self.node(r) for r in xrange(0, self.count())],
604 604 [nullid], list(self.heads()))
605 605 if heads is None:
606 606 # All nodes are ancestors, so the latest ancestor is the last
607 607 # node.
608 608 highestrev = self.count() - 1
609 609 # Set ancestors to None to signal that every node is an ancestor.
610 610 ancestors = None
611 611 # Set heads to an empty dictionary for later discovery of heads
612 612 heads = {}
613 613 else:
614 614 heads = list(heads)
615 615 if not heads:
616 616 return nonodes
617 617 ancestors = {}
618 618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 619 # Also, later we will be using it to filter out the heads we can't
620 620 # find from roots.
621 621 heads = dict.fromkeys(heads, 0)
622 622 # Start at the top and keep marking parents until we're done.
623 623 nodestotag = heads.keys()
624 624 # Remember where the top was so we can use it as a limit later.
625 625 highestrev = max([self.rev(n) for n in nodestotag])
626 626 while nodestotag:
627 627 # grab a node to tag
628 628 n = nodestotag.pop()
629 629 # Never tag nullid
630 630 if n == nullid:
631 631 continue
632 632 # A node's revision number represents its place in a
633 633 # topologically sorted list of nodes.
634 634 r = self.rev(n)
635 635 if r >= lowestrev:
636 636 if n not in ancestors:
637 637 # If we are possibly a descendent of one of the roots
638 638 # and we haven't already been marked as an ancestor
639 639 ancestors[n] = 1 # Mark as ancestor
640 640 # Add non-nullid parents to list of nodes to tag.
641 641 nodestotag.extend([p for p in self.parents(n) if
642 642 p != nullid])
643 643 elif n in heads: # We've seen it before, is it a fake head?
644 644 # So it is, real heads should not be the ancestors of
645 645 # any other heads.
646 646 heads.pop(n)
647 647 if not ancestors:
648 648 return nonodes
649 649 # Now that we have our set of ancestors, we want to remove any
650 650 # roots that are not ancestors.
651 651
652 652 # If one of the roots was nullid, everything is included anyway.
653 653 if lowestrev > nullrev:
654 654 # But, since we weren't, let's recompute the lowest rev to not
655 655 # include roots that aren't ancestors.
656 656
657 657 # Filter out roots that aren't ancestors of heads
658 658 roots = [n for n in roots if n in ancestors]
659 659 # Recompute the lowest revision
660 660 if roots:
661 661 lowestrev = min([self.rev(n) for n in roots])
662 662 else:
663 663 # No more roots? Return empty list
664 664 return nonodes
665 665 else:
666 666 # We are descending from nullid, and don't need to care about
667 667 # any other roots.
668 668 lowestrev = nullrev
669 669 roots = [nullid]
670 670 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 671 # values don't matter).
672 672 descendents = dict.fromkeys(roots, 1)
673 673 # Also, keep the original roots so we can filter out roots that aren't
674 674 # 'real' roots (i.e. are descended from other roots).
675 675 roots = descendents.copy()
676 676 # Our topologically sorted list of output nodes.
677 677 orderedout = []
678 678 # Don't start at nullid since we don't want nullid in our output list,
679 679 # and if nullid shows up in descendents, empty parents will look like
680 680 # they're descendents.
681 681 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 682 n = self.node(r)
683 683 isdescendent = False
684 684 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 685 isdescendent = True
686 686 elif n in descendents:
687 687 # n is already a descendent
688 688 isdescendent = True
689 689 # This check only needs to be done here because all the roots
690 690 # will start being marked as descendents before the loop.
691 691 if n in roots:
692 692 # If n was a root, check if it's a 'real' root.
693 693 p = tuple(self.parents(n))
694 694 # If any of its parents are descendents, it's not a root.
695 695 if (p[0] in descendents) or (p[1] in descendents):
696 696 roots.pop(n)
697 697 else:
698 698 p = tuple(self.parents(n))
699 699 # A node is a descendent if either of its parents are
700 700 # descendents. (We seeded the descendents list with the roots
701 701 # up there, remember?)
702 702 if (p[0] in descendents) or (p[1] in descendents):
703 703 descendents[n] = 1
704 704 isdescendent = True
705 705 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 706 # Only include nodes that are both descendents and ancestors.
707 707 orderedout.append(n)
708 708 if (ancestors is not None) and (n in heads):
709 709 # We're trying to figure out which heads are reachable
710 710 # from roots.
711 711 # Mark this head as having been reached
712 712 heads[n] = 1
713 713 elif ancestors is None:
714 714 # Otherwise, we're trying to discover the heads.
715 715 # Assume this is a head because if it isn't, the next step
716 716 # will eventually remove it.
717 717 heads[n] = 1
718 718 # But, obviously its parents aren't.
719 719 for p in self.parents(n):
720 720 heads.pop(p, None)
721 721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 722 roots = roots.keys()
723 723 assert orderedout
724 724 assert roots
725 725 assert heads
726 726 return (orderedout, roots, heads)
727 727
728 728 def heads(self, start=None, stop=None):
729 729 """return the list of all nodes that have no children
730 730
731 731 if start is specified, only heads that are descendants of
732 732 start will be returned
733 733 if stop is specified, it will consider all the revs from stop
734 734 as if they had no children
735 735 """
736 if start is None and stop is None:
737 count = self.count()
738 if not count:
739 return [nullid]
740 ishead = [1] * (count + 1)
741 index = self.index
742 for r in xrange(count):
743 e = index[r]
744 ishead[e[5]] = ishead[e[6]] = 0
745 return [self.node(r) for r in xrange(count) if ishead[r]]
746
736 747 if start is None:
737 748 start = nullid
738 749 if stop is None:
739 750 stop = []
740 751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
741 752 startrev = self.rev(start)
742 753 reachable = {startrev: 1}
743 754 heads = {startrev: 1}
744 755
745 756 parentrevs = self.parentrevs
746 757 for r in xrange(startrev + 1, self.count()):
747 758 for p in parentrevs(r):
748 759 if p in reachable:
749 760 if r not in stoprevs:
750 761 reachable[r] = 1
751 762 heads[r] = 1
752 763 if p in heads and p not in stoprevs:
753 764 del heads[p]
754 765
755 766 return [self.node(r) for r in heads]
756 767
757 768 def children(self, node):
758 769 """find the children of a given node"""
759 770 c = []
760 771 p = self.rev(node)
761 772 for r in range(p + 1, self.count()):
762 773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
763 774 if prevs:
764 775 for pr in prevs:
765 776 if pr == p:
766 777 c.append(self.node(r))
767 778 elif p == nullrev:
768 779 c.append(self.node(r))
769 780 return c
770 781
771 782 def _match(self, id):
772 783 if isinstance(id, (long, int)):
773 784 # rev
774 785 return self.node(id)
775 786 if len(id) == 20:
776 787 # possibly a binary node
777 788 # odds of a binary node being all hex in ASCII are 1 in 10**25
778 789 try:
779 790 node = id
780 791 r = self.rev(node) # quick search the index
781 792 return node
782 793 except LookupError:
783 794 pass # may be partial hex id
784 795 try:
785 796 # str(rev)
786 797 rev = int(id)
787 798 if str(rev) != id:
788 799 raise ValueError
789 800 if rev < 0:
790 801 rev = self.count() + rev
791 802 if rev < 0 or rev >= self.count():
792 803 raise ValueError
793 804 return self.node(rev)
794 805 except (ValueError, OverflowError):
795 806 pass
796 807 if len(id) == 40:
797 808 try:
798 809 # a full hex nodeid?
799 810 node = bin(id)
800 811 r = self.rev(node)
801 812 return node
802 813 except TypeError:
803 814 pass
804 815
805 816 def _partialmatch(self, id):
806 817 if len(id) < 40:
807 818 try:
808 819 # hex(node)[:...]
809 820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
810 821 node = None
811 822 for n in self.nodemap:
812 823 if n.startswith(bin_id) and hex(n).startswith(id):
813 824 if node is not None:
814 825 raise LookupError(_("Ambiguous identifier"))
815 826 node = n
816 827 if node is not None:
817 828 return node
818 829 except TypeError:
819 830 pass
820 831
821 832 def lookup(self, id):
822 833 """locate a node based on:
823 834 - revision number or str(revision number)
824 835 - nodeid or subset of hex nodeid
825 836 """
826 837 n = self._match(id)
827 838 if n is not None:
828 839 return n
829 840 n = self._partialmatch(id)
830 841 if n:
831 842 return n
832 843
833 844 raise LookupError(_("No match found"))
834 845
835 846 def cmp(self, node, text):
836 847 """compare text with a given file revision"""
837 848 p1, p2 = self.parents(node)
838 849 return hash(text, p1, p2) != node
839 850
840 851 def chunk(self, rev, df=None):
841 852 start, length = self.start(rev), self.length(rev)
842 853 if self._inline:
843 854 start += (rev + 1) * self._io.size
844 855 end = start + length
845 856 def loadcache(df):
846 857 cache_length = max(65536, length)
847 858 if not df:
848 859 if self._inline:
849 860 df = self.opener(self.indexfile)
850 861 else:
851 862 df = self.opener(self.datafile)
852 863 df.seek(start)
853 864 self._chunkcache = (start, df.read(cache_length))
854 865
855 866 if not self._chunkcache:
856 867 loadcache(df)
857 868
858 869 cache_start = self._chunkcache[0]
859 870 cache_end = cache_start + len(self._chunkcache[1])
860 871 if start >= cache_start and end <= cache_end:
861 872 # it is cached
862 873 offset = start - cache_start
863 874 else:
864 875 loadcache(df)
865 876 offset = 0
866 877
867 878 # avoid copying large chunks
868 879 c = self._chunkcache[1]
869 880 if len(c) > length:
870 881 c = c[offset:offset + length]
871 882
872 883 return decompress(c)
873 884
874 885 def delta(self, node):
875 886 """return or calculate a delta between a node and its predecessor"""
876 887 r = self.rev(node)
877 888 return self.revdiff(r - 1, r)
878 889
879 890 def revdiff(self, rev1, rev2):
880 891 """return or calculate a delta between two revisions"""
881 892 b1 = self.base(rev1)
882 893 b2 = self.base(rev2)
883 894 if b1 == b2 and rev1 + 1 == rev2:
884 895 return self.chunk(rev2)
885 896 else:
886 897 return mdiff.textdiff(self.revision(self.node(rev1)),
887 898 self.revision(self.node(rev2)))
888 899
889 900 def revision(self, node):
890 901 """return an uncompressed revision of a given"""
891 902 if node == nullid:
892 903 return ""
893 904 if self._cache and self._cache[0] == node:
894 905 return self._cache[2]
895 906
896 907 # look up what we need to read
897 908 text = None
898 909 rev = self.rev(node)
899 910 base = self.base(rev)
900 911
901 912 if self._inline:
902 913 # we probably have the whole chunk cached
903 914 df = None
904 915 else:
905 916 df = self.opener(self.datafile)
906 917
907 918 # do we have useful data cached?
908 919 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
909 920 base = self._cache[1]
910 921 text = self._cache[2]
911 922 self._loadindex(base, rev + 1)
912 923 else:
913 924 self._loadindex(base, rev + 1)
914 925 text = self.chunk(base, df=df)
915 926
916 927 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
917 928 text = mdiff.patches(text, bins)
918 929 p1, p2 = self.parents(node)
919 930 if node != hash(text, p1, p2):
920 931 raise RevlogError(_("integrity check failed on %s:%d")
921 932 % (self.datafile, rev))
922 933
923 934 self._cache = (node, rev, text)
924 935 return text
925 936
926 937 def checkinlinesize(self, tr, fp=None):
927 938 if not self._inline:
928 939 return
929 940 if not fp:
930 941 fp = self.opener(self.indexfile, 'r')
931 942 fp.seek(0, 2)
932 943 size = fp.tell()
933 944 if size < 131072:
934 945 return
935 946 trinfo = tr.find(self.indexfile)
936 947 if trinfo == None:
937 948 raise RevlogError(_("%s not found in the transaction")
938 949 % self.indexfile)
939 950
940 951 trindex = trinfo[2]
941 952 dataoff = self.start(trindex)
942 953
943 954 tr.add(self.datafile, dataoff)
944 955 df = self.opener(self.datafile, 'w')
945 956 calc = self._io.size
946 957 for r in xrange(self.count()):
947 958 start = self.start(r) + (r + 1) * calc
948 959 length = self.length(r)
949 960 fp.seek(start)
950 961 d = fp.read(length)
951 962 df.write(d)
952 963 fp.close()
953 964 df.close()
954 965 fp = self.opener(self.indexfile, 'w', atomictemp=True)
955 966 self.version &= ~(REVLOGNGINLINEDATA)
956 967 self._inline = False
957 968 for i in xrange(self.count()):
958 969 e = self._io.packentry(self.index[i], self.node, self.version)
959 970 fp.write(e)
960 971
961 972 # if we don't call rename, the temp file will never replace the
962 973 # real index
963 974 fp.rename()
964 975
965 976 tr.replace(self.indexfile, trindex * calc)
966 977 self._chunkcache = None
967 978
968 979 def addrevision(self, text, transaction, link, p1, p2, d=None):
969 980 """add a revision to the log
970 981
971 982 text - the revision data to add
972 983 transaction - the transaction object used for rollback
973 984 link - the linkrev data to add
974 985 p1, p2 - the parent nodeids of the revision
975 986 d - an optional precomputed delta
976 987 """
977 988 dfh = None
978 989 if not self._inline:
979 990 dfh = self.opener(self.datafile, "a")
980 991 ifh = self.opener(self.indexfile, "a+")
981 992 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
982 993
983 994 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
984 995 node = hash(text, p1, p2)
985 996 if node in self.nodemap:
986 997 return node
987 998
988 999 curr = self.count()
989 1000 prev = curr - 1
990 1001 base = self.base(prev)
991 1002 offset = self.end(prev)
992 1003
993 1004 if curr:
994 1005 if not d:
995 1006 ptext = self.revision(self.node(prev))
996 1007 d = mdiff.textdiff(ptext, text)
997 1008 data = compress(d)
998 1009 l = len(data[1]) + len(data[0])
999 1010 dist = l + offset - self.start(base)
1000 1011
1001 1012 # full versions are inserted when the needed deltas
1002 1013 # become comparable to the uncompressed text
1003 1014 if not curr or dist > len(text) * 2:
1004 1015 data = compress(text)
1005 1016 l = len(data[1]) + len(data[0])
1006 1017 base = curr
1007 1018
1008 1019 e = (offset_type(offset, 0), l, len(text),
1009 1020 base, link, self.rev(p1), self.rev(p2), node)
1010 1021 self.index.insert(-1, e)
1011 1022 self.nodemap[node] = curr
1012 1023
1013 1024 entry = self._io.packentry(e, self.node, self.version)
1014 1025 if not self._inline:
1015 1026 transaction.add(self.datafile, offset)
1016 1027 transaction.add(self.indexfile, curr * len(entry))
1017 1028 if data[0]:
1018 1029 dfh.write(data[0])
1019 1030 dfh.write(data[1])
1020 1031 dfh.flush()
1021 1032 ifh.write(entry)
1022 1033 else:
1023 1034 ifh.seek(0, 2)
1024 1035 transaction.add(self.indexfile, ifh.tell(), prev)
1025 1036 ifh.write(entry)
1026 1037 ifh.write(data[0])
1027 1038 ifh.write(data[1])
1028 1039 self.checkinlinesize(transaction, ifh)
1029 1040
1030 1041 self._cache = (node, curr, text)
1031 1042 return node
1032 1043
1033 1044 def ancestor(self, a, b):
1034 1045 """calculate the least common ancestor of nodes a and b"""
1035 1046
1036 1047 def parents(rev):
1037 1048 return [p for p in self.parentrevs(rev) if p != nullrev]
1038 1049
1039 1050 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1040 1051 if c is None:
1041 1052 return nullid
1042 1053
1043 1054 return self.node(c)
1044 1055
1045 1056 def group(self, nodelist, lookup, infocollect=None):
1046 1057 """calculate a delta group
1047 1058
1048 1059 Given a list of changeset revs, return a set of deltas and
1049 1060 metadata corresponding to nodes. the first delta is
1050 1061 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1051 1062 have this parent as it has all history before these
1052 1063 changesets. parent is parent[0]
1053 1064 """
1054 1065 revs = [self.rev(n) for n in nodelist]
1055 1066
1056 1067 # if we don't have any revisions touched by these changesets, bail
1057 1068 if not revs:
1058 1069 yield changegroup.closechunk()
1059 1070 return
1060 1071
1061 1072 # add the parent of the first rev
1062 1073 p = self.parents(self.node(revs[0]))[0]
1063 1074 revs.insert(0, self.rev(p))
1064 1075
1065 1076 # build deltas
1066 1077 for d in xrange(0, len(revs) - 1):
1067 1078 a, b = revs[d], revs[d + 1]
1068 1079 nb = self.node(b)
1069 1080
1070 1081 if infocollect is not None:
1071 1082 infocollect(nb)
1072 1083
1073 1084 d = self.revdiff(a, b)
1074 1085 p = self.parents(nb)
1075 1086 meta = nb + p[0] + p[1] + lookup(nb)
1076 1087 yield changegroup.genchunk("%s%s" % (meta, d))
1077 1088
1078 1089 yield changegroup.closechunk()
1079 1090
1080 1091 def addgroup(self, revs, linkmapper, transaction, unique=0):
1081 1092 """
1082 1093 add a delta group
1083 1094
1084 1095 given a set of deltas, add them to the revision log. the
1085 1096 first delta is against its parent, which should be in our
1086 1097 log, the rest are against the previous delta.
1087 1098 """
1088 1099
1089 1100 # track the base of the current delta log
1090 1101 r = self.count()
1091 1102 t = r - 1
1092 1103 node = None
1093 1104
1094 1105 base = prev = nullrev
1095 1106 start = end = textlen = 0
1096 1107 if r:
1097 1108 end = self.end(t)
1098 1109
1099 1110 ifh = self.opener(self.indexfile, "a+")
1100 1111 ifh.seek(0, 2)
1101 1112 transaction.add(self.indexfile, ifh.tell(), self.count())
1102 1113 if self._inline:
1103 1114 dfh = None
1104 1115 else:
1105 1116 transaction.add(self.datafile, end)
1106 1117 dfh = self.opener(self.datafile, "a")
1107 1118
1108 1119 # loop through our set of deltas
1109 1120 chain = None
1110 1121 for chunk in revs:
1111 1122 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1112 1123 link = linkmapper(cs)
1113 1124 if node in self.nodemap:
1114 1125 # this can happen if two branches make the same change
1115 1126 # if unique:
1116 1127 # raise RevlogError(_("already have %s") % hex(node[:4]))
1117 1128 chain = node
1118 1129 continue
1119 1130 delta = chunk[80:]
1120 1131
1121 1132 for p in (p1, p2):
1122 1133 if not p in self.nodemap:
1123 1134 raise LookupError(_("unknown parent %s") % short(p))
1124 1135
1125 1136 if not chain:
1126 1137 # retrieve the parent revision of the delta chain
1127 1138 chain = p1
1128 1139 if not chain in self.nodemap:
1129 1140 raise LookupError(_("unknown base %s") % short(chain[:4]))
1130 1141
1131 1142 # full versions are inserted when the needed deltas become
1132 1143 # comparable to the uncompressed text or when the previous
1133 1144 # version is not the one we have a delta against. We use
1134 1145 # the size of the previous full rev as a proxy for the
1135 1146 # current size.
1136 1147
1137 1148 if chain == prev:
1138 1149 tempd = compress(delta)
1139 1150 cdelta = tempd[0] + tempd[1]
1140 1151 textlen = mdiff.patchedsize(textlen, delta)
1141 1152
1142 1153 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1143 1154 # flush our writes here so we can read it in revision
1144 1155 if dfh:
1145 1156 dfh.flush()
1146 1157 ifh.flush()
1147 1158 text = self.revision(chain)
1148 1159 text = mdiff.patches(text, [delta])
1149 1160 chk = self._addrevision(text, transaction, link, p1, p2, None,
1150 1161 ifh, dfh)
1151 1162 if not dfh and not self._inline:
1152 1163 # addrevision switched from inline to conventional
1153 1164 # reopen the index
1154 1165 dfh = self.opener(self.datafile, "a")
1155 1166 ifh = self.opener(self.indexfile, "a")
1156 1167 if chk != node:
1157 1168 raise RevlogError(_("consistency error adding group"))
1158 1169 textlen = len(text)
1159 1170 else:
1160 1171 e = (offset_type(end, 0), len(cdelta), textlen, base,
1161 1172 link, self.rev(p1), self.rev(p2), node)
1162 1173 self.index.insert(-1, e)
1163 1174 self.nodemap[node] = r
1164 1175 entry = self._io.packentry(e, self.node, self.version)
1165 1176 if self._inline:
1166 1177 ifh.write(entry)
1167 1178 ifh.write(cdelta)
1168 1179 self.checkinlinesize(transaction, ifh)
1169 1180 if not self._inline:
1170 1181 dfh = self.opener(self.datafile, "a")
1171 1182 ifh = self.opener(self.indexfile, "a")
1172 1183 else:
1173 1184 dfh.write(cdelta)
1174 1185 ifh.write(entry)
1175 1186
1176 1187 t, r, chain, prev = r, r + 1, node, node
1177 1188 base = self.base(t)
1178 1189 start = self.start(base)
1179 1190 end = self.end(t)
1180 1191
1181 1192 return node
1182 1193
1183 1194 def strip(self, rev, minlink):
1184 1195 if self.count() == 0 or rev >= self.count():
1185 1196 return
1186 1197
1187 1198 if isinstance(self.index, lazyindex):
1188 1199 self._loadindexmap()
1189 1200
1190 1201 # When stripping away a revision, we need to make sure it
1191 1202 # does not actually belong to an older changeset.
1192 1203 # The minlink parameter defines the oldest revision
1193 1204 # we're allowed to strip away.
1194 1205 while minlink > self.index[rev][4]:
1195 1206 rev += 1
1196 1207 if rev >= self.count():
1197 1208 return
1198 1209
1199 1210 # first truncate the files on disk
1200 1211 end = self.start(rev)
1201 1212 if not self._inline:
1202 1213 df = self.opener(self.datafile, "a")
1203 1214 df.truncate(end)
1204 1215 end = rev * self._io.size
1205 1216 else:
1206 1217 end += rev * self._io.size
1207 1218
1208 1219 indexf = self.opener(self.indexfile, "a")
1209 1220 indexf.truncate(end)
1210 1221
1211 1222 # then reset internal state in memory to forget those revisions
1212 1223 self._cache = None
1213 1224 self._chunkcache = None
1214 1225 for x in xrange(rev, self.count()):
1215 1226 del self.nodemap[self.node(x)]
1216 1227
1217 1228 del self.index[rev:-1]
1218 1229
1219 1230 def checksize(self):
1220 1231 expected = 0
1221 1232 if self.count():
1222 1233 expected = self.end(self.count() - 1)
1223 1234
1224 1235 try:
1225 1236 f = self.opener(self.datafile)
1226 1237 f.seek(0, 2)
1227 1238 actual = f.tell()
1228 1239 dd = actual - expected
1229 1240 except IOError, inst:
1230 1241 if inst.errno != errno.ENOENT:
1231 1242 raise
1232 1243 dd = 0
1233 1244
1234 1245 try:
1235 1246 f = self.opener(self.indexfile)
1236 1247 f.seek(0, 2)
1237 1248 actual = f.tell()
1238 1249 s = self._io.size
1239 1250 i = actual / s
1240 1251 di = actual - (i * s)
1241 1252 if self._inline:
1242 1253 databytes = 0
1243 1254 for r in xrange(self.count()):
1244 1255 databytes += self.length(r)
1245 1256 dd = 0
1246 1257 di = actual - self.count() * s - databytes
1247 1258 except IOError, inst:
1248 1259 if inst.errno != errno.ENOENT:
1249 1260 raise
1250 1261 di = 0
1251 1262
1252 1263 return (dd, di)