Change revlog.heads to walk the revision graph using revision numbers...
Alexis S. L. Carvalho
r2490:6ff82ec1 default
@@ -1,1279 +1,1279 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s" % flag))
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
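A minimal standalone sketch of the node hash above, using hashlib in place of the old sha module (illustrative, not the module's code): the parent hashes are sorted before digesting, so the node id is independent of parent order.

```python
import hashlib

def node_hash(text, p1, p2):
    # sort the parent hashes first, so swapping p1/p2 gives the same node
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid = b"\x00" * 20
p = b"\x01" * 20
assert node_hash(b"data", nullid, p) == node_hash(b"data", p, nullid)
```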
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
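decompress can dispatch on the first byte because a zlib stream produced with default settings always begins with byte 0x78, ASCII 'x'; the 'u' (stored-uncompressed) and '\0' (literal) tags are therefore safe from collision. A quick check, assuming default zlib settings:

```python
import zlib

# default zlib output begins with 0x78 ('x'), so the 'u' and '\0' tags
# used by compress() above can never be mistaken for compressed data
blob = zlib.compress(b"some revision text, repeated " * 4)
assert blob[:1] == b"x"
assert zlib.decompress(blob) == b"some revision text, repeated " * 4
```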
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
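For reference, these struct formats work out to fixed record sizes, which is where the shaoffset constants come from: v0 records are four 4-byte fields followed by three 20-byte hashes, and ng records pad out to a round 64 bytes with the node id starting at byte 32. A sanity check:

```python
import struct

# v0: 4 longs (16 bytes) + parent1/parent2/node hashes; node at 16+20+20 = 56
assert struct.calcsize(">4l20s20s20s") == 76
# ng: 8-byte offset/flags word + six 4-byte ints (node id at offset 32),
# then 12 pad bytes for a 64-byte record
assert struct.calcsize(">Qiiiiii20s12x") == 64
```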
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions not
93 93 # available. it keeps a file handle open, which makes it impossible
94 94 # to break hardlinks on locally cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 data = self.dataf.read(blocksize)
143 143 lend = len(data) / self.s
144 144 i = blockstart / self.s
145 145 off = 0
146 146 for x in xrange(lend):
147 147 if self.index[i + x] == None:
148 148 b = data[off : off + self.s]
149 149 self.index[i + x] = b
150 150 n = b[self.shaoffset:self.shaoffset + 20]
151 151 self.map[n] = i + x
152 152 off += self.s
153 153
154 154 def findnode(self, node):
155 155 """search backwards through the index file for a specific node"""
156 156 if self.allmap: return None
157 157
158 158 # hg log will cause many many searches for the manifest
159 159 # nodes. After we get called a few times, just load the whole
160 160 # thing.
161 161 if self.mapfind_count > 8:
162 162 self.loadmap()
163 163 if node in self.map:
164 164 return node
165 165 return None
166 166 self.mapfind_count += 1
167 167 last = self.l - 1
168 168 while self.index[last] != None:
169 169 if last == 0:
170 170 self.all = 1
171 171 self.allmap = 1
172 172 return None
173 173 last -= 1
174 174 end = (last + 1) * self.s
175 175 blocksize = self.s * 256
176 176 while end >= 0:
177 177 start = max(end - blocksize, 0)
178 178 self.dataf.seek(start)
179 179 data = self.dataf.read(end - start)
180 180 findend = end - start
181 181 while True:
182 182 # we're searching backwards, so we have to make sure
183 183 # we don't find a changeset where this node is a parent
184 184 off = data.rfind(node, 0, findend)
185 185 findend = off
186 186 if off >= 0:
187 187 i = off / self.s
188 188 off = i * self.s
189 189 n = data[off + self.shaoffset:off + self.shaoffset + 20]
190 190 if n == node:
191 191 self.map[n] = i + start / self.s
192 192 return node
193 193 else:
194 194 break
195 195 end -= blocksize
196 196 return None
197 197
198 198 def loadindex(self, i=None, end=None):
199 199 if self.all: return
200 200 all = False
201 201 if i == None:
202 202 blockstart = 0
203 203 blocksize = (512 / self.s) * self.s
204 204 end = self.datasize
205 205 all = True
206 206 else:
207 207 if end:
208 208 blockstart = i * self.s
209 209 end = end * self.s
210 210 blocksize = end - blockstart
211 211 else:
212 212 blockstart = (i & ~(32)) * self.s
213 213 blocksize = self.s * 64
214 214 end = blockstart + blocksize
215 215 while blockstart < end:
216 216 self.loadblock(blockstart, blocksize)
217 217 blockstart += blocksize
218 218 if all: self.all = True
219 219
220 220 class lazyindex(object):
221 221 """a lazy version of the index array"""
222 222 def __init__(self, parser):
223 223 self.p = parser
224 224 def __len__(self):
225 225 return len(self.p.index)
226 226 def load(self, pos):
227 227 if pos < 0:
228 228 pos += len(self.p.index)
229 229 self.p.loadindex(pos)
230 230 return self.p.index[pos]
231 231 def __getitem__(self, pos):
232 232 ret = self.p.index[pos] or self.load(pos)
233 233 if isinstance(ret, str):
234 234 ret = struct.unpack(self.p.indexformat, ret)
235 235 return ret
236 236 def __setitem__(self, pos, item):
237 237 self.p.index[pos] = item
238 238 def __delitem__(self, pos):
239 239 del self.p.index[pos]
240 240 def append(self, e):
241 241 self.p.index.append(e)
242 242
243 243 class lazymap(object):
244 244 """a lazy version of the node map"""
245 245 def __init__(self, parser):
246 246 self.p = parser
247 247 def load(self, key):
248 248 n = self.p.findnode(key)
249 249 if n == None:
250 250 raise KeyError(key)
251 251 def __contains__(self, key):
252 252 if key in self.p.map:
253 253 return True
254 254 self.p.loadmap()
255 255 return key in self.p.map
256 256 def __iter__(self):
257 257 yield nullid
258 258 for i in xrange(self.p.l):
259 259 ret = self.p.index[i]
260 260 if not ret:
261 261 self.p.loadindex(i)
262 262 ret = self.p.index[i]
263 263 if isinstance(ret, str):
264 264 ret = struct.unpack(self.p.indexformat, ret)
265 265 yield ret[-1]
266 266 def __getitem__(self, key):
267 267 try:
268 268 return self.p.map[key]
269 269 except KeyError:
270 270 try:
271 271 self.load(key)
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 raise KeyError("node " + hex(key))
275 275 def __setitem__(self, key, val):
276 276 self.p.map[key] = val
277 277 def __delitem__(self, key):
278 278 del self.p.map[key]
279 279
280 280 class RevlogError(Exception): pass
281 281
282 282 class revlog(object):
283 283 """
284 284 the underlying revision storage object
285 285
286 286 A revlog consists of two parts, an index and the revision data.
287 287
288 288 The index is a file with a fixed record size containing
289 289 information on each revision, including its nodeid (hash), the
290 290 nodeids of its parents, the position and offset of its data within
291 291 the data file, and the revision it's based on. Finally, each entry
292 292 contains a linkrev entry that can serve as a pointer to external
293 293 data.
294 294
295 295 The revision data itself is a linear collection of data chunks.
296 296 Each chunk represents a revision and is usually represented as a
297 297 delta against the previous chunk. To bound lookup time, runs of
298 298 deltas are limited to about 2 times the length of the original
299 299 version data. This makes retrieval of a version proportional to
300 300 its size, or O(1) relative to the number of revisions.
301 301
302 302 Both pieces of the revlog are written to in an append-only
303 303 fashion, which means we never need to rewrite a file to insert or
304 304 remove data, and can use some simple techniques to avoid the need
305 305 for locking while reading.
306 306 """
307 307 def __init__(self, opener, indexfile, datafile,
308 308 defversion=REVLOG_DEFAULT_VERSION):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314 """
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile
317 317 self.opener = opener
318 318
319 319 self.indexstat = None
320 320 self.cache = None
321 321 self.chunkcache = None
322 322 self.defversion = defversion
323 323 self.load()
324 324
325 325 def load(self):
326 326 v = self.defversion
327 327 try:
328 328 f = self.opener(self.indexfile)
329 329 i = f.read(4)
330 330 f.seek(0)
331 331 except IOError, inst:
332 332 if inst.errno != errno.ENOENT:
333 333 raise
334 334 i = ""
335 335 else:
336 336 try:
337 337 st = util.fstat(f)
338 338 except AttributeError, inst:
339 339 st = None
340 340 else:
341 341 oldst = self.indexstat
342 342 if (oldst and st.st_dev == oldst.st_dev
343 343 and st.st_ino == oldst.st_ino
344 344 and st.st_mtime == oldst.st_mtime
345 345 and st.st_ctime == oldst.st_ctime):
346 346 return
347 347 self.indexstat = st
348 348 if len(i) > 0:
349 349 v = struct.unpack(versionformat, i)[0]
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_("index %s invalid flags %x for format v0" %
355 355 (self.indexfile, flags)))
356 356 elif fmt == REVLOGNG:
357 357 if flags & ~REVLOGNGINLINEDATA:
358 358 raise RevlogError(_("index %s invalid flags %x for revlogng" %
359 359 (self.indexfile, flags)))
360 360 else:
361 361 raise RevlogError(_("index %s invalid format %d" %
362 362 (self.indexfile, fmt)))
363 363 self.version = v
364 364 if v == REVLOGV0:
365 365 self.indexformat = indexformatv0
366 366 shaoffset = v0shaoffset
367 367 else:
368 368 self.indexformat = indexformatng
369 369 shaoffset = ngshaoffset
370 370
371 371 if i:
372 372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 373 st and st.st_size > 10000):
374 374 # big index, let's parse it on demand
375 375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 376 self.index = lazyindex(parser)
377 377 self.nodemap = lazymap(parser)
378 378 else:
379 379 self.parseindex(f, st)
380 380 if self.version != REVLOGV0:
381 381 e = list(self.index[0])
382 382 type = self.ngtype(e[0])
383 383 e[0] = self.offset_type(0, type)
384 384 self.index[0] = e
385 385 else:
386 386 self.nodemap = { nullid: -1}
387 387 self.index = []
388 388
389 389
390 390 def parseindex(self, fp, st):
391 391 s = struct.calcsize(self.indexformat)
392 392 self.index = []
393 393 self.nodemap = {nullid: -1}
394 394 inline = self.inlinedata()
395 395 n = 0
396 396 leftover = None
397 397 while True:
398 398 if st:
399 399 data = fp.read(65536)
400 400 else:
401 401 # hack for httprangereader, it doesn't do partial reads well
402 402 data = fp.read()
403 403 if not data:
404 404 break
405 405 if n == 0 and self.inlinedata():
406 406 # cache the first chunk
407 407 self.chunkcache = (0, data)
408 408 if leftover:
409 409 data = leftover + data
410 410 leftover = None
411 411 off = 0
412 412 l = len(data)
413 413 while off < l:
414 414 if l - off < s:
415 415 leftover = data[off:]
416 416 break
417 417 cur = data[off:off + s]
418 418 off += s
419 419 e = struct.unpack(self.indexformat, cur)
420 420 self.index.append(e)
421 421 self.nodemap[e[-1]] = n
422 422 n += 1
423 423 if inline:
424 424 off += e[1]
425 425 if off > l:
426 426 # some things don't seek well, just read it
427 427 fp.read(off - l)
428 428 if not st:
429 429 break
430 430
431 431
432 432 def ngoffset(self, q):
433 433 if q & 0xFFFF:
434 434 raise RevlogError(_('%s: incompatible revision flag %x') %
435 435 (self.indexfile, q))
436 436 return long(q >> 16)
437 437
438 438 def ngtype(self, q):
439 439 return int(q & 0xFFFF)
440 440
441 441 def offset_type(self, offset, type):
442 442 return long(long(offset) << 16 | type)
443 443
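These three helpers pack a 48-bit data-file offset and 16-bit type flags into the single 8-byte leading field of an ng index entry. A round-trip sketch of the bit layout:

```python
# pack/unpack round trip for the ng offset/type word
def offset_type(offset, type):
    return (offset << 16) | type

q = offset_type(123456, 0)
assert q >> 16 == 123456   # what ngoffset() recovers
assert q & 0xFFFF == 0     # what ngtype() recovers
```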
444 444 def loadindex(self, start, end):
445 445 """load a block of indexes all at once from the lazy parser"""
446 446 if isinstance(self.index, lazyindex):
447 447 self.index.p.loadindex(start, end)
448 448
449 449 def loadindexmap(self):
450 450 """loads both the map and the index from the lazy parser"""
451 451 if isinstance(self.index, lazyindex):
452 452 p = self.index.p
453 453 p.loadindex()
454 454 self.nodemap = p.map
455 455
456 456 def loadmap(self):
457 457 """loads the map from the lazy parser"""
458 458 if isinstance(self.nodemap, lazymap):
459 459 self.nodemap.p.loadmap()
460 460 self.nodemap = self.nodemap.p.map
461 461
462 462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 463 def tip(self): return self.node(len(self.index) - 1)
464 464 def count(self): return len(self.index)
465 465 def node(self, rev):
466 466 return (rev < 0) and nullid or self.index[rev][-1]
467 467 def rev(self, node):
468 468 try:
469 469 return self.nodemap[node]
470 470 except KeyError:
471 471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 472 def linkrev(self, node): return self.index[self.rev(node)][-4]
473 473 def parents(self, node):
474 474 if node == nullid: return (nullid, nullid)
475 475 r = self.rev(node)
476 476 d = self.index[r][-3:-1]
477 477 if self.version == REVLOGV0:
478 478 return d
479 479 return [ self.node(x) for x in d ]
480 480 def parentrevs(self, rev):
481 481 if rev == -1:
482 482 return (-1, -1)
483 483 d = self.index[rev][-3:-1]
484 484 if self.version == REVLOGV0:
485 485 return [ self.rev(x) for x in d ]
486 486 return d
487 487 def start(self, rev):
488 488 if rev < 0:
489 489 return -1
490 490 if self.version != REVLOGV0:
491 491 return self.ngoffset(self.index[rev][0])
492 492 return self.index[rev][0]
493 493
494 494 def end(self, rev): return self.start(rev) + self.length(rev)
495 495
496 496 def size(self, rev):
497 497 """return the length of the uncompressed text for a given revision"""
498 498 l = -1
499 499 if self.version != REVLOGV0:
500 500 l = self.index[rev][2]
501 501 if l >= 0:
502 502 return l
503 503
504 504 t = self.revision(self.node(rev))
505 505 return len(t)
506 506
507 507 # alternate implementation. The advantage of this code is that it
508 508 # will be faster for a single revision. But the results are not
509 509 # cached, so finding the size of every revision will be slower.
510 510 """
511 511 if self.cache and self.cache[1] == rev:
512 512 return len(self.cache[2])
513 513
514 514 base = self.base(rev)
515 515 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
516 516 base = self.cache[1]
517 517 text = self.cache[2]
518 518 else:
519 519 text = self.revision(self.node(base))
520 520
521 521 l = len(text)
522 522 for x in xrange(base + 1, rev + 1):
523 523 l = mdiff.patchedsize(l, self.chunk(x))
524 524 return l
525 525 """
526 526
527 527 def length(self, rev):
528 528 if rev < 0:
529 529 return 0
530 530 else:
531 531 return self.index[rev][1]
532 532 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
533 533
534 534 def reachable(self, rev, stop=None):
535 535 reachable = {}
536 536 visit = [rev]
537 537 reachable[rev] = 1
538 538 if stop:
539 539 stopn = self.rev(stop)
540 540 else:
541 541 stopn = 0
542 542 while visit:
543 543 n = visit.pop(0)
544 544 if n == stop:
545 545 continue
546 546 if n == nullid:
547 547 continue
548 548 for p in self.parents(n):
549 549 if self.rev(p) < stopn:
550 550 continue
551 551 if p not in reachable:
552 552 reachable[p] = 1
553 553 visit.append(p)
554 554 return reachable
555 555
556 556 def nodesbetween(self, roots=None, heads=None):
557 557 """Return a tuple containing three elements. Elements 1 and 2 contain
558 558 the final lists of bases and heads after all the unreachable ones have been
559 559 pruned. Element 0 contains a topologically sorted list of all
560 560
561 561 nodes that satisfy these constraints:
562 562 1. All nodes must be descended from a node in roots (the nodes on
563 563 roots are considered descended from themselves).
564 564 2. All nodes must also be ancestors of a node in heads (the nodes in
565 565 heads are considered to be their own ancestors).
566 566
567 567 If roots is unspecified, nullid is assumed as the only root.
568 568 If heads is unspecified, it is taken to be the output of the
569 569 heads method (i.e. a list of all nodes in the repository that
570 570 have no children)."""
571 571 nonodes = ([], [], [])
572 572 if roots is not None:
573 573 roots = list(roots)
574 574 if not roots:
575 575 return nonodes
576 576 lowestrev = min([self.rev(n) for n in roots])
577 577 else:
578 578 roots = [nullid] # Everybody's a descendent of nullid
579 579 lowestrev = -1
580 580 if (lowestrev == -1) and (heads is None):
581 581 # We want _all_ the nodes!
582 582 return ([self.node(r) for r in xrange(0, self.count())],
583 583 [nullid], list(self.heads()))
584 584 if heads is None:
585 585 # All nodes are ancestors, so the latest ancestor is the last
586 586 # node.
587 587 highestrev = self.count() - 1
588 588 # Set ancestors to None to signal that every node is an ancestor.
589 589 ancestors = None
590 590 # Set heads to an empty dictionary for later discovery of heads
591 591 heads = {}
592 592 else:
593 593 heads = list(heads)
594 594 if not heads:
595 595 return nonodes
596 596 ancestors = {}
597 597 # Start at the top and keep marking parents until we're done.
598 598 nodestotag = heads[:]
599 599 # Turn heads into a dictionary so we can remove 'fake' heads.
600 600 # Also, later we will be using it to filter out the heads we can't
601 601 # find from roots.
602 602 heads = dict.fromkeys(heads, 0)
603 603 # Remember where the top was so we can use it as a limit later.
604 604 highestrev = max([self.rev(n) for n in nodestotag])
605 605 while nodestotag:
606 606 # grab a node to tag
607 607 n = nodestotag.pop()
608 608 # Never tag nullid
609 609 if n == nullid:
610 610 continue
611 611 # A node's revision number represents its place in a
612 612 # topologically sorted list of nodes.
613 613 r = self.rev(n)
614 614 if r >= lowestrev:
615 615 if n not in ancestors:
616 616 # If we are possibly a descendent of one of the roots
617 617 # and we haven't already been marked as an ancestor
618 618 ancestors[n] = 1 # Mark as ancestor
619 619 # Add non-nullid parents to list of nodes to tag.
620 620 nodestotag.extend([p for p in self.parents(n) if
621 621 p != nullid])
622 622 elif n in heads: # We've seen it before, is it a fake head?
623 623 # So it is, real heads should not be the ancestors of
624 624 # any other heads.
625 625 heads.pop(n)
626 626 if not ancestors:
627 627 return nonodes
628 628 # Now that we have our set of ancestors, we want to remove any
629 629 # roots that are not ancestors.
630 630
631 631 # If one of the roots was nullid, everything is included anyway.
632 632 if lowestrev > -1:
633 633 # But, since we weren't, let's recompute the lowest rev to not
634 634 # include roots that aren't ancestors.
635 635
636 636 # Filter out roots that aren't ancestors of heads
637 637 roots = [n for n in roots if n in ancestors]
638 638 # Recompute the lowest revision
639 639 if roots:
640 640 lowestrev = min([self.rev(n) for n in roots])
641 641 else:
642 642 # No more roots? Return empty list
643 643 return nonodes
644 644 else:
645 645 # We are descending from nullid, and don't need to care about
646 646 # any other roots.
647 647 lowestrev = -1
648 648 roots = [nullid]
649 649 # Transform our roots list into a 'set' (i.e. a dictionary where the
650 650 # values don't matter).
651 651 descendents = dict.fromkeys(roots, 1)
652 652 # Also, keep the original roots so we can filter out roots that aren't
653 653 # 'real' roots (i.e. are descended from other roots).
654 654 roots = descendents.copy()
655 655 # Our topologically sorted list of output nodes.
656 656 orderedout = []
657 657 # Don't start at nullid since we don't want nullid in our output list,
658 658 # and if nullid shows up in descendents, empty parents will look like
659 659 # they're descendents.
660 660 for r in xrange(max(lowestrev, 0), highestrev + 1):
661 661 n = self.node(r)
662 662 isdescendent = False
663 663 if lowestrev == -1: # Everybody is a descendent of nullid
664 664 isdescendent = True
665 665 elif n in descendents:
666 666 # n is already a descendent
667 667 isdescendent = True
668 668 # This check only needs to be done here because all the roots
669 669 # will start being marked as descendents before the loop.
670 670 if n in roots:
671 671 # If n was a root, check if it's a 'real' root.
672 672 p = tuple(self.parents(n))
673 673 # If any of its parents are descendents, it's not a root.
674 674 if (p[0] in descendents) or (p[1] in descendents):
675 675 roots.pop(n)
676 676 else:
677 677 p = tuple(self.parents(n))
678 678 # A node is a descendent if either of its parents are
679 679 # descendents. (We seeded the descendents dict with the roots
680 680 # up there, remember?)
681 681 if (p[0] in descendents) or (p[1] in descendents):
682 682 descendents[n] = 1
683 683 isdescendent = True
684 684 if isdescendent and ((ancestors is None) or (n in ancestors)):
685 685 # Only include nodes that are both descendents and ancestors.
686 686 orderedout.append(n)
687 687 if (ancestors is not None) and (n in heads):
688 688 # We're trying to figure out which heads are reachable
689 689 # from roots.
690 690 # Mark this head as having been reached
691 691 heads[n] = 1
692 692 elif ancestors is None:
693 693 # Otherwise, we're trying to discover the heads.
694 694 # Assume this is a head because if it isn't, the next step
695 695 # will eventually remove it.
696 696 heads[n] = 1
697 697 # But, obviously its parents aren't.
698 698 for p in self.parents(n):
699 699 heads.pop(p, None)
700 700 heads = [n for n in heads.iterkeys() if heads[n] != 0]
701 701 roots = roots.keys()
702 702 assert orderedout
703 703 assert roots
704 704 assert heads
705 705 return (orderedout, roots, heads)
706 706
707 707 def heads(self, start=None):
708 708 """return the list of all nodes that have no children
709 709
710 710 if start is specified, only heads that are descendants of
711 711 start will be returned
712 712
713 713 """
714 714 if start is None:
715 715 start = nullid
716 reachable = {start: 1}
717 heads = {start: 1}
718 716 startrev = self.rev(start)
717 reachable = {startrev: 1}
718 heads = {startrev: 1}
719 719
720 parentrevs = self.parentrevs
720 721 for r in xrange(startrev + 1, self.count()):
721 n = self.node(r)
722 for pn in self.parents(n):
723 if pn in reachable:
724 reachable[n] = 1
725 heads[n] = 1
726 if pn in heads:
727 del heads[pn]
728 return heads.keys()
722 for p in parentrevs(r):
723 if p in reachable:
724 reachable[r] = 1
725 heads[r] = 1
726 if p in heads:
727 del heads[p]
728 return [self.node(r) for r in heads]
729 729
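This hunk is the point of the changeset: the old loop converted every revision to its node and probed dictionaries keyed by 20-byte hashes, while the new loop works entirely in integer revision numbers and converts to nodes only once, at the end. A standalone sketch of the rev-based walk, where parentrevs is assumed (for illustration) to be a plain list of (p1, p2) tuples:

```python
def heads_by_rev(parentrevs, count, startrev=0):
    # a revision stops being a head as soon as a later revision names
    # it as a parent; everything here is an int, so no hash lookups
    reachable = {startrev: 1}
    heads = {startrev: 1}
    for r in range(startrev + 1, count):
        for p in parentrevs[r]:
            if p in reachable:
                reachable[r] = 1
                heads[r] = 1
            if p in heads:
                del heads[p]
    return sorted(heads)

# linear history 0-1-2 plus a branch at rev 1: heads are revs 2 and 3
assert heads_by_rev([(-1, -1), (0, -1), (1, -1), (1, -1)], 4) == [2, 3]
```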
730 730 def children(self, node):
731 731 """find the children of a given node"""
732 732 c = []
733 733 p = self.rev(node)
734 734 for r in range(p + 1, self.count()):
735 735 n = self.node(r)
736 736 for pn in self.parents(n):
737 737 if pn == node:
738 738 c.append(n)
739 739 continue
740 740 elif pn == nullid:
741 741 continue
742 742 return c
743 743
744 744 def lookup(self, id):
745 745 """locate a node based on revision number or subset of hex nodeid"""
746 746 try:
747 747 rev = int(id)
748 748 if str(rev) != id: raise ValueError
749 749 if rev < 0: rev = self.count() + rev
750 750 if rev < 0 or rev >= self.count(): raise ValueError
751 751 return self.node(rev)
752 752 except (ValueError, OverflowError):
753 753 c = []
754 754 for n in self.nodemap:
755 755 if hex(n).startswith(id):
756 756 c.append(n)
757 757 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
758 758 if len(c) < 1: raise RevlogError(_("No match found"))
759 759 return c[0]
760 760
761 761 return None
762 762
763 763 def diff(self, a, b):
764 764 """return a delta between two revisions"""
765 765 return mdiff.textdiff(a, b)
766 766
767 767 def patches(self, t, pl):
768 768 """apply a list of patches to a string"""
769 769 return mdiff.patches(t, pl)
770 770
771 771 def chunk(self, rev, df=None, cachelen=4096):
772 772 start, length = self.start(rev), self.length(rev)
773 773 inline = self.inlinedata()
774 774 if inline:
775 775 start += (rev + 1) * struct.calcsize(self.indexformat)
776 776 end = start + length
777 777 def loadcache(df):
778 778 cache_length = max(cachelen, length) # 4k
779 779 if not df:
780 780 if inline:
781 781 df = self.opener(self.indexfile)
782 782 else:
783 783 df = self.opener(self.datafile)
784 784 df.seek(start)
785 785 self.chunkcache = (start, df.read(cache_length))
786 786
787 787 if not self.chunkcache:
788 788 loadcache(df)
789 789
790 790 cache_start = self.chunkcache[0]
791 791 cache_end = cache_start + len(self.chunkcache[1])
792 792 if start >= cache_start and end <= cache_end:
793 793 # it is cached
794 794 offset = start - cache_start
795 795 else:
796 796 loadcache(df)
797 797 offset = 0
798 798
799 799 #def checkchunk():
800 800 # df = self.opener(self.datafile)
801 801 # df.seek(start)
802 802 # return df.read(length)
803 803 #assert s == checkchunk()
804 804 return decompress(self.chunkcache[1][offset:offset + length])
805 805
806 806 def delta(self, node):
807 807 """return or calculate a delta between a node and its predecessor"""
808 808 r = self.rev(node)
809 809 return self.revdiff(r - 1, r)
810 810
811 811 def revdiff(self, rev1, rev2):
812 812 """return or calculate a delta between two revisions"""
813 813 b1 = self.base(rev1)
814 814 b2 = self.base(rev2)
815 815 if b1 == b2 and rev1 + 1 == rev2:
816 816 return self.chunk(rev2)
817 817 else:
818 818 return self.diff(self.revision(self.node(rev1)),
819 819 self.revision(self.node(rev2)))
820 820
821 821 def revision(self, node):
822 822 """return an uncompressed revision of a given"""
823 823 if node == nullid: return ""
824 824 if self.cache and self.cache[0] == node: return self.cache[2]
825 825
826 826 # look up what we need to read
827 827 text = None
828 828 rev = self.rev(node)
829 829 base = self.base(rev)
830 830
831 831 if self.inlinedata():
832 832 # we probably have the whole chunk cached
833 833 df = None
834 834 else:
835 835 df = self.opener(self.datafile)
836 836
837 837 # do we have useful data cached?
838 838 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
839 839 base = self.cache[1]
840 840 text = self.cache[2]
841 841 self.loadindex(base, rev + 1)
842 842 else:
843 843 self.loadindex(base, rev + 1)
844 844 text = self.chunk(base, df=df)
845 845
846 846 bins = []
847 847 for r in xrange(base + 1, rev + 1):
848 848 bins.append(self.chunk(r, df=df))
849 849
850 850 text = self.patches(text, bins)
851 851
852 852 p1, p2 = self.parents(node)
853 853 if node != hash(text, p1, p2):
854 854 raise RevlogError(_("integrity check failed on %s:%d")
855 855 % (self.datafile, rev))
856 856
857 857 self.cache = (node, rev, text)
858 858 return text
859 859
860 860 def checkinlinesize(self, tr, fp=None):
861 861 if not self.inlinedata():
862 862 return
863 863 if not fp:
864 864 fp = self.opener(self.indexfile, 'r')
865 865 fp.seek(0, 2)
866 866 size = fp.tell()
867 867 if size < 131072:
868 868 return
869 869 trinfo = tr.find(self.indexfile)
870 870 if trinfo == None:
871 871 raise RevlogError(_("%s not found in the transaction" %
872 872 self.indexfile))
873 873
874 874 trindex = trinfo[2]
875 875 dataoff = self.start(trindex)
876 876
877 877 tr.add(self.datafile, dataoff)
878 878 df = self.opener(self.datafile, 'w')
879 879 calc = struct.calcsize(self.indexformat)
880 880 for r in xrange(self.count()):
881 881 start = self.start(r) + (r + 1) * calc
882 882 length = self.length(r)
883 883 fp.seek(start)
884 884 d = fp.read(length)
885 885 df.write(d)
886 886 fp.close()
887 887 df.close()
888 888 fp = self.opener(self.indexfile, 'w', atomictemp=True)
889 889 self.version &= ~(REVLOGNGINLINEDATA)
890 890 if self.count():
891 891 x = self.index[0]
892 892 e = struct.pack(self.indexformat, *x)[4:]
893 893 l = struct.pack(versionformat, self.version)
894 894 fp.write(l)
895 895 fp.write(e)
896 896
897 897 for i in xrange(1, self.count()):
898 898 x = self.index[i]
899 899 e = struct.pack(self.indexformat, *x)
900 900 fp.write(e)
901 901
902 902 # if we don't call rename, the temp file will never replace the
903 903 # real index
904 904 fp.rename()
905 905
906 906 tr.replace(self.indexfile, trindex * calc)
907 907 self.chunkcache = None
908 908
909 909 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
910 910 """add a revision to the log
911 911
912 912 text - the revision data to add
913 913 transaction - the transaction object used for rollback
914 914 link - the linkrev data to add
915 915 p1, p2 - the parent nodeids of the revision
916 916 d - an optional precomputed delta
917 917 """
918 918 if text is None: text = ""
919 919 if p1 is None: p1 = self.tip()
920 920 if p2 is None: p2 = nullid
921 921
922 922 node = hash(text, p1, p2)
923 923
924 924 if node in self.nodemap:
925 925 return node
926 926
927 927 n = self.count()
928 928 t = n - 1
929 929
930 930 if n:
931 931 base = self.base(t)
932 932 start = self.start(base)
933 933 end = self.end(t)
934 934 if not d:
935 935 prev = self.revision(self.tip())
936 936 d = self.diff(prev, str(text))
937 937 data = compress(d)
938 938 l = len(data[1]) + len(data[0])
939 939 dist = end - start + l
940 940
941 941 # full versions are inserted when the needed deltas
942 942 # become comparable to the uncompressed text
943 943 if not n or dist > len(text) * 2:
944 944 data = compress(text)
945 945 l = len(data[1]) + len(data[0])
946 946 base = n
947 947 else:
948 948 base = self.base(t)
949 949
950 950 offset = 0
951 951 if t >= 0:
952 952 offset = self.end(t)
953 953
954 954 if self.version == REVLOGV0:
955 955 e = (offset, l, base, link, p1, p2, node)
956 956 else:
957 957 e = (self.offset_type(offset, 0), l, len(text),
958 958 base, link, self.rev(p1), self.rev(p2), node)
959 959
960 960 self.index.append(e)
961 961 self.nodemap[node] = n
962 962 entry = struct.pack(self.indexformat, *e)
963 963
964 964 if not self.inlinedata():
965 965 transaction.add(self.datafile, offset)
966 966 transaction.add(self.indexfile, n * len(entry))
967 967 f = self.opener(self.datafile, "a")
968 968 if data[0]:
969 969 f.write(data[0])
970 970 f.write(data[1])
971 971 f.close()
972 972 f = self.opener(self.indexfile, "a")
973 973 else:
974 974 f = self.opener(self.indexfile, "a+")
975 975 f.seek(0, 2)
976 976 transaction.add(self.indexfile, f.tell(), self.count() - 1)
977 977
978 978 if len(self.index) == 1 and self.version != REVLOGV0:
979 979 l = struct.pack(versionformat, self.version)
980 980 f.write(l)
981 981 entry = entry[4:]
982 982
983 983 f.write(entry)
984 984
985 985 if self.inlinedata():
986 986 f.write(data[0])
987 987 f.write(data[1])
988 988 self.checkinlinesize(transaction, f)
989 989
990 990 self.cache = (node, n, text)
991 991 return node
992 992
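The `dist > len(text) * 2` test above is what enforces the delta-chain bound described in the class docstring: once the bytes spanning from the chain's base revision through the would-be new delta exceed twice the new text's size, a full version is stored instead. A toy illustration of the decision (the numbers are invented):

```python
# snapshot-or-delta decision from addrevision: dist is the distance in
# bytes from the start of the chain's base revision to the end of the
# new delta; exceeding 2x the text size forces a new full version
def store_full_version(dist, textlen):
    return dist > textlen * 2

assert store_full_version(2500, 1000)      # chain too long: store full text
assert not store_full_version(1500, 1000)  # keep extending the delta chain
```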
993 993 def ancestor(self, a, b):
994 994 """calculate the least common ancestor of nodes a and b"""
995 995
996 996 # start with some short cuts for the linear cases
997 997 if a == b:
998 998 return a
999 999 ra = self.rev(a)
1000 1000 rb = self.rev(b)
1001 1001 if ra < rb:
1002 1002 last = b
1003 1003 first = a
1004 1004 else:
1005 1005 last = a
1006 1006 first = b
1007 1007
1008 1008 # reachable won't include stop in the list, so we have to use a parent
1009 1009 reachable = self.reachable(last, stop=self.parents(first)[0])
1010 1010 if first in reachable:
1011 1011 return first
1012 1012
1013 1013 # calculate the distance of every node from root
1014 1014 dist = {nullid: 0}
1015 1015 for i in xrange(self.count()):
1016 1016 n = self.node(i)
1017 1017 p1, p2 = self.parents(n)
1018 1018 dist[n] = max(dist[p1], dist[p2]) + 1
1019 1019
1020 1020 # traverse ancestors in order of decreasing distance from root
1021 1021 def ancestors(node):
1022 1022 # we store negative distances because heap returns smallest member
1023 1023 h = [(-dist[node], node)]
1024 1024 seen = {}
1025 1025 while h:
1026 1026 d, n = heapq.heappop(h)
1027 1027 if n not in seen:
1028 1028 seen[n] = 1
1029 1029 yield (-d, n)
1030 1030 for p in self.parents(n):
1031 1031 heapq.heappush(h, (-dist[p], p))
1032 1032
1033 1033 def generations(node):
1034 1034 sg, s = None, {}
1035 1035 for g,n in ancestors(node):
1036 1036 if g != sg:
1037 1037 if sg:
1038 1038 yield sg, s
1039 1039 sg, s = g, {n:1}
1040 1040 else:
1041 1041 s[n] = 1
1042 1042 yield sg, s
1043 1043
1044 1044 x = generations(a)
1045 1045 y = generations(b)
1046 1046 gx = x.next()
1047 1047 gy = y.next()
1048 1048
1049 1049 # increment each ancestor list until it is closer to root than
1050 1050 # the other, or they match
1051 1051 while 1:
1052 1052 #print "ancestor gen %s %s" % (gx[0], gy[0])
1053 1053 if gx[0] == gy[0]:
1054 1054 # find the intersection
1055 1055 i = [ n for n in gx[1] if n in gy[1] ]
1056 1056 if i:
1057 1057 return i[0]
1058 1058 else:
1059 1059 #print "next"
1060 1060 gy = y.next()
1061 1061 gx = x.next()
1062 1062 elif gx[0] < gy[0]:
1063 1063 #print "next y"
1064 1064 gy = y.next()
1065 1065 else:
1066 1066 #print "next x"
1067 1067 gx = x.next()
1068 1068
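ancestor() walks both nodes' ancestor sets in lockstep, grouped by distance from the root, and stops at the first generation where the two sets intersect. A compact sketch of the same scheme on a toy graph, using ints for nodes and `is not None` in place of the looser truth test above:

```python
import heapq

parents = {0: (), 1: (0,), 2: (1,), 3: (1,)}  # 0-1, then 1 branches to 2 and 3
dist = {0: 0, 1: 1, 2: 2, 3: 2}               # longest path from the root

def ancestors(n):
    # yield (distance, node) pairs in order of decreasing distance
    h, seen = [(-dist[n], n)], set()
    while h:
        d, x = heapq.heappop(h)
        if x not in seen:
            seen.add(x)
            yield -d, x
            for p in parents[x]:
                heapq.heappush(h, (-dist[p], p))

def generations(n):
    # batch ancestors into sets sharing the same distance from root
    sg, s = None, set()
    for g, x in ancestors(n):
        if g != sg:
            if sg is not None:
                yield sg, s
            sg, s = g, {x}
        else:
            s.add(x)
    yield sg, s

def lca(a, b):
    x, y = generations(a), generations(b)
    gx, gy = next(x), next(y)
    while True:
        if gx[0] == gy[0]:
            common = gx[1] & gy[1]  # first intersecting generation wins
            if common:
                return common.pop()
            gx, gy = next(x), next(y)
        elif gx[0] < gy[0]:
            gy = next(y)
        else:
            gx = next(x)

assert lca(2, 3) == 1   # the branch point is the least common ancestor
```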
1069 1069 def group(self, nodelist, lookup, infocollect=None):
1070 1070 """calculate a delta group
1071 1071
1072 1072 Given a list of changeset revs, return a set of deltas and
1073 1073 metadata corresponding to nodes. The first delta is
1074 1074 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1075 1075 have this parent, as it has all history before these
1076 1076 changesets. The parent used is always the first parent.
1077 1077 """
1078 1078 revs = [self.rev(n) for n in nodelist]
1079 1079
1080 1080 # if we don't have any revisions touched by these changesets, bail
1081 1081 if not revs:
1082 1082 yield changegroup.closechunk()
1083 1083 return
1084 1084
1085 1085 # add the parent of the first rev
1086 1086 p = self.parents(self.node(revs[0]))[0]
1087 1087 revs.insert(0, self.rev(p))
1088 1088
1089 1089 # build deltas
1090 1090 for d in xrange(0, len(revs) - 1):
1091 1091 a, b = revs[d], revs[d + 1]
1092 1092 nb = self.node(b)
1093 1093
1094 1094 if infocollect is not None:
1095 1095 infocollect(nb)
1096 1096
1097 1097 d = self.revdiff(a, b)
1098 1098 p = self.parents(nb)
1099 1099 meta = nb + p[0] + p[1] + lookup(nb)
1100 1100 yield changegroup.genchunk("%s%s" % (meta, d))
1101 1101
1102 1102 yield changegroup.closechunk()
1103 1103
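group() frames its output with changegroup chunks. In the changegroup format of this era, each chunk carries a 4-byte big-endian length prefix that counts the prefix itself, and a zero-length chunk terminates the stream. A hedged sketch of that framing (not the changegroup module's actual code):

```python
import struct

# changegroup-style framing: the 4-byte length prefix counts itself,
# and an all-zero length word closes the stream
def genchunk(data):
    return struct.pack(">l", len(data) + 4) + data

closechunk = struct.pack(">l", 0)
assert genchunk(b"abc") == b"\x00\x00\x00\x07abc"
assert closechunk == b"\x00\x00\x00\x00"
```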
1104 1104 def addgroup(self, revs, linkmapper, transaction, unique=0):
1105 1105 """
1106 1106 add a delta group
1107 1107
1108 1108 given a set of deltas, add them to the revision log. the
1109 1109 first delta is against its parent, which should be in our
1110 1110 log, the rest are against the previous delta.
1111 1111 """
1112 1112
1113 1113 #track the base of the current delta log
1114 1114 r = self.count()
1115 1115 t = r - 1
1116 1116 node = None
1117 1117
1118 1118 base = prev = -1
1119 1119 start = end = textlen = 0
1120 1120 if r:
1121 1121 end = self.end(t)
1122 1122
1123 1123 ifh = self.opener(self.indexfile, "a+")
1124 1124 ifh.seek(0, 2)
1125 1125 transaction.add(self.indexfile, ifh.tell(), self.count())
1126 1126 if self.inlinedata():
1127 1127 dfh = None
1128 1128 else:
1129 1129 transaction.add(self.datafile, end)
1130 1130 dfh = self.opener(self.datafile, "a")
1131 1131
1132 1132 # loop through our set of deltas
1133 1133 chain = None
1134 1134 for chunk in revs:
1135 1135 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1136 1136 link = linkmapper(cs)
1137 1137 if node in self.nodemap:
1138 1138 # this can happen if two branches make the same change
1139 1139 # if unique:
1140 1140 # raise RevlogError(_("already have %s") % hex(node[:4]))
1141 1141 chain = node
1142 1142 continue
1143 1143 delta = chunk[80:]
1144 1144
1145 1145 for p in (p1, p2):
1146 1146 if not p in self.nodemap:
1147 1147 raise RevlogError(_("unknown parent %s") % short(p))
1148 1148
1149 1149 if not chain:
1150 1150 # retrieve the parent revision of the delta chain
1151 1151 chain = p1
1152 1152 if not chain in self.nodemap:
1153 1153 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1154 1154
1155 1155 # full versions are inserted when the needed deltas become
1156 1156 # comparable to the uncompressed text or when the previous
1157 1157 # version is not the one we have a delta against. We use
1158 1158 # the size of the previous full rev as a proxy for the
1159 1159 # current size.
1160 1160
1161 1161 if chain == prev:
1162 1162 tempd = compress(delta)
1163 1163 cdelta = tempd[0] + tempd[1]
1164 1164 textlen = mdiff.patchedsize(textlen, delta)
1165 1165
1166 1166 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1167 1167 # flush our writes here so we can read it in revision
1168 1168 if dfh:
1169 1169 dfh.flush()
1170 1170 ifh.flush()
1171 1171 text = self.revision(chain)
1172 1172 text = self.patches(text, [delta])
1173 1173 chk = self.addrevision(text, transaction, link, p1, p2)
1174 1174 if chk != node:
1175 1175 raise RevlogError(_("consistency error adding group"))
1176 1176 textlen = len(text)
1177 1177 else:
1178 1178 if self.version == REVLOGV0:
1179 1179 e = (end, len(cdelta), base, link, p1, p2, node)
1180 1180 else:
1181 1181 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1182 1182 link, self.rev(p1), self.rev(p2), node)
1183 1183 self.index.append(e)
1184 1184 self.nodemap[node] = r
1185 1185 if self.inlinedata():
1186 1186 ifh.write(struct.pack(self.indexformat, *e))
1187 1187 ifh.write(cdelta)
1188 1188 self.checkinlinesize(transaction, ifh)
1189 1189 if not self.inlinedata():
1190 1190 dfh = self.opener(self.datafile, "a")
1191 1191 ifh = self.opener(self.indexfile, "a")
1192 1192 else:
1193 1193 if not dfh:
1194 1194 # addrevision switched from inline to conventional
1195 1195 # reopen the index
1196 1196 dfh = self.opener(self.datafile, "a")
1197 1197 ifh = self.opener(self.indexfile, "a")
1198 1198 dfh.write(cdelta)
1199 1199 ifh.write(struct.pack(self.indexformat, *e))
1200 1200
1201 1201 t, r, chain, prev = r, r + 1, node, node
1202 1202 base = self.base(t)
1203 1203 start = self.start(base)
1204 1204 end = self.end(t)
1205 1205
1206 1206 return node
1207 1207
1208 1208 def strip(self, rev, minlink):
1209 1209 if self.count() == 0 or rev >= self.count():
1210 1210 return
1211 1211
1212 1212 if isinstance(self.index, lazyindex):
1213 1213 self.loadindexmap()
1214 1214
1215 1215 # When stripping away a revision, we need to make sure it
1216 1216 # does not actually belong to an older changeset.
1217 1217 # The minlink parameter defines the oldest revision
1218 1218 # we're allowed to strip away.
1219 1219 while minlink > self.index[rev][-4]:
1220 1220 rev += 1
1221 1221 if rev >= self.count():
1222 1222 return
1223 1223
1224 1224 # first truncate the files on disk
1225 1225 end = self.start(rev)
1226 1226 if not self.inlinedata():
1227 1227 df = self.opener(self.datafile, "a")
1228 1228 df.truncate(end)
1229 1229 end = rev * struct.calcsize(self.indexformat)
1230 1230 else:
1231 1231 end += rev * struct.calcsize(self.indexformat)
1232 1232
1233 1233 indexf = self.opener(self.indexfile, "a")
1234 1234 indexf.truncate(end)
1235 1235
1236 1236 # then reset internal state in memory to forget those revisions
1237 1237 self.cache = None
1238 1238 self.chunkcache = None
1239 1239 for x in xrange(rev, self.count()):
1240 1240 del self.nodemap[self.node(x)]
1241 1241
1242 1242 del self.index[rev:]
1243 1243
1244 1244 def checksize(self):
1245 1245 expected = 0
1246 1246 if self.count():
1247 1247 expected = self.end(self.count() - 1)
1248 1248
1249 1249 try:
1250 1250 f = self.opener(self.datafile)
1251 1251 f.seek(0, 2)
1252 1252 actual = f.tell()
1253 1253 dd = actual - expected
1254 1254 except IOError, inst:
1255 1255 if inst.errno != errno.ENOENT:
1256 1256 raise
1257 1257 dd = 0
1258 1258
1259 1259 try:
1260 1260 f = self.opener(self.indexfile)
1261 1261 f.seek(0, 2)
1262 1262 actual = f.tell()
1263 1263 s = struct.calcsize(self.indexformat)
1264 1264 i = actual / s
1265 1265 di = actual - (i * s)
1266 1266 if self.inlinedata():
1267 1267 databytes = 0
1268 1268 for r in xrange(self.count()):
1269 1269 databytes += self.length(r)
1270 1270 dd = 0
1271 1271 di = actual - self.count() * s - databytes
1272 1272 except IOError, inst:
1273 1273 if inst.errno != errno.ENOENT:
1274 1274 raise
1275 1275 di = 0
1276 1276
1277 1277 return (dd, di)
1278 1278
1279 1279