correct the handling of linkrev with nullid
Benoit Boissinot
r2642:6414ee2e default
@@ -1,1283 +1,1284
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
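# Illustration (a sketch, not part of the original file): the sort above
# makes the hash symmetric in its parents, while the same text under
# different parents yields a different node.
#
#   n1 = hash("some text", nullid, nullid)
#   n2 = hash("some text", n1, nullid)
#   assert n1 != n2 and len(n1) == 20
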
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
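# Round-trip sketch (illustration only): decompress dispatches on the
# first byte, so each of the three compress outcomes above inverts
# cleanly.
#
#   for t in ("", "short text", "x" * 1000):
#       hdr, data = compress(t)
#       assert decompress(hdr + data) == t
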
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
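# Decoding sketch for one RevlogNG index entry (illustration only; field
# order follows the layout documented above):
#
#   entry = struct.unpack(indexformatng, data[:64])
#   offset_flags, clen, ulen, base, link, p1, p2, node = entry
#   offset, flags = offset_flags >> 16, offset_flags & 0xFFFF
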
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions not
92 92 # available. it keeps the file handle open, which makes it
93 93 # impossible to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 data = self.dataf.read(blocksize)
143 143 lend = len(data) / self.s
144 144 i = blockstart / self.s
145 145 off = 0
146 146 for x in xrange(lend):
147 147 if self.index[i + x] == None:
148 148 b = data[off : off + self.s]
149 149 self.index[i + x] = b
150 150 n = b[self.shaoffset:self.shaoffset + 20]
151 151 self.map[n] = i + x
152 152 off += self.s
153 153
154 154 def findnode(self, node):
155 155 """search backwards through the index file for a specific node"""
156 156 if self.allmap: return None
157 157
158 158 # hg log will cause many many searches for the manifest
159 159 # nodes. After we get called a few times, just load the whole
160 160 # thing.
161 161 if self.mapfind_count > 8:
162 162 self.loadmap()
163 163 if node in self.map:
164 164 return node
165 165 return None
166 166 self.mapfind_count += 1
167 167 last = self.l - 1
168 168 while self.index[last] != None:
169 169 if last == 0:
170 170 self.all = 1
171 171 self.allmap = 1
172 172 return None
173 173 last -= 1
174 174 end = (last + 1) * self.s
175 175 blocksize = self.s * 256
176 176 while end >= 0:
177 177 start = max(end - blocksize, 0)
178 178 self.dataf.seek(start)
179 179 data = self.dataf.read(end - start)
180 180 findend = end - start
181 181 while True:
182 182 # we're searching backwards, so we have to make sure
183 183 # we don't find a changeset where this node is a parent
184 184 off = data.rfind(node, 0, findend)
185 185 findend = off
186 186 if off >= 0:
187 187 i = off / self.s
188 188 off = i * self.s
189 189 n = data[off + self.shaoffset:off + self.shaoffset + 20]
190 190 if n == node:
191 191 self.map[n] = i + start / self.s
192 192 return node
193 193 else:
194 194 break
195 195 end -= blocksize
196 196 return None
197 197
198 198 def loadindex(self, i=None, end=None):
199 199 if self.all: return
200 200 all = False
201 201 if i == None:
202 202 blockstart = 0
203 203 blocksize = (512 / self.s) * self.s
204 204 end = self.datasize
205 205 all = True
206 206 else:
207 207 if end:
208 208 blockstart = i * self.s
209 209 end = end * self.s
210 210 blocksize = end - blockstart
211 211 else:
212 212 blockstart = (i & ~(32)) * self.s
213 213 blocksize = self.s * 64
214 214 end = blockstart + blocksize
215 215 while blockstart < end:
216 216 self.loadblock(blockstart, blocksize)
217 217 blockstart += blocksize
218 218 if all: self.all = True
219 219
220 220 class lazyindex(object):
221 221 """a lazy version of the index array"""
222 222 def __init__(self, parser):
223 223 self.p = parser
224 224 def __len__(self):
225 225 return len(self.p.index)
226 226 def load(self, pos):
227 227 if pos < 0:
228 228 pos += len(self.p.index)
229 229 self.p.loadindex(pos)
230 230 return self.p.index[pos]
231 231 def __getitem__(self, pos):
232 232 ret = self.p.index[pos] or self.load(pos)
233 233 if isinstance(ret, str):
234 234 ret = struct.unpack(self.p.indexformat, ret)
235 235 return ret
236 236 def __setitem__(self, pos, item):
237 237 self.p.index[pos] = item
238 238 def __delitem__(self, pos):
239 239 del self.p.index[pos]
240 240 def append(self, e):
241 241 self.p.index.append(e)
242 242
243 243 class lazymap(object):
244 244 """a lazy version of the node map"""
245 245 def __init__(self, parser):
246 246 self.p = parser
247 247 def load(self, key):
248 248 n = self.p.findnode(key)
249 249 if n == None:
250 250 raise KeyError(key)
251 251 def __contains__(self, key):
252 252 if key in self.p.map:
253 253 return True
254 254 self.p.loadmap()
255 255 return key in self.p.map
256 256 def __iter__(self):
257 257 yield nullid
258 258 for i in xrange(self.p.l):
259 259 ret = self.p.index[i]
260 260 if not ret:
261 261 self.p.loadindex(i)
262 262 ret = self.p.index[i]
263 263 if isinstance(ret, str):
264 264 ret = struct.unpack(self.p.indexformat, ret)
265 265 yield ret[-1]
266 266 def __getitem__(self, key):
267 267 try:
268 268 return self.p.map[key]
269 269 except KeyError:
270 270 try:
271 271 self.load(key)
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 raise KeyError("node " + hex(key))
275 275 def __setitem__(self, key, val):
276 276 self.p.map[key] = val
277 277 def __delitem__(self, key):
278 278 del self.p.map[key]
279 279
280 280 class RevlogError(Exception): pass
281 281
282 282 class revlog(object):
283 283 """
284 284 the underlying revision storage object
285 285
286 286 A revlog consists of two parts, an index and the revision data.
287 287
288 288 The index is a file with a fixed record size containing
289 289 information on each revision, including its nodeid (hash), the
290 290 nodeids of its parents, the position and offset of its data within
291 291 the data file, and the revision it's based on. Finally, each entry
292 292 contains a linkrev entry that can serve as a pointer to external
293 293 data.
294 294
295 295 The revision data itself is a linear collection of data chunks.
296 296 Each chunk represents a revision and is usually represented as a
297 297 delta against the previous chunk. To bound lookup time, runs of
298 298 deltas are limited to about 2 times the length of the original
299 299 version data. This makes retrieval of a version proportional to
300 300 its size, or O(1) relative to the number of revisions.
301 301
302 302 Both pieces of the revlog are written to in an append-only
303 303 fashion, which means we never need to rewrite a file to insert or
304 304 remove data, and can use some simple techniques to avoid the need
305 305 for locking while reading.
306 306 """
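# Usage sketch (illustration only; `opener` is whatever file-opening
# callable the caller supplies, as described in __init__ below, and the
# .i/.d file names are hypothetical):
#
#   r = revlog(opener, "data/foo.i", "data/foo.d")
#   text = r.revision(r.tip())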
307 307 def __init__(self, opener, indexfile, datafile,
308 308 defversion=REVLOG_DEFAULT_VERSION):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314 """
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile
317 317 self.opener = opener
318 318
319 319 self.indexstat = None
320 320 self.cache = None
321 321 self.chunkcache = None
322 322 self.defversion = defversion
323 323 self.load()
324 324
325 325 def load(self):
326 326 v = self.defversion
327 327 try:
328 328 f = self.opener(self.indexfile)
329 329 i = f.read(4)
330 330 f.seek(0)
331 331 except IOError, inst:
332 332 if inst.errno != errno.ENOENT:
333 333 raise
334 334 i = ""
335 335 else:
336 336 try:
337 337 st = util.fstat(f)
338 338 except AttributeError, inst:
339 339 st = None
340 340 else:
341 341 oldst = self.indexstat
342 342 if (oldst and st.st_dev == oldst.st_dev
343 343 and st.st_ino == oldst.st_ino
344 344 and st.st_mtime == oldst.st_mtime
345 345 and st.st_ctime == oldst.st_ctime):
346 346 return
347 347 self.indexstat = st
348 348 if len(i) > 0:
349 349 v = struct.unpack(versionformat, i)[0]
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_("index %s invalid flags %x for format v0") %
355 355 (self.indexfile, flags))
356 356 elif fmt == REVLOGNG:
357 357 if flags & ~REVLOGNGINLINEDATA:
358 358 raise RevlogError(_("index %s invalid flags %x for revlogng") %
359 359 (self.indexfile, flags))
360 360 else:
361 361 raise RevlogError(_("index %s invalid format %d") %
362 362 (self.indexfile, fmt))
363 363 self.version = v
364 364 if v == REVLOGV0:
365 365 self.indexformat = indexformatv0
366 366 shaoffset = v0shaoffset
367 367 else:
368 368 self.indexformat = indexformatng
369 369 shaoffset = ngshaoffset
370 370
371 371 if i:
372 372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 373 st and st.st_size > 10000):
374 374 # big index, let's parse it on demand
375 375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 376 self.index = lazyindex(parser)
377 377 self.nodemap = lazymap(parser)
378 378 else:
379 379 self.parseindex(f, st)
380 380 if self.version != REVLOGV0:
381 381 e = list(self.index[0])
382 382 type = self.ngtype(e[0])
383 383 e[0] = self.offset_type(0, type)
384 384 self.index[0] = e
385 385 else:
386 386 self.nodemap = { nullid: -1}
387 387 self.index = []
388 388
389 389
390 390 def parseindex(self, fp, st):
391 391 s = struct.calcsize(self.indexformat)
392 392 self.index = []
393 393 self.nodemap = {nullid: -1}
394 394 inline = self.inlinedata()
395 395 n = 0
396 396 leftover = None
397 397 while True:
398 398 if st:
399 399 data = fp.read(65536)
400 400 else:
401 401 # hack for httprangereader, it doesn't do partial reads well
402 402 data = fp.read()
403 403 if not data:
404 404 break
405 405 if n == 0 and self.inlinedata():
406 406 # cache the first chunk
407 407 self.chunkcache = (0, data)
408 408 if leftover:
409 409 data = leftover + data
410 410 leftover = None
411 411 off = 0
412 412 l = len(data)
413 413 while off < l:
414 414 if l - off < s:
415 415 leftover = data[off:]
416 416 break
417 417 cur = data[off:off + s]
418 418 off += s
419 419 e = struct.unpack(self.indexformat, cur)
420 420 self.index.append(e)
421 421 self.nodemap[e[-1]] = n
422 422 n += 1
423 423 if inline:
424 424 off += e[1]
425 425 if off > l:
426 426 # some things don't seek well, just read it
427 427 fp.read(off - l)
428 428 if not st:
429 429 break
430 430
431 431
432 432 def ngoffset(self, q):
433 433 if q & 0xFFFF:
434 434 raise RevlogError(_('%s: incompatible revision flag %x') %
435 435 (self.indexfile, q))
436 436 return long(q >> 16)
437 437
438 438 def ngtype(self, q):
439 439 return int(q & 0xFFFF)
440 440
441 441 def offset_type(self, offset, type):
442 442 return long(long(offset) << 16 | type)
443 443
444 444 def loadindex(self, start, end):
445 445 """load a block of indexes all at once from the lazy parser"""
446 446 if isinstance(self.index, lazyindex):
447 447 self.index.p.loadindex(start, end)
448 448
449 449 def loadindexmap(self):
450 450 """loads both the map and the index from the lazy parser"""
451 451 if isinstance(self.index, lazyindex):
452 452 p = self.index.p
453 453 p.loadindex()
454 454 self.nodemap = p.map
455 455
456 456 def loadmap(self):
457 457 """loads the map from the lazy parser"""
458 458 if isinstance(self.nodemap, lazymap):
459 459 self.nodemap.p.loadmap()
460 460 self.nodemap = self.nodemap.p.map
461 461
462 462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 463 def tip(self): return self.node(len(self.index) - 1)
464 464 def count(self): return len(self.index)
465 465 def node(self, rev):
466 466 return (rev < 0) and nullid or self.index[rev][-1]
467 467 def rev(self, node):
468 468 try:
469 469 return self.nodemap[node]
470 470 except KeyError:
471 471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 def linkrev(self, node): return self.index[self.rev(node)][-4]
472 def linkrev(self, node):
473 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
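# (note on linkrev above: nullid never appears in the index, so its
#  linkrev is -1, matching the {nullid: -1} convention of the node map)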
473 474 def parents(self, node):
474 475 if node == nullid: return (nullid, nullid)
475 476 r = self.rev(node)
476 477 d = self.index[r][-3:-1]
477 478 if self.version == REVLOGV0:
478 479 return d
479 480 return [ self.node(x) for x in d ]
480 481 def parentrevs(self, rev):
481 482 if rev == -1:
482 483 return (-1, -1)
483 484 d = self.index[rev][-3:-1]
484 485 if self.version == REVLOGV0:
485 486 return [ self.rev(x) for x in d ]
486 487 return d
487 488 def start(self, rev):
488 489 if rev < 0:
489 490 return -1
490 491 if self.version != REVLOGV0:
491 492 return self.ngoffset(self.index[rev][0])
492 493 return self.index[rev][0]
493 494
494 495 def end(self, rev): return self.start(rev) + self.length(rev)
495 496
496 497 def size(self, rev):
497 498 """return the length of the uncompressed text for a given revision"""
498 499 l = -1
499 500 if self.version != REVLOGV0:
500 501 l = self.index[rev][2]
501 502 if l >= 0:
502 503 return l
503 504
504 505 t = self.revision(self.node(rev))
505 506 return len(t)
506 507
507 508 # alternate implementation. The advantage of this code is that it
508 509 # will be faster for a single revision. But the results are not
509 510 # cached, so finding the size of every revision will be slower.
510 511 """
511 512 if self.cache and self.cache[1] == rev:
512 513 return len(self.cache[2])
513 514
514 515 base = self.base(rev)
515 516 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
516 517 base = self.cache[1]
517 518 text = self.cache[2]
518 519 else:
519 520 text = self.revision(self.node(base))
520 521
521 522 l = len(text)
522 523 for x in xrange(base + 1, rev + 1):
523 524 l = mdiff.patchedsize(l, self.chunk(x))
524 525 return l
525 526 """
526 527
527 528 def length(self, rev):
528 529 if rev < 0:
529 530 return 0
530 531 else:
531 532 return self.index[rev][1]
532 533 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
533 534
534 535 def reachable(self, rev, stop=None):
535 536 reachable = {}
536 537 visit = [rev]
537 538 reachable[rev] = 1
538 539 if stop:
539 540 stopn = self.rev(stop)
540 541 else:
541 542 stopn = 0
542 543 while visit:
543 544 n = visit.pop(0)
544 545 if n == stop:
545 546 continue
546 547 if n == nullid:
547 548 continue
548 549 for p in self.parents(n):
549 550 if self.rev(p) < stopn:
550 551 continue
551 552 if p not in reachable:
552 553 reachable[p] = 1
553 554 visit.append(p)
554 555 return reachable
555 556
556 557 def nodesbetween(self, roots=None, heads=None):
557 558 """Return a tuple containing three elements. Elements 1 and 2 contain
558 559 a final list of bases and heads after all the unreachable ones have
559 560 been pruned. Element 0 contains a topologically sorted list of all
560 561 nodes that satisfy these constraints:
562 563 1. All nodes must be descended from a node in roots (the nodes on
563 564 roots are considered descended from themselves).
564 565 2. All nodes must also be ancestors of a node in heads (the nodes in
565 566 heads are considered to be their own ancestors).
566 567
567 568 If roots is unspecified, nullid is assumed as the only root.
568 569 If heads is unspecified, it is taken to be the output of the
569 570 heads method (i.e. a list of all nodes in the repository that
570 571 have no children)."""
571 572 nonodes = ([], [], [])
572 573 if roots is not None:
573 574 roots = list(roots)
574 575 if not roots:
575 576 return nonodes
576 577 lowestrev = min([self.rev(n) for n in roots])
577 578 else:
578 579 roots = [nullid] # Everybody's a descendent of nullid
579 580 lowestrev = -1
580 581 if (lowestrev == -1) and (heads is None):
581 582 # We want _all_ the nodes!
582 583 return ([self.node(r) for r in xrange(0, self.count())],
583 584 [nullid], list(self.heads()))
584 585 if heads is None:
585 586 # All nodes are ancestors, so the latest ancestor is the last
586 587 # node.
587 588 highestrev = self.count() - 1
588 589 # Set ancestors to None to signal that every node is an ancestor.
589 590 ancestors = None
590 591 # Set heads to an empty dictionary for later discovery of heads
591 592 heads = {}
592 593 else:
593 594 heads = list(heads)
594 595 if not heads:
595 596 return nonodes
596 597 ancestors = {}
597 598 # Start at the top and keep marking parents until we're done.
598 599 nodestotag = heads[:]
599 600 # Turn heads into a dictionary so we can remove 'fake' heads.
600 601 # Also, later we will be using it to filter out the heads we can't
601 602 # find from roots.
602 603 heads = dict.fromkeys(heads, 0)
603 604 # Remember where the top was so we can use it as a limit later.
604 605 highestrev = max([self.rev(n) for n in nodestotag])
605 606 while nodestotag:
606 607 # grab a node to tag
607 608 n = nodestotag.pop()
608 609 # Never tag nullid
609 610 if n == nullid:
610 611 continue
611 612 # A node's revision number represents its place in a
612 613 # topologically sorted list of nodes.
613 614 r = self.rev(n)
614 615 if r >= lowestrev:
615 616 if n not in ancestors:
616 617 # If we are possibly a descendent of one of the roots
617 618 # and we haven't already been marked as an ancestor
618 619 ancestors[n] = 1 # Mark as ancestor
619 620 # Add non-nullid parents to list of nodes to tag.
620 621 nodestotag.extend([p for p in self.parents(n) if
621 622 p != nullid])
622 623 elif n in heads: # We've seen it before, is it a fake head?
623 624 # So it is, real heads should not be the ancestors of
624 625 # any other heads.
625 626 heads.pop(n)
626 627 if not ancestors:
627 628 return nonodes
628 629 # Now that we have our set of ancestors, we want to remove any
629 630 # roots that are not ancestors.
630 631
631 632 # If one of the roots was nullid, everything is included anyway.
632 633 if lowestrev > -1:
633 634 # But, since we weren't, let's recompute the lowest rev to not
634 635 # include roots that aren't ancestors.
635 636
636 637 # Filter out roots that aren't ancestors of heads
637 638 roots = [n for n in roots if n in ancestors]
638 639 # Recompute the lowest revision
639 640 if roots:
640 641 lowestrev = min([self.rev(n) for n in roots])
641 642 else:
642 643 # No more roots? Return empty list
643 644 return nonodes
644 645 else:
645 646 # We are descending from nullid, and don't need to care about
646 647 # any other roots.
647 648 lowestrev = -1
648 649 roots = [nullid]
649 650 # Transform our roots list into a 'set' (i.e. a dictionary where the
650 651 # values don't matter).
651 652 descendents = dict.fromkeys(roots, 1)
652 653 # Also, keep the original roots so we can filter out roots that aren't
653 654 # 'real' roots (i.e. are descended from other roots).
654 655 roots = descendents.copy()
655 656 # Our topologically sorted list of output nodes.
656 657 orderedout = []
657 658 # Don't start at nullid since we don't want nullid in our output list,
658 659 # and if nullid shows up in descendents, empty parents will look like
659 660 # they're descendents.
660 661 for r in xrange(max(lowestrev, 0), highestrev + 1):
661 662 n = self.node(r)
662 663 isdescendent = False
663 664 if lowestrev == -1: # Everybody is a descendent of nullid
664 665 isdescendent = True
665 666 elif n in descendents:
666 667 # n is already a descendent
667 668 isdescendent = True
668 669 # This check only needs to be done here because all the roots
669 670 # will start being marked as descendents before the loop.
670 671 if n in roots:
671 672 # If n was a root, check if it's a 'real' root.
672 673 p = tuple(self.parents(n))
673 674 # If any of its parents are descendents, it's not a root.
674 675 if (p[0] in descendents) or (p[1] in descendents):
675 676 roots.pop(n)
676 677 else:
677 678 p = tuple(self.parents(n))
678 679 # A node is a descendent if either of its parents are
679 680 # descendents. (We seeded the descendents list with the roots
680 681 # up there, remember?)
681 682 if (p[0] in descendents) or (p[1] in descendents):
682 683 descendents[n] = 1
683 684 isdescendent = True
684 685 if isdescendent and ((ancestors is None) or (n in ancestors)):
685 686 # Only include nodes that are both descendents and ancestors.
686 687 orderedout.append(n)
687 688 if (ancestors is not None) and (n in heads):
688 689 # We're trying to figure out which heads are reachable
689 690 # from roots.
690 691 # Mark this head as having been reached
691 692 heads[n] = 1
692 693 elif ancestors is None:
693 694 # Otherwise, we're trying to discover the heads.
694 695 # Assume this is a head because if it isn't, the next step
695 696 # will eventually remove it.
696 697 heads[n] = 1
697 698 # But, obviously its parents aren't.
698 699 for p in self.parents(n):
699 700 heads.pop(p, None)
700 701 heads = [n for n in heads.iterkeys() if heads[n] != 0]
701 702 roots = roots.keys()
702 703 assert orderedout
703 704 assert roots
704 705 assert heads
705 706 return (orderedout, roots, heads)
706 707
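# Usage sketch (illustration only; `rlog` is a hypothetical revlog
# instance): with no arguments, every node is returned and nullid is
# the sole base.
#
#   allnodes, bases, heads = rlog.nodesbetween()
#   assert bases == [nullid]
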
707 708 def heads(self, start=None):
708 709 """return the list of all nodes that have no children
709 710
710 711 if start is specified, only heads that are descendants of
711 712 start will be returned
712 713
713 714 """
714 715 if start is None:
715 716 start = nullid
716 717 startrev = self.rev(start)
717 718 reachable = {startrev: 1}
718 719 heads = {startrev: 1}
719 720
720 721 parentrevs = self.parentrevs
721 722 for r in xrange(startrev + 1, self.count()):
722 723 for p in parentrevs(r):
723 724 if p in reachable:
724 725 reachable[r] = 1
725 726 heads[r] = 1
726 727 if p in heads:
727 728 del heads[p]
728 729 return [self.node(r) for r in heads]
729 730
730 731 def children(self, node):
731 732 """find the children of a given node"""
732 733 c = []
733 734 p = self.rev(node)
734 735 for r in range(p + 1, self.count()):
735 736 n = self.node(r)
736 737 for pn in self.parents(n):
737 738 if pn == node:
738 739 c.append(n)
739 740 continue
740 741 elif pn == nullid:
741 742 continue
742 743 return c
743 744
744 745 def lookup(self, id):
745 746 """locate a node based on revision number or subset of hex nodeid"""
746 747 if id in self.nodemap:
747 748 return id
748 749 if type(id) == type(0):
749 750 return self.node(id)
750 751 try:
751 752 rev = int(id)
752 753 if str(rev) != id: raise ValueError
753 754 if rev < 0: rev = self.count() + rev
754 755 if rev < 0 or rev >= self.count(): raise ValueError
755 756 return self.node(rev)
756 757 except (ValueError, OverflowError):
757 758 c = []
758 759 for n in self.nodemap:
759 760 if hex(n).startswith(id):
760 761 c.append(n)
761 762 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
762 763 if len(c) < 1: raise RevlogError(_("No match found"))
763 764 return c[0]
764 765
765 766 return None
766 767
767 768 def diff(self, a, b):
768 769 """return a delta between two revisions"""
769 770 return mdiff.textdiff(a, b)
770 771
771 772 def patches(self, t, pl):
772 773 """apply a list of patches to a string"""
773 774 return mdiff.patches(t, pl)
774 775
775 776 def chunk(self, rev, df=None, cachelen=4096):
776 777 start, length = self.start(rev), self.length(rev)
777 778 inline = self.inlinedata()
778 779 if inline:
779 780 start += (rev + 1) * struct.calcsize(self.indexformat)
780 781 end = start + length
781 782 def loadcache(df):
782 783 cache_length = max(cachelen, length) # 4k
783 784 if not df:
784 785 if inline:
785 786 df = self.opener(self.indexfile)
786 787 else:
787 788 df = self.opener(self.datafile)
788 789 df.seek(start)
789 790 self.chunkcache = (start, df.read(cache_length))
790 791
791 792 if not self.chunkcache:
792 793 loadcache(df)
793 794
794 795 cache_start = self.chunkcache[0]
795 796 cache_end = cache_start + len(self.chunkcache[1])
796 797 if start >= cache_start and end <= cache_end:
797 798 # it is cached
798 799 offset = start - cache_start
799 800 else:
800 801 loadcache(df)
801 802 offset = 0
802 803
803 804 #def checkchunk():
804 805 # df = self.opener(self.datafile)
805 806 # df.seek(start)
806 807 # return df.read(length)
807 808 #assert s == checkchunk()
808 809 return decompress(self.chunkcache[1][offset:offset + length])
809 810
810 811 def delta(self, node):
811 812 """return or calculate a delta between a node and its predecessor"""
812 813 r = self.rev(node)
813 814 return self.revdiff(r - 1, r)
814 815
815 816 def revdiff(self, rev1, rev2):
816 817 """return or calculate a delta between two revisions"""
817 818 b1 = self.base(rev1)
818 819 b2 = self.base(rev2)
819 820 if b1 == b2 and rev1 + 1 == rev2:
820 821 return self.chunk(rev2)
821 822 else:
822 823 return self.diff(self.revision(self.node(rev1)),
823 824 self.revision(self.node(rev2)))
824 825
825 826 def revision(self, node):
826 827 """return an uncompressed revision of a given"""
827 828 if node == nullid: return ""
828 829 if self.cache and self.cache[0] == node: return self.cache[2]
829 830
830 831 # look up what we need to read
831 832 text = None
832 833 rev = self.rev(node)
833 834 base = self.base(rev)
834 835
835 836 if self.inlinedata():
836 837 # we probably have the whole chunk cached
837 838 df = None
838 839 else:
839 840 df = self.opener(self.datafile)
840 841
841 842 # do we have useful data cached?
842 843 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
843 844 base = self.cache[1]
844 845 text = self.cache[2]
845 846 self.loadindex(base, rev + 1)
846 847 else:
847 848 self.loadindex(base, rev + 1)
848 849 text = self.chunk(base, df=df)
849 850
850 851 bins = []
851 852 for r in xrange(base + 1, rev + 1):
852 853 bins.append(self.chunk(r, df=df))
853 854
854 855 text = self.patches(text, bins)
855 856
856 857 p1, p2 = self.parents(node)
857 858 if node != hash(text, p1, p2):
858 859 raise RevlogError(_("integrity check failed on %s:%d")
859 860 % (self.datafile, rev))
860 861
861 862 self.cache = (node, rev, text)
862 863 return text
863 864
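# Reconstruction sketch (illustration only; `rlog` and `rev` are
# hypothetical): the loop above is equivalent to patching the chain
# base with every chunk up to rev.
#
#   base = rlog.base(rev)
#   text = rlog.chunk(base)
#   bins = [rlog.chunk(r) for r in xrange(base + 1, rev + 1)]
#   text = rlog.patches(text, bins)
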
864 865 def checkinlinesize(self, tr, fp=None):
865 866 if not self.inlinedata():
866 867 return
867 868 if not fp:
868 869 fp = self.opener(self.indexfile, 'r')
869 870 fp.seek(0, 2)
870 871 size = fp.tell()
871 872 if size < 131072:
872 873 return
873 874 trinfo = tr.find(self.indexfile)
874 875 if trinfo == None:
875 876 raise RevlogError(_("%s not found in the transaction") %
876 877 self.indexfile)
877 878
878 879 trindex = trinfo[2]
879 880 dataoff = self.start(trindex)
880 881
881 882 tr.add(self.datafile, dataoff)
882 883 df = self.opener(self.datafile, 'w')
883 884 calc = struct.calcsize(self.indexformat)
884 885 for r in xrange(self.count()):
885 886 start = self.start(r) + (r + 1) * calc
886 887 length = self.length(r)
887 888 fp.seek(start)
888 889 d = fp.read(length)
889 890 df.write(d)
890 891 fp.close()
891 892 df.close()
892 893 fp = self.opener(self.indexfile, 'w', atomictemp=True)
893 894 self.version &= ~(REVLOGNGINLINEDATA)
894 895 if self.count():
895 896 x = self.index[0]
896 897 e = struct.pack(self.indexformat, *x)[4:]
897 898 l = struct.pack(versionformat, self.version)
898 899 fp.write(l)
899 900 fp.write(e)
900 901
901 902 for i in xrange(1, self.count()):
902 903 x = self.index[i]
903 904 e = struct.pack(self.indexformat, *x)
904 905 fp.write(e)
905 906
906 907 # if we don't call rename, the temp file will never replace the
907 908 # real index
908 909 fp.rename()
909 910
910 911 tr.replace(self.indexfile, trindex * calc)
911 912 self.chunkcache = None
912 913
913 914 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
914 915 """add a revision to the log
915 916
916 917 text - the revision data to add
917 918 transaction - the transaction object used for rollback
918 919 link - the linkrev data to add
919 920 p1, p2 - the parent nodeids of the revision
920 921 d - an optional precomputed delta
921 922 """
922 923 if text is None: text = ""
923 924 if p1 is None: p1 = self.tip()
924 925 if p2 is None: p2 = nullid
925 926
926 927 node = hash(text, p1, p2)
927 928
928 929 if node in self.nodemap:
929 930 return node
930 931
931 932 n = self.count()
932 933 t = n - 1
933 934
934 935 if n:
935 936 base = self.base(t)
936 937 start = self.start(base)
937 938 end = self.end(t)
938 939 if not d:
939 940 prev = self.revision(self.tip())
940 941 d = self.diff(prev, str(text))
941 942 data = compress(d)
942 943 l = len(data[1]) + len(data[0])
943 944 dist = end - start + l
944 945
945 946 # full versions are inserted when the needed deltas
946 947 # become comparable to the uncompressed text
947 948 if not n or dist > len(text) * 2:
948 949 data = compress(text)
949 950 l = len(data[1]) + len(data[0])
950 951 base = n
951 952 else:
952 953 base = self.base(t)
953 954
954 955 offset = 0
955 956 if t >= 0:
956 957 offset = self.end(t)
957 958
958 959 if self.version == REVLOGV0:
959 960 e = (offset, l, base, link, p1, p2, node)
960 961 else:
961 962 e = (self.offset_type(offset, 0), l, len(text),
962 963 base, link, self.rev(p1), self.rev(p2), node)
963 964
964 965 self.index.append(e)
965 966 self.nodemap[node] = n
966 967 entry = struct.pack(self.indexformat, *e)
967 968
968 969 if not self.inlinedata():
969 970 transaction.add(self.datafile, offset)
970 971 transaction.add(self.indexfile, n * len(entry))
971 972 f = self.opener(self.datafile, "a")
972 973 if data[0]:
973 974 f.write(data[0])
974 975 f.write(data[1])
975 976 f.close()
976 977 f = self.opener(self.indexfile, "a")
977 978 else:
978 979 f = self.opener(self.indexfile, "a+")
979 980 f.seek(0, 2)
980 981 transaction.add(self.indexfile, f.tell(), self.count() - 1)
981 982
982 983 if len(self.index) == 1 and self.version != REVLOGV0:
983 984 l = struct.pack(versionformat, self.version)
984 985 f.write(l)
985 986 entry = entry[4:]
986 987
987 988 f.write(entry)
988 989
989 990 if self.inlinedata():
990 991 f.write(data[0])
991 992 f.write(data[1])
992 993 self.checkinlinesize(transaction, f)
993 994
994 995 self.cache = (node, n, text)
995 996 return node
996 997
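# Usage sketch (illustration only; `rlog`, `tr` and `linkrev` are
# hypothetical: an open revlog, a transaction object, and the caller's
# link revision):
#
#   node = rlog.addrevision("new file text", tr, linkrev)
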
997 998 def ancestor(self, a, b):
998 999 """calculate the least common ancestor of nodes a and b"""
999 1000
1000 1001 # start with some short cuts for the linear cases
1001 1002 if a == b:
1002 1003 return a
1003 1004 ra = self.rev(a)
1004 1005 rb = self.rev(b)
1005 1006 if ra < rb:
1006 1007 last = b
1007 1008 first = a
1008 1009 else:
1009 1010 last = a
1010 1011 first = b
1011 1012
1012 1013 # reachable won't include stop in the list, so we have to use a parent
1013 1014 reachable = self.reachable(last, stop=self.parents(first)[0])
1014 1015 if first in reachable:
1015 1016 return first
1016 1017
1017 1018 # calculate the distance of every node from root
1018 1019 dist = {nullid: 0}
1019 1020 for i in xrange(self.count()):
1020 1021 n = self.node(i)
1021 1022 p1, p2 = self.parents(n)
1022 1023 dist[n] = max(dist[p1], dist[p2]) + 1
1023 1024
1024 1025 # traverse ancestors in order of decreasing distance from root
1025 1026 def ancestors(node):
1026 1027 # we store negative distances because heap returns smallest member
1027 1028 h = [(-dist[node], node)]
1028 1029 seen = {}
1029 1030 while h:
1030 1031 d, n = heapq.heappop(h)
1031 1032 if n not in seen:
1032 1033 seen[n] = 1
1033 1034 yield (-d, n)
1034 1035 for p in self.parents(n):
1035 1036 heapq.heappush(h, (-dist[p], p))
1036 1037
1037 1038 def generations(node):
1038 1039 sg, s = None, {}
1039 1040 for g,n in ancestors(node):
1040 1041 if g != sg:
1041 1042 if sg:
1042 1043 yield sg, s
1043 1044 sg, s = g, {n:1}
1044 1045 else:
1045 1046 s[n] = 1
1046 1047 yield sg, s
1047 1048
1048 1049 x = generations(a)
1049 1050 y = generations(b)
1050 1051 gx = x.next()
1051 1052 gy = y.next()
1052 1053
1053 1054 # increment each ancestor list until it is closer to root than
1054 1055 # the other, or they match
1055 1056 while 1:
1056 1057 #print "ancestor gen %s %s" % (gx[0], gy[0])
1057 1058 if gx[0] == gy[0]:
1058 1059 # find the intersection
1059 1060 i = [ n for n in gx[1] if n in gy[1] ]
1060 1061 if i:
1061 1062 return i[0]
1062 1063 else:
1063 1064 #print "next"
1064 1065 gy = y.next()
1065 1066 gx = x.next()
1066 1067 elif gx[0] < gy[0]:
1067 1068 #print "next y"
1068 1069 gy = y.next()
1069 1070 else:
1070 1071 #print "next x"
1071 1072 gx = x.next()
1072 1073
1073 1074 def group(self, nodelist, lookup, infocollect=None):
1074 1075 """calculate a delta group
1075 1076
1076 1077 Given a list of changeset revs, return a set of deltas and
1077 1078 metadata corresponding to nodes. The first delta is
1078 1079 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1079 1080 have this parent as it has all history before these
1080 1081 changesets. The parent used is parents(nodes[0])[0].
1081 1082 """
1082 1083 revs = [self.rev(n) for n in nodelist]
1083 1084
1084 1085 # if we don't have any revisions touched by these changesets, bail
1085 1086 if not revs:
1086 1087 yield changegroup.closechunk()
1087 1088 return
1088 1089
1089 1090 # add the parent of the first rev
1090 1091 p = self.parents(self.node(revs[0]))[0]
1091 1092 revs.insert(0, self.rev(p))
1092 1093
1093 1094 # build deltas
1094 1095 for d in xrange(0, len(revs) - 1):
1095 1096 a, b = revs[d], revs[d + 1]
1096 1097 nb = self.node(b)
1097 1098
1098 1099 if infocollect is not None:
1099 1100 infocollect(nb)
1100 1101
1101 1102 d = self.revdiff(a, b)
1102 1103 p = self.parents(nb)
1103 1104 meta = nb + p[0] + p[1] + lookup(nb)
1104 1105 yield changegroup.genchunk("%s%s" % (meta, d))
1105 1106
1106 1107 yield changegroup.closechunk()
1107 1108
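# Wire layout of each delta chunk produced above (an 80-byte header
# followed by the delta payload), as unpacked by addgroup below:
#
#   node (20) | p1 (20) | p2 (20) | link node from lookup() (20) | delta
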
1108 1109 def addgroup(self, revs, linkmapper, transaction, unique=0):
1109 1110 """
1110 1111 add a delta group
1111 1112
1112 1113 given a set of deltas, add them to the revision log. the
1113 1114 first delta is against its parent, which should be in our
1114 1115 log, the rest are against the previous delta.
1115 1116 """
1116 1117
1117 1118 #track the base of the current delta log
1118 1119 r = self.count()
1119 1120 t = r - 1
1120 1121 node = None
1121 1122
1122 1123 base = prev = -1
1123 1124 start = end = textlen = 0
1124 1125 if r:
1125 1126 end = self.end(t)
1126 1127
1127 1128 ifh = self.opener(self.indexfile, "a+")
1128 1129 ifh.seek(0, 2)
1129 1130 transaction.add(self.indexfile, ifh.tell(), self.count())
1130 1131 if self.inlinedata():
1131 1132 dfh = None
1132 1133 else:
1133 1134 transaction.add(self.datafile, end)
1134 1135 dfh = self.opener(self.datafile, "a")
1135 1136
1136 1137 # loop through our set of deltas
1137 1138 chain = None
1138 1139 for chunk in revs:
1139 1140 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1140 1141 link = linkmapper(cs)
1141 1142 if node in self.nodemap:
1142 1143 # this can happen if two branches make the same change
1143 1144 # if unique:
1144 1145 # raise RevlogError(_("already have %s") % hex(node[:4]))
1145 1146 chain = node
1146 1147 continue
1147 1148 delta = chunk[80:]
1148 1149
1149 1150 for p in (p1, p2):
1150 1151 if not p in self.nodemap:
1151 1152 raise RevlogError(_("unknown parent %s") % short(p))
1152 1153
1153 1154 if not chain:
1154 1155 # retrieve the parent revision of the delta chain
1155 1156 chain = p1
1156 1157 if not chain in self.nodemap:
1157 1158 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1158 1159
1159 1160 # full versions are inserted when the needed deltas become
1160 1161 # comparable to the uncompressed text or when the previous
1161 1162 # version is not the one we have a delta against. We use
1162 1163 # the size of the previous full rev as a proxy for the
1163 1164 # current size.
1164 1165
1165 1166 if chain == prev:
1166 1167 tempd = compress(delta)
1167 1168 cdelta = tempd[0] + tempd[1]
1168 1169 textlen = mdiff.patchedsize(textlen, delta)
1169 1170
1170 1171 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1171 1172 # flush our writes here so we can read it in revision
1172 1173 if dfh:
1173 1174 dfh.flush()
1174 1175 ifh.flush()
1175 1176 text = self.revision(chain)
1176 1177 text = self.patches(text, [delta])
1177 1178 chk = self.addrevision(text, transaction, link, p1, p2)
1178 1179 if chk != node:
1179 1180 raise RevlogError(_("consistency error adding group"))
1180 1181 textlen = len(text)
1181 1182 else:
1182 1183 if self.version == REVLOGV0:
1183 1184 e = (end, len(cdelta), base, link, p1, p2, node)
1184 1185 else:
1185 1186 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1186 1187 link, self.rev(p1), self.rev(p2), node)
1187 1188 self.index.append(e)
1188 1189 self.nodemap[node] = r
1189 1190 if self.inlinedata():
1190 1191 ifh.write(struct.pack(self.indexformat, *e))
1191 1192 ifh.write(cdelta)
1192 1193 self.checkinlinesize(transaction, ifh)
1193 1194 if not self.inlinedata():
1194 1195 dfh = self.opener(self.datafile, "a")
1195 1196 ifh = self.opener(self.indexfile, "a")
1196 1197 else:
1197 1198 if not dfh:
1198 1199 # addrevision switched from inline to conventional
1199 1200 # reopen the index
1200 1201 dfh = self.opener(self.datafile, "a")
1201 1202 ifh = self.opener(self.indexfile, "a")
1202 1203 dfh.write(cdelta)
1203 1204 ifh.write(struct.pack(self.indexformat, *e))
1204 1205
1205 1206 t, r, chain, prev = r, r + 1, node, node
1206 1207 base = self.base(t)
1207 1208 start = self.start(base)
1208 1209 end = self.end(t)
1209 1210
1210 1211 return node
1211 1212
1212 1213 def strip(self, rev, minlink):
1213 1214 if self.count() == 0 or rev >= self.count():
1214 1215 return
1215 1216
1216 1217 if isinstance(self.index, lazyindex):
1217 1218 self.loadindexmap()
1218 1219
1219 1220 # When stripping away a revision, we need to make sure it
1220 1221 # does not actually belong to an older changeset.
1221 1222 # The minlink parameter defines the oldest revision
1222 1223 # we're allowed to strip away.
1223 1224 while minlink > self.index[rev][-4]:
1224 1225 rev += 1
1225 1226 if rev >= self.count():
1226 1227 return
1227 1228
1228 1229 # first truncate the files on disk
1229 1230 end = self.start(rev)
1230 1231 if not self.inlinedata():
1231 1232 df = self.opener(self.datafile, "a")
1232 1233 df.truncate(end)
1233 1234 end = rev * struct.calcsize(self.indexformat)
1234 1235 else:
1235 1236 end += rev * struct.calcsize(self.indexformat)
1236 1237
1237 1238 indexf = self.opener(self.indexfile, "a")
1238 1239 indexf.truncate(end)
1239 1240
1240 1241 # then reset internal state in memory to forget those revisions
1241 1242 self.cache = None
1242 1243 self.chunkcache = None
1243 1244 for x in xrange(rev, self.count()):
1244 1245 del self.nodemap[self.node(x)]
1245 1246
1246 1247 del self.index[rev:]
1247 1248
1248 1249 def checksize(self):
1249 1250 expected = 0
1250 1251 if self.count():
1251 1252 expected = self.end(self.count() - 1)
1252 1253
1253 1254 try:
1254 1255 f = self.opener(self.datafile)
1255 1256 f.seek(0, 2)
1256 1257 actual = f.tell()
1257 1258 dd = actual - expected
1258 1259 except IOError, inst:
1259 1260 if inst.errno != errno.ENOENT:
1260 1261 raise
1261 1262 dd = 0
1262 1263
1263 1264 try:
1264 1265 f = self.opener(self.indexfile)
1265 1266 f.seek(0, 2)
1266 1267 actual = f.tell()
1267 1268 s = struct.calcsize(self.indexformat)
1268 1269 i = actual / s
1269 1270 di = actual - (i * s)
1270 1271 if self.inlinedata():
1271 1272 databytes = 0
1272 1273 for r in xrange(self.count()):
1273 1274 databytes += self.length(r)
1274 1275 dd = 0
1275 1276 di = actual - self.count() * s - databytes
1276 1277 except IOError, inst:
1277 1278 if inst.errno != errno.ENOENT:
1278 1279 raise
1279 1280 di = 0
1280 1281
1281 1282 return (dd, di)
1282 1283
1283 1284