Add revlog.parentrevs function....
Alexis S. L. Carvalho
r2489:568e58ee default
@@ -1,1272 +1,1279 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
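The parents are sorted before hashing, so a merge reached with its parents in either order produces the same node id. A minimal standalone sketch of the scheme, assuming hashlib.sha1 as a stand-in for the long-deprecated sha module (the sample ids below are made up):

    import hashlib

    nullid = b"\0" * 20                      # same convention as mercurial.node

    def nodehash(text, p1, p2):
        l = [p1, p2]
        l.sort()                             # parent order must not matter
        s = hashlib.sha1(l[0])
        s.update(l[1])
        s.update(text)
        return s.digest()

    # swapping the parents yields the same node id
    assert nodehash(b"data", nullid, b"\1" * 20) == \
           nodehash(b"data", b"\1" * 20, nullid)
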
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
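Together, compress and decompress define a small self-describing encoding: a 'u' prefix marks text stored verbatim (too short or incompressible), 'x' is simply the first byte zlib emits, and data beginning with a NUL passes through untouched. A quick round-trip check using the two functions above (Python 2 strings, matching the era of this code):

    for text in ("", "short text", "\0binary header", "x" * 1000):
        hdr, data = compress(text)
        assert decompress(hdr + data) == text
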
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
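Each revlogng record is 64 bytes: the 6-byte offset and 2-byte flags share a single 64-bit field (split apart by offset_type, ngoffset and ngtype below), six 4-byte integers follow, and the 20-byte sha1 node is padded out to 32 bytes for future hash widths. A pack/unpack round trip with hypothetical values:

    import struct

    fmt = ">Qiiiiii20s12x"
    # hypothetical entry: offset 4096 with flags 0, 120 bytes compressed,
    # 300 uncompressed, base rev 5, link rev 7, parents 6 and -1 (null)
    entry = ((4096 << 16) | 0, 120, 300, 5, 7, 6, -1, b"\xaa" * 20)
    packed = struct.pack(fmt, *entry)
    assert len(packed) == struct.calcsize(fmt) == 64
    assert struct.unpack(fmt, packed) == entry
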
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if the win32 extensions
92 92 # are not available. it keeps the file handle open, which makes it
93 93 # impossible to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 data = self.dataf.read(blocksize)
143 143 lend = len(data) / self.s
144 144 i = blockstart / self.s
145 145 off = 0
146 146 for x in xrange(lend):
147 147 if self.index[i + x] is None:
148 148 b = data[off : off + self.s]
149 149 self.index[i + x] = b
150 150 n = b[self.shaoffset:self.shaoffset + 20]
151 151 self.map[n] = i + x
152 152 off += self.s
153 153
154 154 def findnode(self, node):
155 155 """search backwards through the index file for a specific node"""
156 156 if self.allmap: return None
157 157
158 158 # hg log will cause many, many searches for the manifest
159 159 # nodes. After we get called a few times, just load the whole
160 160 # thing.
161 161 if self.mapfind_count > 8:
162 162 self.loadmap()
163 163 if node in self.map:
164 164 return node
165 165 return None
166 166 self.mapfind_count += 1
167 167 last = self.l - 1
168 168 while self.index[last] is not None:
169 169 if last == 0:
170 170 self.all = 1
171 171 self.allmap = 1
172 172 return None
173 173 last -= 1
174 174 end = (last + 1) * self.s
175 175 blocksize = self.s * 256
176 176 while end >= 0:
177 177 start = max(end - blocksize, 0)
178 178 self.dataf.seek(start)
179 179 data = self.dataf.read(end - start)
180 180 findend = end - start
181 181 while True:
182 182 # we're searching backwards, so we have to make sure
183 183 # we don't find a changeset where this node is a parent
184 184 off = data.rfind(node, 0, findend)
185 185 findend = off
186 186 if off >= 0:
187 187 i = off / self.s
188 188 off = i * self.s
189 189 n = data[off + self.shaoffset:off + self.shaoffset + 20]
190 190 if n == node:
191 191 self.map[n] = i + start / self.s
192 192 return node
193 193 else:
194 194 break
195 195 end -= blocksize
196 196 return None
197 197
198 198 def loadindex(self, i=None, end=None):
199 199 if self.all: return
200 200 all = False
201 201 if i is None:
202 202 blockstart = 0
203 203 blocksize = (512 / self.s) * self.s
204 204 end = self.datasize
205 205 all = True
206 206 else:
207 207 if end:
208 208 blockstart = i * self.s
209 209 end = end * self.s
210 210 blocksize = end - blockstart
211 211 else:
212 212 blockstart = (i & ~(32)) * self.s
213 213 blocksize = self.s * 64
214 214 end = blockstart + blocksize
215 215 while blockstart < end:
216 216 self.loadblock(blockstart, blocksize)
217 217 blockstart += blocksize
218 218 if all: self.all = True
219 219
220 220 class lazyindex(object):
221 221 """a lazy version of the index array"""
222 222 def __init__(self, parser):
223 223 self.p = parser
224 224 def __len__(self):
225 225 return len(self.p.index)
226 226 def load(self, pos):
227 227 if pos < 0:
228 228 pos += len(self.p.index)
229 229 self.p.loadindex(pos)
230 230 return self.p.index[pos]
231 231 def __getitem__(self, pos):
232 232 ret = self.p.index[pos] or self.load(pos)
233 233 if isinstance(ret, str):
234 234 ret = struct.unpack(self.p.indexformat, ret)
235 235 return ret
236 236 def __setitem__(self, pos, item):
237 237 self.p.index[pos] = item
238 238 def __delitem__(self, pos):
239 239 del self.p.index[pos]
240 240 def append(self, e):
241 241 self.p.index.append(e)
242 242
243 243 class lazymap(object):
244 244 """a lazy version of the node map"""
245 245 def __init__(self, parser):
246 246 self.p = parser
247 247 def load(self, key):
248 248 n = self.p.findnode(key)
249 249 if n is None:
250 250 raise KeyError(key)
251 251 def __contains__(self, key):
252 252 if key in self.p.map:
253 253 return True
254 254 self.p.loadmap()
255 255 return key in self.p.map
256 256 def __iter__(self):
257 257 yield nullid
258 258 for i in xrange(self.p.l):
259 259 ret = self.p.index[i]
260 260 if not ret:
261 261 self.p.loadindex(i)
262 262 ret = self.p.index[i]
263 263 if isinstance(ret, str):
264 264 ret = struct.unpack(self.p.indexformat, ret)
265 265 yield ret[-1]
266 266 def __getitem__(self, key):
267 267 try:
268 268 return self.p.map[key]
269 269 except KeyError:
270 270 try:
271 271 self.load(key)
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 raise KeyError("node " + hex(key))
275 275 def __setitem__(self, key, val):
276 276 self.p.map[key] = val
277 277 def __delitem__(self, key):
278 278 del self.p.map[key]
279 279
280 280 class RevlogError(Exception): pass
281 281
282 282 class revlog(object):
283 283 """
284 284 the underlying revision storage object
285 285
286 286 A revlog consists of two parts, an index and the revision data.
287 287
288 288 The index is a file with a fixed record size containing
289 289 information on each revision, including its nodeid (hash), the
290 290 nodeids of its parents, the position and offset of its data within
291 291 the data file, and the revision it's based on. Finally, each entry
292 292 contains a linkrev entry that can serve as a pointer to external
293 293 data.
294 294
295 295 The revision data itself is a linear collection of data chunks.
296 296 Each chunk represents a revision and is usually represented as a
297 297 delta against the previous chunk. To bound lookup time, runs of
298 298 deltas are limited to about 2 times the length of the original
299 299 version data. This makes retrieval of a version proportional to
300 300 its size, or O(1) relative to the number of revisions.
301 301
302 302 Both pieces of the revlog are written to in an append-only
303 303 fashion, which means we never need to rewrite a file to insert or
304 304 remove data, and can use some simple techniques to avoid the need
305 305 for locking while reading.
306 306 """
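Because a delta run is cut off once it grows to about twice the full text, reading any revision costs one base snapshot plus a bounded run of patches. A toy model of that read path; a real revlog applies binary mdiff patches, so the (start, end, replacement) triples here are purely illustrative:

    def patch(text, delta):
        # delta is a hypothetical (start, end, replacement) triple
        start, end, new = delta
        return text[:start] + new + text[end:]

    base = "abcdef"                       # full snapshot at the chain's base rev
    chain = [(1, 3, "XY"), (0, 1, "Z")]   # deltas for the following revs

    text = base
    for d in chain:                       # replay the chain to reconstruct
        text = patch(text, d)
    assert text == "ZXYdef"
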
307 307 def __init__(self, opener, indexfile, datafile,
308 308 defversion=REVLOG_DEFAULT_VERSION):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314 """
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile
317 317 self.opener = opener
318 318
319 319 self.indexstat = None
320 320 self.cache = None
321 321 self.chunkcache = None
322 322 self.defversion = defversion
323 323 self.load()
324 324
325 325 def load(self):
326 326 v = self.defversion
327 327 try:
328 328 f = self.opener(self.indexfile)
329 329 i = f.read(4)
330 330 f.seek(0)
331 331 except IOError, inst:
332 332 if inst.errno != errno.ENOENT:
333 333 raise
334 334 i = ""
335 335 else:
336 336 try:
337 337 st = util.fstat(f)
338 338 except AttributeError, inst:
339 339 st = None
340 340 else:
341 341 oldst = self.indexstat
342 342 if (oldst and st.st_dev == oldst.st_dev
343 343 and st.st_ino == oldst.st_ino
344 344 and st.st_mtime == oldst.st_mtime
345 345 and st.st_ctime == oldst.st_ctime):
346 346 return
347 347 self.indexstat = st
348 348 if len(i) > 0:
349 349 v = struct.unpack(versionformat, i)[0]
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_("index %s invalid flags %x for format v0") %
355 355 (self.indexfile, flags))
356 356 elif fmt == REVLOGNG:
357 357 if flags & ~REVLOGNGINLINEDATA:
358 358 raise RevlogError(_("index %s invalid flags %x for revlogng") %
359 359 (self.indexfile, flags))
360 360 else:
361 361 raise RevlogError(_("index %s invalid format %d") %
362 362 (self.indexfile, fmt))
363 363 self.version = v
364 364 if v == REVLOGV0:
365 365 self.indexformat = indexformatv0
366 366 shaoffset = v0shaoffset
367 367 else:
368 368 self.indexformat = indexformatng
369 369 shaoffset = ngshaoffset
370 370
371 371 if i:
372 372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 373 st and st.st_size > 10000):
374 374 # big index, let's parse it on demand
375 375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 376 self.index = lazyindex(parser)
377 377 self.nodemap = lazymap(parser)
378 378 else:
379 379 self.parseindex(f, st)
380 380 if self.version != REVLOGV0:
381 381 e = list(self.index[0])
382 382 type = self.ngtype(e[0])
383 383 e[0] = self.offset_type(0, type)
384 384 self.index[0] = e
385 385 else:
386 386 self.nodemap = { nullid: -1}
387 387 self.index = []
388 388
389 389
390 390 def parseindex(self, fp, st):
391 391 s = struct.calcsize(self.indexformat)
392 392 self.index = []
393 393 self.nodemap = {nullid: -1}
394 394 inline = self.inlinedata()
395 395 n = 0
396 396 leftover = None
397 397 while True:
398 398 if st:
399 399 data = fp.read(65536)
400 400 else:
401 401 # hack for httprangereader, it doesn't do partial reads well
402 402 data = fp.read()
403 403 if not data:
404 404 break
405 405 if n == 0 and self.inlinedata():
406 406 # cache the first chunk
407 407 self.chunkcache = (0, data)
408 408 if leftover:
409 409 data = leftover + data
410 410 leftover = None
411 411 off = 0
412 412 l = len(data)
413 413 while off < l:
414 414 if l - off < s:
415 415 leftover = data[off:]
416 416 break
417 417 cur = data[off:off + s]
418 418 off += s
419 419 e = struct.unpack(self.indexformat, cur)
420 420 self.index.append(e)
421 421 self.nodemap[e[-1]] = n
422 422 n += 1
423 423 if inline:
424 424 off += e[1]
425 425 if off > l:
426 426 # some things don't seek well, just read it
427 427 fp.read(off - l)
428 428 if not st:
429 429 break
430 430
431 431
432 432 def ngoffset(self, q):
433 433 if q & 0xFFFF:
434 434 raise RevlogError(_('%s: incompatible revision flag %x') %
435 435 (self.indexfile, q))
436 436 return long(q >> 16)
437 437
438 438 def ngtype(self, q):
439 439 return int(q & 0xFFFF)
440 440
441 441 def offset_type(self, offset, type):
442 442 return long(long(offset) << 16 | type)
443 443
444 444 def loadindex(self, start, end):
445 445 """load a block of indexes all at once from the lazy parser"""
446 446 if isinstance(self.index, lazyindex):
447 447 self.index.p.loadindex(start, end)
448 448
449 449 def loadindexmap(self):
450 450 """loads both the map and the index from the lazy parser"""
451 451 if isinstance(self.index, lazyindex):
452 452 p = self.index.p
453 453 p.loadindex()
454 454 self.nodemap = p.map
455 455
456 456 def loadmap(self):
457 457 """loads the map from the lazy parser"""
458 458 if isinstance(self.nodemap, lazymap):
459 459 self.nodemap.p.loadmap()
460 460 self.nodemap = self.nodemap.p.map
461 461
462 462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 463 def tip(self): return self.node(len(self.index) - 1)
464 464 def count(self): return len(self.index)
465 465 def node(self, rev):
466 466 return (rev < 0) and nullid or self.index[rev][-1]
467 467 def rev(self, node):
468 468 try:
469 469 return self.nodemap[node]
470 470 except KeyError:
471 471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 472 def linkrev(self, node): return self.index[self.rev(node)][-4]
473 473 def parents(self, node):
474 474 if node == nullid: return (nullid, nullid)
475 475 r = self.rev(node)
476 476 d = self.index[r][-3:-1]
477 477 if self.version == REVLOGV0:
478 478 return d
479 479 return [ self.node(x) for x in d ]
480 def parentrevs(self, rev):
481 if rev == -1:
482 return (-1, -1)
483 d = self.index[rev][-3:-1]
484 if self.version == REVLOGV0:
485 return [ self.rev(x) for x in d ]
486 return d
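parentrevs, added in this changeset, is the rev-number twin of parents: on revlogng the index row stores the parent revs directly, so callers that only need revision numbers avoid the rev -> node -> rev round trips that going through parents costs. A hedged sketch of the equivalence, for a hypothetical revlog rl:

    def parent_revs_both_ways(rl, r):
        # the two results match; parentrevs skips the nodemap lookups
        via_nodes = [rl.rev(p) for p in rl.parents(rl.node(r))]
        direct = list(rl.parentrevs(r))
        return via_nodes, direct
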
480 487 def start(self, rev):
481 488 if rev < 0:
482 489 return -1
483 490 if self.version != REVLOGV0:
484 491 return self.ngoffset(self.index[rev][0])
485 492 return self.index[rev][0]
486 493
487 494 def end(self, rev): return self.start(rev) + self.length(rev)
488 495
489 496 def size(self, rev):
490 497 """return the length of the uncompressed text for a given revision"""
491 498 l = -1
492 499 if self.version != REVLOGV0:
493 500 l = self.index[rev][2]
494 501 if l >= 0:
495 502 return l
496 503
497 504 t = self.revision(self.node(rev))
498 505 return len(t)
499 506
500 507 # alternate implementation. The advantage of this code is that it
501 508 # will be faster for a single revision. But the results are not
502 509 # cached, so finding the size of every revision will be slower.
503 510 """
504 511 if self.cache and self.cache[1] == rev:
505 512 return len(self.cache[2])
506 513
507 514 base = self.base(rev)
508 515 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
509 516 base = self.cache[1]
510 517 text = self.cache[2]
511 518 else:
512 519 text = self.revision(self.node(base))
513 520
514 521 l = len(text)
515 522 for x in xrange(base + 1, rev + 1):
516 523 l = mdiff.patchedsize(l, self.chunk(x))
517 524 return l
518 525 """
519 526
520 527 def length(self, rev):
521 528 if rev < 0:
522 529 return 0
523 530 else:
524 531 return self.index[rev][1]
525 532 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
526 533
527 534 def reachable(self, rev, stop=None):
528 535 reachable = {}
529 536 visit = [rev]
530 537 reachable[rev] = 1
531 538 if stop:
532 539 stopn = self.rev(stop)
533 540 else:
534 541 stopn = 0
535 542 while visit:
536 543 n = visit.pop(0)
537 544 if n == stop:
538 545 continue
539 546 if n == nullid:
540 547 continue
541 548 for p in self.parents(n):
542 549 if self.rev(p) < stopn:
543 550 continue
544 551 if p not in reachable:
545 552 reachable[p] = 1
546 553 visit.append(p)
547 554 return reachable
548 555
549 556 def nodesbetween(self, roots=None, heads=None):
550 557 """Return a tuple containing three elements. Elements 1 and 2 contain
551 558 a final list of bases and heads after all the unreachable ones have been
552 559 pruned. Element 0 contains a topologically sorted list of all
553 560
554 561 nodes that satisfy these constraints:
555 562 1. All nodes must be descended from a node in roots (the nodes on
556 563 roots are considered descended from themselves).
557 564 2. All nodes must also be ancestors of a node in heads (the nodes in
558 565 heads are considered to be their own ancestors).
559 566
560 567 If roots is unspecified, nullid is assumed as the only root.
561 568 If heads is unspecified, it is taken to be the output of the
562 569 heads method (i.e. a list of all nodes in the repository that
563 570 have no children)."""
564 571 nonodes = ([], [], [])
565 572 if roots is not None:
566 573 roots = list(roots)
567 574 if not roots:
568 575 return nonodes
569 576 lowestrev = min([self.rev(n) for n in roots])
570 577 else:
571 578 roots = [nullid] # Everybody's a descendent of nullid
572 579 lowestrev = -1
573 580 if (lowestrev == -1) and (heads is None):
574 581 # We want _all_ the nodes!
575 582 return ([self.node(r) for r in xrange(0, self.count())],
576 583 [nullid], list(self.heads()))
577 584 if heads is None:
578 585 # All nodes are ancestors, so the latest ancestor is the last
579 586 # node.
580 587 highestrev = self.count() - 1
581 588 # Set ancestors to None to signal that every node is an ancestor.
582 589 ancestors = None
583 590 # Set heads to an empty dictionary for later discovery of heads
584 591 heads = {}
585 592 else:
586 593 heads = list(heads)
587 594 if not heads:
588 595 return nonodes
589 596 ancestors = {}
590 597 # Start at the top and keep marking parents until we're done.
591 598 nodestotag = heads[:]
592 599 # Turn heads into a dictionary so we can remove 'fake' heads.
593 600 # Also, later we will be using it to filter out the heads we can't
594 601 # find from roots.
595 602 heads = dict.fromkeys(heads, 0)
596 603 # Remember where the top was so we can use it as a limit later.
597 604 highestrev = max([self.rev(n) for n in nodestotag])
598 605 while nodestotag:
599 606 # grab a node to tag
600 607 n = nodestotag.pop()
601 608 # Never tag nullid
602 609 if n == nullid:
603 610 continue
604 611 # A node's revision number represents its place in a
605 612 # topologically sorted list of nodes.
606 613 r = self.rev(n)
607 614 if r >= lowestrev:
608 615 if n not in ancestors:
609 616 # If we are possibly a descendent of one of the roots
610 617 # and we haven't already been marked as an ancestor
611 618 ancestors[n] = 1 # Mark as ancestor
612 619 # Add non-nullid parents to list of nodes to tag.
613 620 nodestotag.extend([p for p in self.parents(n) if
614 621 p != nullid])
615 622 elif n in heads: # We've seen it before, is it a fake head?
616 623 # So it is, real heads should not be the ancestors of
617 624 # any other heads.
618 625 heads.pop(n)
619 626 if not ancestors:
620 627 return nonodes
621 628 # Now that we have our set of ancestors, we want to remove any
622 629 # roots that are not ancestors.
623 630
624 631 # If one of the roots was nullid, everything is included anyway.
625 632 if lowestrev > -1:
626 633 # But, since we weren't, let's recompute the lowest rev to not
627 634 # include roots that aren't ancestors.
628 635
629 636 # Filter out roots that aren't ancestors of heads
630 637 roots = [n for n in roots if n in ancestors]
631 638 # Recompute the lowest revision
632 639 if roots:
633 640 lowestrev = min([self.rev(n) for n in roots])
634 641 else:
635 642 # No more roots? Return empty list
636 643 return nonodes
637 644 else:
638 645 # We are descending from nullid, and don't need to care about
639 646 # any other roots.
640 647 lowestrev = -1
641 648 roots = [nullid]
642 649 # Transform our roots list into a 'set' (i.e. a dictionary where the
643 650 # values don't matter).
644 651 descendents = dict.fromkeys(roots, 1)
645 652 # Also, keep the original roots so we can filter out roots that aren't
646 653 # 'real' roots (i.e. are descended from other roots).
647 654 roots = descendents.copy()
648 655 # Our topologically sorted list of output nodes.
649 656 orderedout = []
650 657 # Don't start at nullid since we don't want nullid in our output list,
651 658 # and if nullid shows up in descendents, empty parents will look like
652 659 # they're descendents.
653 660 for r in xrange(max(lowestrev, 0), highestrev + 1):
654 661 n = self.node(r)
655 662 isdescendent = False
656 663 if lowestrev == -1: # Everybody is a descendent of nullid
657 664 isdescendent = True
658 665 elif n in descendents:
659 666 # n is already a descendent
660 667 isdescendent = True
661 668 # This check only needs to be done here because all the roots
662 669 # will start being marked as descendents before the loop.
663 670 if n in roots:
664 671 # If n was a root, check if it's a 'real' root.
665 672 p = tuple(self.parents(n))
666 673 # If any of its parents are descendents, it's not a root.
667 674 if (p[0] in descendents) or (p[1] in descendents):
668 675 roots.pop(n)
669 676 else:
670 677 p = tuple(self.parents(n))
671 678 # A node is a descendent if either of its parents are
672 679 # descendents. (We seeded the descendents list with the roots
673 680 # up there, remember?)
674 681 if (p[0] in descendents) or (p[1] in descendents):
675 682 descendents[n] = 1
676 683 isdescendent = True
677 684 if isdescendent and ((ancestors is None) or (n in ancestors)):
678 685 # Only include nodes that are both descendents and ancestors.
679 686 orderedout.append(n)
680 687 if (ancestors is not None) and (n in heads):
681 688 # We're trying to figure out which heads are reachable
682 689 # from roots.
683 690 # Mark this head as having been reached
684 691 heads[n] = 1
685 692 elif ancestors is None:
686 693 # Otherwise, we're trying to discover the heads.
687 694 # Assume this is a head because if it isn't, the next step
688 695 # will eventually remove it.
689 696 heads[n] = 1
690 697 # But, obviously its parents aren't.
691 698 for p in self.parents(n):
692 699 heads.pop(p, None)
693 700 heads = [n for n in heads.iterkeys() if heads[n] != 0]
694 701 roots = roots.keys()
695 702 assert orderedout
696 703 assert roots
697 704 assert heads
698 705 return (orderedout, roots, heads)
699 706
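The same root/head constraints can be restated on a bare parent map: keep a node iff it descends from one of the roots and one of the heads descends from it. A self-contained sketch on a hypothetical four-node DAG, with list position standing in for revision numbers:

    # hypothetical DAG: node -> (p1, p2); list order stands in for rev numbers
    parents = {"a": (None, None), "b": ("a", None),
               "c": ("b", None), "d": ("a", None)}
    order = ["a", "b", "c", "d"]

    def between(roots, heads):
        desc = set(roots)                 # roots descend from themselves
        for n in order:                   # topological sweep, like rev order
            if any(p in desc for p in parents[n]):
                desc.add(n)
        anc, stack = set(), list(heads)   # walk parent links up from the heads
        while stack:
            n = stack.pop()
            if n not in anc:
                anc.add(n)
                stack.extend(p for p in parents[n] if p is not None)
        return [n for n in order if n in desc and n in anc]

    assert between(["a"], ["c"]) == ["a", "b", "c"]   # "d" is no ancestor of "c"
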
700 707 def heads(self, start=None):
701 708 """return the list of all nodes that have no children
702 709
703 710 if start is specified, only heads that are descendants of
704 711 start will be returned
705 712
706 713 """
707 714 if start is None:
708 715 start = nullid
709 716 reachable = {start: 1}
710 717 heads = {start: 1}
711 718 startrev = self.rev(start)
712 719
713 720 for r in xrange(startrev + 1, self.count()):
714 721 n = self.node(r)
715 722 for pn in self.parents(n):
716 723 if pn in reachable:
717 724 reachable[n] = 1
718 725 heads[n] = 1
719 726 if pn in heads:
720 727 del heads[pn]
721 728 return heads.keys()
722 729
723 730 def children(self, node):
724 731 """find the children of a given node"""
725 732 c = []
726 733 p = self.rev(node)
727 734 for r in range(p + 1, self.count()):
728 735 n = self.node(r)
729 736 for pn in self.parents(n):
730 737 if pn == node:
731 738 c.append(n)
732 739 continue
733 740 elif pn == nullid:
734 741 continue
735 742 return c
736 743
737 744 def lookup(self, id):
738 745 """locate a node based on revision number or subset of hex nodeid"""
739 746 try:
740 747 rev = int(id)
741 748 if str(rev) != id: raise ValueError
742 749 if rev < 0: rev = self.count() + rev
743 750 if rev < 0 or rev >= self.count(): raise ValueError
744 751 return self.node(rev)
745 752 except (ValueError, OverflowError):
746 753 c = []
747 754 for n in self.nodemap:
748 755 if hex(n).startswith(id):
749 756 c.append(n)
750 757 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
751 758 if len(c) < 1: raise RevlogError(_("No match found"))
752 759 return c[0]
753 760
754 761 return None
755 762
756 763 def diff(self, a, b):
757 764 """return a delta between two revisions"""
758 765 return mdiff.textdiff(a, b)
759 766
760 767 def patches(self, t, pl):
761 768 """apply a list of patches to a string"""
762 769 return mdiff.patches(t, pl)
763 770
764 771 def chunk(self, rev, df=None, cachelen=4096):
765 772 start, length = self.start(rev), self.length(rev)
766 773 inline = self.inlinedata()
767 774 if inline:
768 775 start += (rev + 1) * struct.calcsize(self.indexformat)
769 776 end = start + length
770 777 def loadcache(df):
771 778 cache_length = max(cachelen, length) # 4k
772 779 if not df:
773 780 if inline:
774 781 df = self.opener(self.indexfile)
775 782 else:
776 783 df = self.opener(self.datafile)
777 784 df.seek(start)
778 785 self.chunkcache = (start, df.read(cache_length))
779 786
780 787 if not self.chunkcache:
781 788 loadcache(df)
782 789
783 790 cache_start = self.chunkcache[0]
784 791 cache_end = cache_start + len(self.chunkcache[1])
785 792 if start >= cache_start and end <= cache_end:
786 793 # it is cached
787 794 offset = start - cache_start
788 795 else:
789 796 loadcache(df)
790 797 offset = 0
791 798
792 799 #def checkchunk():
793 800 # df = self.opener(self.datafile)
794 801 # df.seek(start)
795 802 # return df.read(length)
796 803 #assert s == checkchunk()
797 804 return decompress(self.chunkcache[1][offset:offset + length])
798 805
799 806 def delta(self, node):
800 807 """return or calculate a delta between a node and its predecessor"""
801 808 r = self.rev(node)
802 809 return self.revdiff(r - 1, r)
803 810
804 811 def revdiff(self, rev1, rev2):
805 812 """return or calculate a delta between two revisions"""
806 813 b1 = self.base(rev1)
807 814 b2 = self.base(rev2)
808 815 if b1 == b2 and rev1 + 1 == rev2:
809 816 return self.chunk(rev2)
810 817 else:
811 818 return self.diff(self.revision(self.node(rev1)),
812 819 self.revision(self.node(rev2)))
813 820
814 821 def revision(self, node):
815 822 """return an uncompressed revision of a given"""
816 823 if node == nullid: return ""
817 824 if self.cache and self.cache[0] == node: return self.cache[2]
818 825
819 826 # look up what we need to read
820 827 text = None
821 828 rev = self.rev(node)
822 829 base = self.base(rev)
823 830
824 831 if self.inlinedata():
825 832 # we probably have the whole chunk cached
826 833 df = None
827 834 else:
828 835 df = self.opener(self.datafile)
829 836
830 837 # do we have useful data cached?
831 838 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
832 839 base = self.cache[1]
833 840 text = self.cache[2]
834 841 self.loadindex(base, rev + 1)
835 842 else:
836 843 self.loadindex(base, rev + 1)
837 844 text = self.chunk(base, df=df)
838 845
839 846 bins = []
840 847 for r in xrange(base + 1, rev + 1):
841 848 bins.append(self.chunk(r, df=df))
842 849
843 850 text = self.patches(text, bins)
844 851
845 852 p1, p2 = self.parents(node)
846 853 if node != hash(text, p1, p2):
847 854 raise RevlogError(_("integrity check failed on %s:%d")
848 855 % (self.datafile, rev))
849 856
850 857 self.cache = (node, rev, text)
851 858 return text
852 859
853 860 def checkinlinesize(self, tr, fp=None):
854 861 if not self.inlinedata():
855 862 return
856 863 if not fp:
857 864 fp = self.opener(self.indexfile, 'r')
858 865 fp.seek(0, 2)
859 866 size = fp.tell()
860 867 if size < 131072:
861 868 return
862 869 trinfo = tr.find(self.indexfile)
863 870 if trinfo is None:
864 871 raise RevlogError(_("%s not found in the transaction") %
865 872 self.indexfile)
866 873
867 874 trindex = trinfo[2]
868 875 dataoff = self.start(trindex)
869 876
870 877 tr.add(self.datafile, dataoff)
871 878 df = self.opener(self.datafile, 'w')
872 879 calc = struct.calcsize(self.indexformat)
873 880 for r in xrange(self.count()):
874 881 start = self.start(r) + (r + 1) * calc
875 882 length = self.length(r)
876 883 fp.seek(start)
877 884 d = fp.read(length)
878 885 df.write(d)
879 886 fp.close()
880 887 df.close()
881 888 fp = self.opener(self.indexfile, 'w', atomictemp=True)
882 889 self.version &= ~(REVLOGNGINLINEDATA)
883 890 if self.count():
884 891 x = self.index[0]
885 892 e = struct.pack(self.indexformat, *x)[4:]
886 893 l = struct.pack(versionformat, self.version)
887 894 fp.write(l)
888 895 fp.write(e)
889 896
890 897 for i in xrange(1, self.count()):
891 898 x = self.index[i]
892 899 e = struct.pack(self.indexformat, *x)
893 900 fp.write(e)
894 901
895 902 # if we don't call rename, the temp file will never replace the
896 903 # real index
897 904 fp.rename()
898 905
899 906 tr.replace(self.indexfile, trindex * calc)
900 907 self.chunkcache = None
901 908
902 909 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
903 910 """add a revision to the log
904 911
905 912 text - the revision data to add
906 913 transaction - the transaction object used for rollback
907 914 link - the linkrev data to add
908 915 p1, p2 - the parent nodeids of the revision
909 916 d - an optional precomputed delta
910 917 """
911 918 if text is None: text = ""
912 919 if p1 is None: p1 = self.tip()
913 920 if p2 is None: p2 = nullid
914 921
915 922 node = hash(text, p1, p2)
916 923
917 924 if node in self.nodemap:
918 925 return node
919 926
920 927 n = self.count()
921 928 t = n - 1
922 929
923 930 if n:
924 931 base = self.base(t)
925 932 start = self.start(base)
926 933 end = self.end(t)
927 934 if not d:
928 935 prev = self.revision(self.tip())
929 936 d = self.diff(prev, str(text))
930 937 data = compress(d)
931 938 l = len(data[1]) + len(data[0])
932 939 dist = end - start + l
933 940
934 941 # full versions are inserted when the needed deltas
935 942 # become comparable to the uncompressed text
936 943 if not n or dist > len(text) * 2:
937 944 data = compress(text)
938 945 l = len(data[1]) + len(data[0])
939 946 base = n
940 947 else:
941 948 base = self.base(t)
942 949
943 950 offset = 0
944 951 if t >= 0:
945 952 offset = self.end(t)
946 953
947 954 if self.version == REVLOGV0:
948 955 e = (offset, l, base, link, p1, p2, node)
949 956 else:
950 957 e = (self.offset_type(offset, 0), l, len(text),
951 958 base, link, self.rev(p1), self.rev(p2), node)
952 959
953 960 self.index.append(e)
954 961 self.nodemap[node] = n
955 962 entry = struct.pack(self.indexformat, *e)
956 963
957 964 if not self.inlinedata():
958 965 transaction.add(self.datafile, offset)
959 966 transaction.add(self.indexfile, n * len(entry))
960 967 f = self.opener(self.datafile, "a")
961 968 if data[0]:
962 969 f.write(data[0])
963 970 f.write(data[1])
964 971 f.close()
965 972 f = self.opener(self.indexfile, "a")
966 973 else:
967 974 f = self.opener(self.indexfile, "a+")
968 975 f.seek(0, 2)
969 976 transaction.add(self.indexfile, f.tell(), self.count() - 1)
970 977
971 978 if len(self.index) == 1 and self.version != REVLOGV0:
972 979 l = struct.pack(versionformat, self.version)
973 980 f.write(l)
974 981 entry = entry[4:]
975 982
976 983 f.write(entry)
977 984
978 985 if self.inlinedata():
979 986 f.write(data[0])
980 987 f.write(data[1])
981 988 self.checkinlinesize(transaction, f)
982 989
983 990 self.cache = (node, n, text)
984 991 return node
985 992
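The dist > len(text) * 2 test above is the whole chain-length policy in one line: dist spans from the start of the chain's base snapshot to the end of the proposed delta, and once that span passes twice the size of the new full text a fresh snapshot becomes cheaper to read back. A restatement with hypothetical numbers:

    def stores_full_version(n, dist, textlen):
        # mirrors the test in addrevision: first rev, or chain grown too long
        return not n or dist > textlen * 2

    assert stores_full_version(0, 0, 100)        # first rev: always a snapshot
    assert not stores_full_version(5, 150, 100)  # 150 <= 200: extend the chain
    assert stores_full_version(5, 250, 100)      # 250 > 200: start a new base
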
986 993 def ancestor(self, a, b):
987 994 """calculate the least common ancestor of nodes a and b"""
988 995
989 996 # start with some short cuts for the linear cases
990 997 if a == b:
991 998 return a
992 999 ra = self.rev(a)
993 1000 rb = self.rev(b)
994 1001 if ra < rb:
995 1002 last = b
996 1003 first = a
997 1004 else:
998 1005 last = a
999 1006 first = b
1000 1007
1001 1008 # reachable won't include stop in the list, so we have to use a parent
1002 1009 reachable = self.reachable(last, stop=self.parents(first)[0])
1003 1010 if first in reachable:
1004 1011 return first
1005 1012
1006 1013 # calculate the distance of every node from root
1007 1014 dist = {nullid: 0}
1008 1015 for i in xrange(self.count()):
1009 1016 n = self.node(i)
1010 1017 p1, p2 = self.parents(n)
1011 1018 dist[n] = max(dist[p1], dist[p2]) + 1
1012 1019
1013 1020 # traverse ancestors in order of decreasing distance from root
1014 1021 def ancestors(node):
1015 1022 # we store negative distances because heap returns smallest member
1016 1023 h = [(-dist[node], node)]
1017 1024 seen = {}
1018 1025 while h:
1019 1026 d, n = heapq.heappop(h)
1020 1027 if n not in seen:
1021 1028 seen[n] = 1
1022 1029 yield (-d, n)
1023 1030 for p in self.parents(n):
1024 1031 heapq.heappush(h, (-dist[p], p))
1025 1032
1026 1033 def generations(node):
1027 1034 sg, s = None, {}
1028 1035 for g,n in ancestors(node):
1029 1036 if g != sg:
1030 1037 if sg:
1031 1038 yield sg, s
1032 1039 sg, s = g, {n:1}
1033 1040 else:
1034 1041 s[n] = 1
1035 1042 yield sg, s
1036 1043
1037 1044 x = generations(a)
1038 1045 y = generations(b)
1039 1046 gx = x.next()
1040 1047 gy = y.next()
1041 1048
1042 1049 # increment each ancestor list until it is closer to root than
1043 1050 # the other, or they match
1044 1051 while 1:
1045 1052 #print "ancestor gen %s %s" % (gx[0], gy[0])
1046 1053 if gx[0] == gy[0]:
1047 1054 # find the intersection
1048 1055 i = [ n for n in gx[1] if n in gy[1] ]
1049 1056 if i:
1050 1057 return i[0]
1051 1058 else:
1052 1059 #print "next"
1053 1060 gy = y.next()
1054 1061 gx = x.next()
1055 1062 elif gx[0] < gy[0]:
1056 1063 #print "next y"
1057 1064 gy = y.next()
1058 1065 else:
1059 1066 #print "next x"
1060 1067 gx = x.next()
1061 1068
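The heap walk above enumerates ancestors in order of decreasing distance from root, and generations() groups them into equal-distance layers so the two sides can be compared layer by layer. A simplified standalone variant on a hypothetical parent map, which materializes one side's ancestor set instead of pairing generators:

    import heapq

    parents = {"root": [], "a": ["root"], "b": ["a"], "c": ["a"], "d": ["b"]}
    dist = {}
    for n in ["root", "a", "b", "c", "d"]:        # topological order
        dist[n] = max([dist[p] for p in parents[n]] + [-1]) + 1

    def ancestors(node):
        # yield (distance, node), farthest from root first; negate for the heap
        h, seen = [(-dist[node], node)], set()
        while h:
            d, n = heapq.heappop(h)
            if n not in seen:
                seen.add(n)
                yield -d, n
                for p in parents[n]:
                    heapq.heappush(h, (-dist[p], p))

    def lca(a, b):
        ancs_b = set(n for _, n in ancestors(b))
        for _, n in ancestors(a):                 # deepest common node wins
            if n in ancs_b:
                return n

    assert lca("c", "d") == "a"
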
1062 1069 def group(self, nodelist, lookup, infocollect=None):
1063 1070 """calculate a delta group
1064 1071
1065 1072 Given a list of changeset revs, return a set of deltas and
1066 1073 metadata corresponding to nodes. The first delta is
1067 1074 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1068 1075 have this parent as it has all history before these
1069 1076 changesets. parent is parent[0].
1070 1077 """
1071 1078 revs = [self.rev(n) for n in nodelist]
1072 1079
1073 1080 # if we don't have any revisions touched by these changesets, bail
1074 1081 if not revs:
1075 1082 yield changegroup.closechunk()
1076 1083 return
1077 1084
1078 1085 # add the parent of the first rev
1079 1086 p = self.parents(self.node(revs[0]))[0]
1080 1087 revs.insert(0, self.rev(p))
1081 1088
1082 1089 # build deltas
1083 1090 for d in xrange(0, len(revs) - 1):
1084 1091 a, b = revs[d], revs[d + 1]
1085 1092 nb = self.node(b)
1086 1093
1087 1094 if infocollect is not None:
1088 1095 infocollect(nb)
1089 1096
1090 1097 d = self.revdiff(a, b)
1091 1098 p = self.parents(nb)
1092 1099 meta = nb + p[0] + p[1] + lookup(nb)
1093 1100 yield changegroup.genchunk("%s%s" % (meta, d))
1094 1101
1095 1102 yield changegroup.closechunk()
1096 1103
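Each chunk built above carries an 80-byte header (node, p1, p2, and the changeset node that the link rev will be derived from) followed by the raw delta, which is exactly what addgroup's struct.unpack splits apart below. A round-trip sketch with made-up ids:

    import struct

    node, p1, p2, cs = b"\0" * 20, b"\1" * 20, b"\2" * 20, b"\3" * 20
    delta = b"opaque delta bytes"

    payload = node + p1 + p2 + cs + delta         # what genchunk would frame
    fields = struct.unpack("20s20s20s20s", payload[:80])
    assert fields == (node, p1, p2, cs)
    assert payload[80:] == delta
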
1097 1104 def addgroup(self, revs, linkmapper, transaction, unique=0):
1098 1105 """
1099 1106 add a delta group
1100 1107
1101 1108 given a set of deltas, add them to the revision log. The
1102 1109 first delta is against its parent, which should be in our
1103 1110 log; the rest are against the previous delta.
1104 1111 """
1105 1112
1106 1113 # track the base of the current delta log
1107 1114 r = self.count()
1108 1115 t = r - 1
1109 1116 node = None
1110 1117
1111 1118 base = prev = -1
1112 1119 start = end = textlen = 0
1113 1120 if r:
1114 1121 end = self.end(t)
1115 1122
1116 1123 ifh = self.opener(self.indexfile, "a+")
1117 1124 ifh.seek(0, 2)
1118 1125 transaction.add(self.indexfile, ifh.tell(), self.count())
1119 1126 if self.inlinedata():
1120 1127 dfh = None
1121 1128 else:
1122 1129 transaction.add(self.datafile, end)
1123 1130 dfh = self.opener(self.datafile, "a")
1124 1131
1125 1132 # loop through our set of deltas
1126 1133 chain = None
1127 1134 for chunk in revs:
1128 1135 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1129 1136 link = linkmapper(cs)
1130 1137 if node in self.nodemap:
1131 1138 # this can happen if two branches make the same change
1132 1139 # if unique:
1133 1140 # raise RevlogError(_("already have %s") % hex(node[:4]))
1134 1141 chain = node
1135 1142 continue
1136 1143 delta = chunk[80:]
1137 1144
1138 1145 for p in (p1, p2):
1139 1146 if p not in self.nodemap:
1140 1147 raise RevlogError(_("unknown parent %s") % short(p))
1141 1148
1142 1149 if not chain:
1143 1150 # retrieve the parent revision of the delta chain
1144 1151 chain = p1
1145 1152 if chain not in self.nodemap:
1146 1153 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1147 1154
1148 1155 # full versions are inserted when the needed deltas become
1149 1156 # comparable to the uncompressed text or when the previous
1150 1157 # version is not the one we have a delta against. We use
1151 1158 # the size of the previous full rev as a proxy for the
1152 1159 # current size.
1153 1160
1154 1161 if chain == prev:
1155 1162 tempd = compress(delta)
1156 1163 cdelta = tempd[0] + tempd[1]
1157 1164 textlen = mdiff.patchedsize(textlen, delta)
1158 1165
1159 1166 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1160 1167 # flush our writes here so we can read it in revision
1161 1168 if dfh:
1162 1169 dfh.flush()
1163 1170 ifh.flush()
1164 1171 text = self.revision(chain)
1165 1172 text = self.patches(text, [delta])
1166 1173 chk = self.addrevision(text, transaction, link, p1, p2)
1167 1174 if chk != node:
1168 1175 raise RevlogError(_("consistency error adding group"))
1169 1176 textlen = len(text)
1170 1177 else:
1171 1178 if self.version == REVLOGV0:
1172 1179 e = (end, len(cdelta), base, link, p1, p2, node)
1173 1180 else:
1174 1181 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1175 1182 link, self.rev(p1), self.rev(p2), node)
1176 1183 self.index.append(e)
1177 1184 self.nodemap[node] = r
1178 1185 if self.inlinedata():
1179 1186 ifh.write(struct.pack(self.indexformat, *e))
1180 1187 ifh.write(cdelta)
1181 1188 self.checkinlinesize(transaction, ifh)
1182 1189 if not self.inlinedata():
1183 1190 dfh = self.opener(self.datafile, "a")
1184 1191 ifh = self.opener(self.indexfile, "a")
1185 1192 else:
1186 1193 if not dfh:
1187 1194 # addrevision switched from inline to conventional
1188 1195 # reopen the index
1189 1196 dfh = self.opener(self.datafile, "a")
1190 1197 ifh = self.opener(self.indexfile, "a")
1191 1198 dfh.write(cdelta)
1192 1199 ifh.write(struct.pack(self.indexformat, *e))
1193 1200
1194 1201 t, r, chain, prev = r, r + 1, node, node
1195 1202 base = self.base(t)
1196 1203 start = self.start(base)
1197 1204 end = self.end(t)
1198 1205
1199 1206 return node
1200 1207
1201 1208 def strip(self, rev, minlink):
1202 1209 if self.count() == 0 or rev >= self.count():
1203 1210 return
1204 1211
1205 1212 if isinstance(self.index, lazyindex):
1206 1213 self.loadindexmap()
1207 1214
1208 1215 # When stripping away a revision, we need to make sure it
1209 1216 # does not actually belong to an older changeset.
1210 1217 # The minlink parameter defines the oldest revision
1211 1218 # we're allowed to strip away.
1212 1219 while minlink > self.index[rev][-4]:
1213 1220 rev += 1
1214 1221 if rev >= self.count():
1215 1222 return
1216 1223
1217 1224 # first truncate the files on disk
1218 1225 end = self.start(rev)
1219 1226 if not self.inlinedata():
1220 1227 df = self.opener(self.datafile, "a")
1221 1228 df.truncate(end)
1222 1229 end = rev * struct.calcsize(self.indexformat)
1223 1230 else:
1224 1231 end += rev * struct.calcsize(self.indexformat)
1225 1232
1226 1233 indexf = self.opener(self.indexfile, "a")
1227 1234 indexf.truncate(end)
1228 1235
1229 1236 # then reset internal state in memory to forget those revisions
1230 1237 self.cache = None
1231 1238 self.chunkcache = None
1232 1239 for x in xrange(rev, self.count()):
1233 1240 del self.nodemap[self.node(x)]
1234 1241
1235 1242 del self.index[rev:]
1236 1243
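The minlink guard above advances past revisions whose linkrev is older than the oldest changeset being stripped, so a filelog revision introduced by an earlier changeset survives the truncation. With hypothetical linkrevs:

    linkrevs = [0, 2, 3, 5]        # hypothetical linkrev per revision
    rev, minlink = 1, 4            # asked to strip from rev 1, links >= 4 only
    while rev < len(linkrevs) and minlink > linkrevs[rev]:
        rev += 1
    assert rev == 3                # revs 1 and 2 belong to older changesets
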
1237 1244 def checksize(self):
1238 1245 expected = 0
1239 1246 if self.count():
1240 1247 expected = self.end(self.count() - 1)
1241 1248
1242 1249 try:
1243 1250 f = self.opener(self.datafile)
1244 1251 f.seek(0, 2)
1245 1252 actual = f.tell()
1246 1253 dd = actual - expected
1247 1254 except IOError, inst:
1248 1255 if inst.errno != errno.ENOENT:
1249 1256 raise
1250 1257 dd = 0
1251 1258
1252 1259 try:
1253 1260 f = self.opener(self.indexfile)
1254 1261 f.seek(0, 2)
1255 1262 actual = f.tell()
1256 1263 s = struct.calcsize(self.indexformat)
1257 1264 i = actual / s
1258 1265 di = actual - (i * s)
1259 1266 if self.inlinedata():
1260 1267 databytes = 0
1261 1268 for r in xrange(self.count()):
1262 1269 databytes += self.length(r)
1263 1270 dd = 0
1264 1271 di = actual - self.count() * s - databytes
1265 1272 except IOError, inst:
1266 1273 if inst.errno != errno.ENOENT:
1267 1274 raise
1268 1275 di = 0
1269 1276
1270 1277 return (dd, di)
1271 1278
1272 1279