Fix revlog.children so the real children of the null revision can be calculated.
Thomas Arendsen Hein
r4746:62c56d8f default
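Summary of the change: every index entry stores two parent slots, and an unused slot holds nullrev, so the old loop in revlog.children reported any revision with an empty parent slot as a child of the null revision. The new loop filters out the nullrev padding first, matches only real parents, and counts a revision as a child of the null revision only when it has no real parents at all, i.e. when it is a root. A minimal annotated sketch of the corrected method, paraphrased from the hunk below (nullrev is -1):

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            # drop the nullrev padding that fills unused parent slots
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                # r has at least one real parent: match it against p
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                # r has no real parents, so it is a child of the null revision
                c.append(self.node(r))
        return c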
@@ -1,1297 +1,1301 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
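# Note: because the parent hashes are sorted before hashing, the nodeid is
# symmetric in its parents, e.g. hash(text, p1, p2) == hash(text, p2, p1),
# so swapping merge parents does not change a revision's identity.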
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text: return ("", text)
46 46 if len(text) < 44:
47 47 if text[0] == '\0': return ("", text)
48 48 return ('u', text)
49 49 bin = zlib.compress(text)
50 50 if len(bin) > len(text):
51 51 if text[0] == '\0': return ("", text)
52 52 return ('u', text)
53 53 return ("", bin)
54 54
55 55 def decompress(bin):
56 56 """ decompress the given input """
57 57 if not bin: return bin
58 58 t = bin[0]
59 59 if t == '\0': return bin
60 60 if t == 'x': return zlib.decompress(bin)
61 61 if t == 'u': return bin[1:]
62 62 raise RevlogError(_("unknown compression type %r") % t)
63 63
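# Note: the first byte of a stored chunk tags its form: 'u' means raw text,
# 'x' is the zlib header, and a leading '\0' passes through untouched, so
# for any input the pair round-trips:
#   t = compress(text); decompress(t[0] + t[1]) == text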
64 64 indexformatv0 = ">4l20s20s20s"
65 65 v0shaoffset = 56
66 66 # index ng:
67 67 # 6 bytes offset
68 68 # 2 bytes flags
69 69 # 4 bytes compressed length
70 70 # 4 bytes uncompressed length
71 71 # 4 bytes: base rev
72 72 # 4 bytes link rev
73 73 # 4 bytes parent 1 rev
74 74 # 4 bytes parent 2 rev
75 75 # 32 bytes: nodeid
76 76 indexformatng = ">Qiiiiii20s12x"
77 77 ngshaoffset = 32
78 78 versionformat = ">I"
79 79
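# Note: the RevlogNG record is a fixed 64 bytes,
# struct.calcsize(indexformatng) == 64: 8 bytes of packed offset/flags,
# six 4-byte ints (lengths, base, link, p1, p2), a 20-byte nodeid and
# 12 bytes of padding; ngshaoffset (32) is where the nodeid starts.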
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83 """
84 84
85 85 # lazyparser is not safe to use on windows if win32 extensions not
86 86 # available. it keeps a file handle open, which makes it impossible
87 87 # to break hardlinks on local cloned repos.
88 88 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 89 hasattr(util, 'win32api'))
90 90
91 91 def __init__(self, dataf, size, indexformat, shaoffset):
92 92 self.dataf = dataf
93 93 self.format = indexformat
94 94 self.s = struct.calcsize(indexformat)
95 95 self.indexformat = indexformat
96 96 self.datasize = size
97 97 self.l = size/self.s
98 98 self.index = [None] * self.l
99 99 self.map = {nullid: nullrev}
100 100 self.allmap = 0
101 101 self.all = 0
102 102 self.mapfind_count = 0
103 103 self.shaoffset = shaoffset
104 104
105 105 def loadmap(self):
106 106 """
107 107 during a commit, we need to make sure the rev being added is
108 108 not a duplicate. This requires loading the entire index,
109 109 which is fairly slow. loadmap can load up just the node map,
110 110 which takes much less time.
111 111 """
112 112 if self.allmap: return
113 113 end = self.datasize
114 114 self.allmap = 1
115 115 cur = 0
116 116 count = 0
117 117 blocksize = self.s * 256
118 118 self.dataf.seek(0)
119 119 while cur < end:
120 120 data = self.dataf.read(blocksize)
121 121 off = 0
122 122 for x in xrange(256):
123 123 n = data[off + self.shaoffset:off + self.shaoffset + 20]
124 124 self.map[n] = count
125 125 count += 1
126 126 if count >= self.l:
127 127 break
128 128 off += self.s
129 129 cur += blocksize
130 130
131 131 def loadblock(self, blockstart, blocksize, data=None):
132 132 if self.all: return
133 133 if data is None:
134 134 self.dataf.seek(blockstart)
135 135 if blockstart + blocksize > self.datasize:
136 136 # the revlog may have grown since we've started running,
137 137 # but we don't have space in self.index for more entries.
138 138 # limit blocksize so that we don't get too much data.
139 139 blocksize = max(self.datasize - blockstart, 0)
140 140 data = self.dataf.read(blocksize)
141 141 lend = len(data) / self.s
142 142 i = blockstart / self.s
143 143 off = 0
144 144 # lazyindex supports __delitem__
145 145 if lend > len(self.index) - i:
146 146 lend = len(self.index) - i
147 147 for x in xrange(lend):
148 148 if self.index[i + x] is None:
149 149 b = data[off : off + self.s]
150 150 self.index[i + x] = b
151 151 n = b[self.shaoffset:self.shaoffset + 20]
152 152 self.map[n] = i + x
153 153 off += self.s
154 154
155 155 def findnode(self, node):
156 156 """search backwards through the index file for a specific node"""
157 157 if self.allmap: return None
158 158
159 159 # hg log will cause many many searches for the manifest
160 160 # nodes. After we get called a few times, just load the whole
161 161 # thing.
162 162 if self.mapfind_count > 8:
163 163 self.loadmap()
164 164 if node in self.map:
165 165 return node
166 166 return None
167 167 self.mapfind_count += 1
168 168 last = self.l - 1
169 169 while self.index[last] is not None:
170 170 if last == 0:
171 171 self.all = 1
172 172 self.allmap = 1
173 173 return None
174 174 last -= 1
175 175 end = (last + 1) * self.s
176 176 blocksize = self.s * 256
177 177 while end >= 0:
178 178 start = max(end - blocksize, 0)
179 179 self.dataf.seek(start)
180 180 data = self.dataf.read(end - start)
181 181 findend = end - start
182 182 while True:
183 183 # we're searching backwards, so we have to make sure
184 184 # we don't find a changeset where this node is a parent
185 185 off = data.rfind(node, 0, findend)
186 186 findend = off
187 187 if off >= 0:
188 188 i = off / self.s
189 189 off = i * self.s
190 190 n = data[off + self.shaoffset:off + self.shaoffset + 20]
191 191 if n == node:
192 192 self.map[n] = i + start / self.s
193 193 return node
194 194 else:
195 195 break
196 196 end -= blocksize
197 197 return None
198 198
199 199 def loadindex(self, i=None, end=None):
200 200 if self.all: return
201 201 all = False
202 202 if i is None:
203 203 blockstart = 0
204 204 blocksize = (512 / self.s) * self.s
205 205 end = self.datasize
206 206 all = True
207 207 else:
208 208 if end:
209 209 blockstart = i * self.s
210 210 end = end * self.s
211 211 blocksize = end - blockstart
212 212 else:
213 213 blockstart = (i & ~63) * self.s
214 214 blocksize = self.s * 64
215 215 end = blockstart + blocksize
216 216 while blockstart < end:
217 217 self.loadblock(blockstart, blocksize)
218 218 blockstart += blocksize
219 219 if all: self.all = True
220 220
221 221 class lazyindex(object):
222 222 """a lazy version of the index array"""
223 223 def __init__(self, parser):
224 224 self.p = parser
225 225 def __len__(self):
226 226 return len(self.p.index)
227 227 def load(self, pos):
228 228 if pos < 0:
229 229 pos += len(self.p.index)
230 230 self.p.loadindex(pos)
231 231 return self.p.index[pos]
232 232 def __getitem__(self, pos):
233 233 ret = self.p.index[pos] or self.load(pos)
234 234 if isinstance(ret, str):
235 235 ret = struct.unpack(self.p.indexformat, ret)
236 236 return ret
237 237 def __setitem__(self, pos, item):
238 238 self.p.index[pos] = item
239 239 def __delitem__(self, pos):
240 240 del self.p.index[pos]
241 241 def append(self, e):
242 242 self.p.index.append(e)
243 243
244 244 class lazymap(object):
245 245 """a lazy version of the node map"""
246 246 def __init__(self, parser):
247 247 self.p = parser
248 248 def load(self, key):
249 249 n = self.p.findnode(key)
250 250 if n is None:
251 251 raise KeyError(key)
252 252 def __contains__(self, key):
253 253 if key in self.p.map:
254 254 return True
255 255 self.p.loadmap()
256 256 return key in self.p.map
257 257 def __iter__(self):
258 258 yield nullid
259 259 for i in xrange(self.p.l):
260 260 ret = self.p.index[i]
261 261 if not ret:
262 262 self.p.loadindex(i)
263 263 ret = self.p.index[i]
264 264 if isinstance(ret, str):
265 265 ret = struct.unpack(self.p.indexformat, ret)
266 266 yield ret[-1]
267 267 def __getitem__(self, key):
268 268 try:
269 269 return self.p.map[key]
270 270 except KeyError:
271 271 try:
272 272 self.load(key)
273 273 return self.p.map[key]
274 274 except KeyError:
275 275 raise KeyError("node " + hex(key))
276 276 def __setitem__(self, key, val):
277 277 self.p.map[key] = val
278 278 def __delitem__(self, key):
279 279 del self.p.map[key]
280 280
281 281 class RevlogError(Exception): pass
282 282 class LookupError(RevlogError): pass
283 283
284 284 class revlog(object):
285 285 """
286 286 the underlying revision storage object
287 287
288 288 A revlog consists of two parts, an index and the revision data.
289 289
290 290 The index is a file with a fixed record size containing
291 291 information on each revision, including its nodeid (hash), the
292 292 nodeids of its parents, the position and offset of its data within
293 293 the data file, and the revision it's based on. Finally, each entry
294 294 contains a linkrev entry that can serve as a pointer to external
295 295 data.
296 296
297 297 The revision data itself is a linear collection of data chunks.
298 298 Each chunk represents a revision and is usually represented as a
299 299 delta against the previous chunk. To bound lookup time, runs of
300 300 deltas are limited to about 2 times the length of the original
301 301 version data. This makes retrieval of a version proportional to
302 302 its size, or O(1) relative to the number of revisions.
303 303
304 304 Both pieces of the revlog are written to in an append-only
305 305 fashion, which means we never need to rewrite a file to insert or
306 306 remove data, and can use some simple techniques to avoid the need
307 307 for locking while reading.
308 308 """
309 309 def __init__(self, opener, indexfile):
310 310 """
311 311 create a revlog object
312 312
313 313 opener is a function that abstracts the file opening operation
314 314 and can be used to implement COW semantics or the like.
315 315 """
316 316 self.indexfile = indexfile
317 317 self.datafile = indexfile[:-2] + ".d"
318 318 self.opener = opener
319 319
320 320 self.indexstat = None
321 321 self.cache = None
322 322 self.chunkcache = None
323 323 self.defversion = REVLOG_DEFAULT_VERSION
324 324 if hasattr(opener, "defversion"):
325 325 self.defversion = opener.defversion
326 326 if self.defversion & REVLOGNG:
327 327 self.defversion |= REVLOGNGINLINEDATA
328 328 self.load()
329 329
330 330 def load(self):
331 331 v = self.defversion
332 332 try:
333 333 f = self.opener(self.indexfile)
334 334 i = f.read(4)
335 335 f.seek(0)
336 336 except IOError, inst:
337 337 if inst.errno != errno.ENOENT:
338 338 raise
339 339 i = ""
340 340 else:
341 341 try:
342 342 st = util.fstat(f)
343 343 except AttributeError, inst:
344 344 st = None
345 345 else:
346 346 oldst = self.indexstat
347 347 if (oldst and st.st_dev == oldst.st_dev
348 348 and st.st_ino == oldst.st_ino
349 349 and st.st_mtime == oldst.st_mtime
350 350 and st.st_ctime == oldst.st_ctime
351 351 and st.st_size == oldst.st_size):
352 352 return
353 353 self.indexstat = st
354 354 if len(i) > 0:
355 355 v = struct.unpack(versionformat, i)[0]
356 356 flags = v & ~0xFFFF
357 357 fmt = v & 0xFFFF
358 358 if fmt == REVLOGV0:
359 359 if flags:
360 360 raise RevlogError(_("index %s unknown flags %#04x for format v0")
361 361 % (self.indexfile, flags >> 16))
362 362 elif fmt == REVLOGNG:
363 363 if flags & ~REVLOGNGINLINEDATA:
364 364 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
365 365 % (self.indexfile, flags >> 16))
366 366 else:
367 367 raise RevlogError(_("index %s unknown format %d")
368 368 % (self.indexfile, fmt))
369 369 self.version = v
370 370 if v == REVLOGV0:
371 371 self.indexformat = indexformatv0
372 372 shaoffset = v0shaoffset
373 373 else:
374 374 self.indexformat = indexformatng
375 375 shaoffset = ngshaoffset
376 376
377 377 if i:
378 378 if (lazyparser.safe_to_use and not self.inlinedata() and
379 379 st and st.st_size > 10000):
380 380 # big index, let's parse it on demand
381 381 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
382 382 self.index = lazyindex(parser)
383 383 self.nodemap = lazymap(parser)
384 384 else:
385 385 self.parseindex(f, st)
386 386 if self.version != REVLOGV0:
387 387 e = list(self.index[0])
388 388 type = self.ngtype(e[0])
389 389 e[0] = self.offset_type(0, type)
390 390 self.index[0] = e
391 391 else:
392 392 self.nodemap = {nullid: nullrev}
393 393 self.index = []
394 394
395 395
396 396 def parseindex(self, fp, st):
397 397 s = struct.calcsize(self.indexformat)
398 398 self.index = []
399 399 self.nodemap = {nullid: nullrev}
400 400 inline = self.inlinedata()
401 401 n = 0
402 402 leftover = None
403 403 while True:
404 404 if st:
405 405 data = fp.read(65536)
406 406 else:
407 407 # hack for httprangereader, it doesn't do partial reads well
408 408 data = fp.read()
409 409 if not data:
410 410 break
411 411 if n == 0 and self.inlinedata():
412 412 # cache the first chunk
413 413 self.chunkcache = (0, data)
414 414 if leftover:
415 415 data = leftover + data
416 416 leftover = None
417 417 off = 0
418 418 l = len(data)
419 419 while off < l:
420 420 if l - off < s:
421 421 leftover = data[off:]
422 422 break
423 423 cur = data[off:off + s]
424 424 off += s
425 425 e = struct.unpack(self.indexformat, cur)
426 426 self.index.append(e)
427 427 self.nodemap[e[-1]] = n
428 428 n += 1
429 429 if inline:
430 430 if e[1] < 0:
431 431 break
432 432 off += e[1]
433 433 if off > l:
434 434 # some things don't seek well, just read it
435 435 fp.read(off - l)
436 436 break
437 437 if not st:
438 438 break
439 439
440 440
441 441 def ngoffset(self, q):
442 442 if q & 0xFFFF:
443 443 raise RevlogError(_('%s: incompatible revision flag %x') %
444 444 (self.indexfile, q))
445 445 return long(q >> 16)
446 446
447 447 def ngtype(self, q):
448 448 return int(q & 0xFFFF)
449 449
450 450 def offset_type(self, offset, type):
451 451 return long(long(offset) << 16 | type)
452 452
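# Note: the first index field packs the data-file offset into the high bits
# and the 16 type/flag bits into the low bits, so for type 0 the pair
# round-trips: ngoffset(offset_type(n, 0)) == n and ngtype(...) == 0.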
453 453 def loadindex(self, start, end):
454 454 """load a block of indexes all at once from the lazy parser"""
455 455 if isinstance(self.index, lazyindex):
456 456 self.index.p.loadindex(start, end)
457 457
458 458 def loadindexmap(self):
459 459 """loads both the map and the index from the lazy parser"""
460 460 if isinstance(self.index, lazyindex):
461 461 p = self.index.p
462 462 p.loadindex()
463 463 self.nodemap = p.map
464 464
465 465 def loadmap(self):
466 466 """loads the map from the lazy parser"""
467 467 if isinstance(self.nodemap, lazymap):
468 468 self.nodemap.p.loadmap()
469 469 self.nodemap = self.nodemap.p.map
470 470
471 471 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
472 472 def tip(self): return self.node(len(self.index) - 1)
473 473 def count(self): return len(self.index)
474 474 def node(self, rev):
475 475 return rev == nullrev and nullid or self.index[rev][-1]
476 476 def rev(self, node):
477 477 try:
478 478 return self.nodemap[node]
479 479 except KeyError:
480 480 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
481 481 def linkrev(self, node):
482 482 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
483 483 def parents(self, node):
484 484 if node == nullid: return (nullid, nullid)
485 485 r = self.rev(node)
486 486 d = self.index[r][-3:-1]
487 487 if self.version == REVLOGV0:
488 488 return d
489 489 return (self.node(d[0]), self.node(d[1]))
490 490 def parentrevs(self, rev):
491 491 if rev == nullrev:
492 492 return (nullrev, nullrev)
493 493 d = self.index[rev][-3:-1]
494 494 if self.version == REVLOGV0:
495 495 return (self.rev(d[0]), self.rev(d[1]))
496 496 return d
497 497 def start(self, rev):
498 498 if rev == nullrev:
499 499 return 0
500 500 if self.version != REVLOGV0:
501 501 return self.ngoffset(self.index[rev][0])
502 502 return self.index[rev][0]
503 503
504 504 def end(self, rev): return self.start(rev) + self.length(rev)
505 505
506 506 def size(self, rev):
507 507 """return the length of the uncompressed text for a given revision"""
508 508 if rev == nullrev:
509 509 return 0
510 510 l = -1
511 511 if self.version != REVLOGV0:
512 512 l = self.index[rev][2]
513 513 if l >= 0:
514 514 return l
515 515
516 516 t = self.revision(self.node(rev))
517 517 return len(t)
518 518
519 519 # alternate implementation. The advantage of this code is that it
520 520 # will be faster for a single revision. But the results are not
521 521 # cached, so finding the size of every revision will be slower.
522 522 """
523 523 if self.cache and self.cache[1] == rev:
524 524 return len(self.cache[2])
525 525
526 526 base = self.base(rev)
527 527 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
528 528 base = self.cache[1]
529 529 text = self.cache[2]
530 530 else:
531 531 text = self.revision(self.node(base))
532 532
533 533 l = len(text)
534 534 for x in xrange(base + 1, rev + 1):
535 535 l = mdiff.patchedsize(l, self.chunk(x))
536 536 return l
537 537 """
538 538
539 539 def length(self, rev):
540 540 if rev == nullrev:
541 541 return 0
542 542 else:
543 543 return self.index[rev][1]
544 544 def base(self, rev):
545 545 if (rev == nullrev):
546 546 return nullrev
547 547 else:
548 548 return self.index[rev][-5]
549 549
550 550 def reachable(self, node, stop=None):
551 551 """return a hash of all nodes ancestral to a given node, including
552 552 the node itself, stopping when stop is matched"""
553 553 reachable = {}
554 554 visit = [node]
555 555 reachable[node] = 1
556 556 if stop:
557 557 stopn = self.rev(stop)
558 558 else:
559 559 stopn = 0
560 560 while visit:
561 561 n = visit.pop(0)
562 562 if n == stop:
563 563 continue
564 564 if n == nullid:
565 565 continue
566 566 for p in self.parents(n):
567 567 if self.rev(p) < stopn:
568 568 continue
569 569 if p not in reachable:
570 570 reachable[p] = 1
571 571 visit.append(p)
572 572 return reachable
573 573
574 574 def nodesbetween(self, roots=None, heads=None):
575 575 """Return a tuple containing three elements. Elements 1 and 2 contain
576 576 a final list bases and heads after all the unreachable ones have been
577 577 pruned. Element 0 contains a topologically sorted list of all
578 578
579 579 nodes that satisfy these constraints:
580 580 1. All nodes must be descended from a node in roots (the nodes on
581 581 roots are considered descended from themselves).
582 582 2. All nodes must also be ancestors of a node in heads (the nodes in
583 583 heads are considered to be their own ancestors).
584 584
585 585 If roots is unspecified, nullid is assumed as the only root.
586 586 If heads is unspecified, it is taken to be the output of the
587 587 heads method (i.e. a list of all nodes in the repository that
588 588 have no children)."""
589 589 nonodes = ([], [], [])
590 590 if roots is not None:
591 591 roots = list(roots)
592 592 if not roots:
593 593 return nonodes
594 594 lowestrev = min([self.rev(n) for n in roots])
595 595 else:
596 596 roots = [nullid] # Everybody's a descendent of nullid
597 597 lowestrev = nullrev
598 598 if (lowestrev == nullrev) and (heads is None):
599 599 # We want _all_ the nodes!
600 600 return ([self.node(r) for r in xrange(0, self.count())],
601 601 [nullid], list(self.heads()))
602 602 if heads is None:
603 603 # All nodes are ancestors, so the latest ancestor is the last
604 604 # node.
605 605 highestrev = self.count() - 1
606 606 # Set ancestors to None to signal that every node is an ancestor.
607 607 ancestors = None
608 608 # Set heads to an empty dictionary for later discovery of heads
609 609 heads = {}
610 610 else:
611 611 heads = list(heads)
612 612 if not heads:
613 613 return nonodes
614 614 ancestors = {}
615 615 # Turn heads into a dictionary so we can remove 'fake' heads.
616 616 # Also, later we will be using it to filter out the heads we can't
617 617 # find from roots.
618 618 heads = dict.fromkeys(heads, 0)
619 619 # Start at the top and keep marking parents until we're done.
620 620 nodestotag = heads.keys()
621 621 # Remember where the top was so we can use it as a limit later.
622 622 highestrev = max([self.rev(n) for n in nodestotag])
623 623 while nodestotag:
624 624 # grab a node to tag
625 625 n = nodestotag.pop()
626 626 # Never tag nullid
627 627 if n == nullid:
628 628 continue
629 629 # A node's revision number represents its place in a
630 630 # topologically sorted list of nodes.
631 631 r = self.rev(n)
632 632 if r >= lowestrev:
633 633 if n not in ancestors:
634 634 # If we are possibly a descendent of one of the roots
635 635 # and we haven't already been marked as an ancestor
636 636 ancestors[n] = 1 # Mark as ancestor
637 637 # Add non-nullid parents to list of nodes to tag.
638 638 nodestotag.extend([p for p in self.parents(n) if
639 639 p != nullid])
640 640 elif n in heads: # We've seen it before, is it a fake head?
641 641 # So it is, real heads should not be the ancestors of
642 642 # any other heads.
643 643 heads.pop(n)
644 644 if not ancestors:
645 645 return nonodes
646 646 # Now that we have our set of ancestors, we want to remove any
647 647 # roots that are not ancestors.
648 648
649 649 # If one of the roots was nullid, everything is included anyway.
650 650 if lowestrev > nullrev:
651 651 # But, since we weren't, let's recompute the lowest rev to not
652 652 # include roots that aren't ancestors.
653 653
654 654 # Filter out roots that aren't ancestors of heads
655 655 roots = [n for n in roots if n in ancestors]
656 656 # Recompute the lowest revision
657 657 if roots:
658 658 lowestrev = min([self.rev(n) for n in roots])
659 659 else:
660 660 # No more roots? Return empty list
661 661 return nonodes
662 662 else:
663 663 # We are descending from nullid, and don't need to care about
664 664 # any other roots.
665 665 lowestrev = nullrev
666 666 roots = [nullid]
667 667 # Transform our roots list into a 'set' (i.e. a dictionary where the
668 668 # values don't matter).
669 669 descendents = dict.fromkeys(roots, 1)
670 670 # Also, keep the original roots so we can filter out roots that aren't
671 671 # 'real' roots (i.e. are descended from other roots).
672 672 roots = descendents.copy()
673 673 # Our topologically sorted list of output nodes.
674 674 orderedout = []
675 675 # Don't start at nullid since we don't want nullid in our output list,
676 676 # and if nullid shows up in descendents, empty parents will look like
677 677 # they're descendents.
678 678 for r in xrange(max(lowestrev, 0), highestrev + 1):
679 679 n = self.node(r)
680 680 isdescendent = False
681 681 if lowestrev == nullrev: # Everybody is a descendent of nullid
682 682 isdescendent = True
683 683 elif n in descendents:
684 684 # n is already a descendent
685 685 isdescendent = True
686 686 # This check only needs to be done here because all the roots
687 687 # will start being marked as descendents before the loop.
688 688 if n in roots:
689 689 # If n was a root, check if it's a 'real' root.
690 690 p = tuple(self.parents(n))
691 691 # If any of its parents are descendents, it's not a root.
692 692 if (p[0] in descendents) or (p[1] in descendents):
693 693 roots.pop(n)
694 694 else:
695 695 p = tuple(self.parents(n))
696 696 # A node is a descendent if either of its parents are
697 697 # descendents. (We seeded the descendents dict with the roots
698 698 # up there, remember?)
699 699 if (p[0] in descendents) or (p[1] in descendents):
700 700 descendents[n] = 1
701 701 isdescendent = True
702 702 if isdescendent and ((ancestors is None) or (n in ancestors)):
703 703 # Only include nodes that are both descendents and ancestors.
704 704 orderedout.append(n)
705 705 if (ancestors is not None) and (n in heads):
706 706 # We're trying to figure out which heads are reachable
707 707 # from roots.
708 708 # Mark this head as having been reached
709 709 heads[n] = 1
710 710 elif ancestors is None:
711 711 # Otherwise, we're trying to discover the heads.
712 712 # Assume this is a head because if it isn't, the next step
713 713 # will eventually remove it.
714 714 heads[n] = 1
715 715 # But, obviously its parents aren't.
716 716 for p in self.parents(n):
717 717 heads.pop(p, None)
718 718 heads = [n for n in heads.iterkeys() if heads[n] != 0]
719 719 roots = roots.keys()
720 720 assert orderedout
721 721 assert roots
722 722 assert heads
723 723 return (orderedout, roots, heads)
724 724
725 725 def heads(self, start=None, stop=None):
726 726 """return the list of all nodes that have no children
727 727
728 728 if start is specified, only heads that are descendants of
729 729 start will be returned
730 730 if stop is specified, it will consider all the revs from stop
731 731 as if they had no children
732 732 """
733 733 if start is None:
734 734 start = nullid
735 735 if stop is None:
736 736 stop = []
737 737 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
738 738 startrev = self.rev(start)
739 739 reachable = {startrev: 1}
740 740 heads = {startrev: 1}
741 741
742 742 parentrevs = self.parentrevs
743 743 for r in xrange(startrev + 1, self.count()):
744 744 for p in parentrevs(r):
745 745 if p in reachable:
746 746 if r not in stoprevs:
747 747 reachable[r] = 1
748 748 heads[r] = 1
749 749 if p in heads and p not in stoprevs:
750 750 del heads[p]
751 751
752 752 return [self.node(r) for r in heads]
753 753
754 754 def children(self, node):
755 755 """find the children of a given node"""
756 756 c = []
757 757 p = self.rev(node)
758 758 for r in range(p + 1, self.count()):
759 for pr in self.parentrevs(r):
760 if pr == p:
761 c.append(self.node(r))
759 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
760 if prevs:
761 for pr in prevs:
762 if pr == p:
763 c.append(self.node(r))
764 elif p == nullrev:
765 c.append(self.node(r))
762 766 return c
763 767
764 768 def _match(self, id):
765 769 if isinstance(id, (long, int)):
766 770 # rev
767 771 return self.node(id)
768 772 if len(id) == 20:
769 773 # possibly a binary node
770 774 # odds of a binary node being all hex in ASCII are 1 in 10**25
771 775 try:
772 776 node = id
773 777 r = self.rev(node) # quick search the index
774 778 return node
775 779 except LookupError:
776 780 pass # may be partial hex id
777 781 try:
778 782 # str(rev)
779 783 rev = int(id)
780 784 if str(rev) != id: raise ValueError
781 785 if rev < 0: rev = self.count() + rev
782 786 if rev < 0 or rev >= self.count(): raise ValueError
783 787 return self.node(rev)
784 788 except (ValueError, OverflowError):
785 789 pass
786 790 if len(id) == 40:
787 791 try:
788 792 # a full hex nodeid?
789 793 node = bin(id)
790 794 r = self.rev(node)
791 795 return node
792 796 except TypeError:
793 797 pass
794 798
795 799 def _partialmatch(self, id):
796 800 if len(id) < 40:
797 801 try:
798 802 # hex(node)[:...]
799 803 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
800 804 node = None
801 805 for n in self.nodemap:
802 806 if n.startswith(bin_id) and hex(n).startswith(id):
803 807 if node is not None:
804 808 raise LookupError(_("Ambiguous identifier"))
805 809 node = n
806 810 if node is not None:
807 811 return node
808 812 except TypeError:
809 813 pass
810 814
811 815 def lookup(self, id):
812 816 """locate a node based on:
813 817 - revision number or str(revision number)
814 818 - nodeid or subset of hex nodeid
815 819 """
816 820
817 821 n = self._match(id)
818 822 if n is not None:
819 823 return n
820 824 n = self._partialmatch(id)
821 825 if n:
822 826 return n
823 827
824 828 raise LookupError(_("No match found"))
825 829
826 830 def cmp(self, node, text):
827 831 """compare text with a given file revision"""
828 832 p1, p2 = self.parents(node)
829 833 return hash(text, p1, p2) != node
830 834
831 835 def makenode(self, node, text):
832 836 """calculate a file nodeid for text, descended or possibly
833 837 unchanged from node"""
834 838
835 839 if self.cmp(node, text):
836 840 return hash(text, node, nullid)
837 841 return node
838 842
839 843 def diff(self, a, b):
840 844 """return a delta between two revisions"""
841 845 return mdiff.textdiff(a, b)
842 846
843 847 def patches(self, t, pl):
844 848 """apply a list of patches to a string"""
845 849 return mdiff.patches(t, pl)
846 850
847 851 def chunk(self, rev, df=None, cachelen=4096):
848 852 start, length = self.start(rev), self.length(rev)
849 853 inline = self.inlinedata()
850 854 if inline:
851 855 start += (rev + 1) * struct.calcsize(self.indexformat)
852 856 end = start + length
853 857 def loadcache(df):
854 858 cache_length = max(cachelen, length) # 4k
855 859 if not df:
856 860 if inline:
857 861 df = self.opener(self.indexfile)
858 862 else:
859 863 df = self.opener(self.datafile)
860 864 df.seek(start)
861 865 self.chunkcache = (start, df.read(cache_length))
862 866
863 867 if not self.chunkcache:
864 868 loadcache(df)
865 869
866 870 cache_start = self.chunkcache[0]
867 871 cache_end = cache_start + len(self.chunkcache[1])
868 872 if start >= cache_start and end <= cache_end:
869 873 # it is cached
870 874 offset = start - cache_start
871 875 else:
872 876 loadcache(df)
873 877 offset = 0
874 878
875 879 #def checkchunk():
876 880 # df = self.opener(self.datafile)
877 881 # df.seek(start)
878 882 # return df.read(length)
879 883 #assert s == checkchunk()
880 884 return decompress(self.chunkcache[1][offset:offset + length])
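# Note: with inline data every revision's chunk lives in the index file
# directly after its own index entry, which is why chunk() shifts start by
# (rev + 1) * struct.calcsize(self.indexformat) above.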
881 885
882 886 def delta(self, node):
883 887 """return or calculate a delta between a node and its predecessor"""
884 888 r = self.rev(node)
885 889 return self.revdiff(r - 1, r)
886 890
887 891 def revdiff(self, rev1, rev2):
888 892 """return or calculate a delta between two revisions"""
889 893 b1 = self.base(rev1)
890 894 b2 = self.base(rev2)
891 895 if b1 == b2 and rev1 + 1 == rev2:
892 896 return self.chunk(rev2)
893 897 else:
894 898 return self.diff(self.revision(self.node(rev1)),
895 899 self.revision(self.node(rev2)))
896 900
897 901 def revision(self, node):
898 902 """return an uncompressed revision of a given node"""
899 903 if node == nullid: return ""
900 904 if self.cache and self.cache[0] == node: return self.cache[2]
901 905
902 906 # look up what we need to read
903 907 text = None
904 908 rev = self.rev(node)
905 909 base = self.base(rev)
906 910
907 911 if self.inlinedata():
908 912 # we probably have the whole chunk cached
909 913 df = None
910 914 else:
911 915 df = self.opener(self.datafile)
912 916
913 917 # do we have useful data cached?
914 918 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
915 919 base = self.cache[1]
916 920 text = self.cache[2]
917 921 self.loadindex(base, rev + 1)
918 922 else:
919 923 self.loadindex(base, rev + 1)
920 924 text = self.chunk(base, df=df)
921 925
922 926 bins = []
923 927 for r in xrange(base + 1, rev + 1):
924 928 bins.append(self.chunk(r, df=df))
925 929
926 930 text = self.patches(text, bins)
927 931
928 932 p1, p2 = self.parents(node)
929 933 if node != hash(text, p1, p2):
930 934 raise RevlogError(_("integrity check failed on %s:%d")
931 935 % (self.datafile, rev))
932 936
933 937 self.cache = (node, rev, text)
934 938 return text
935 939
936 940 def checkinlinesize(self, tr, fp=None):
937 941 if not self.inlinedata():
938 942 return
939 943 if not fp:
940 944 fp = self.opener(self.indexfile, 'r')
941 945 fp.seek(0, 2)
942 946 size = fp.tell()
943 947 if size < 131072:
944 948 return
945 949 trinfo = tr.find(self.indexfile)
946 950 if trinfo is None:
947 951 raise RevlogError(_("%s not found in the transaction")
948 952 % self.indexfile)
949 953
950 954 trindex = trinfo[2]
951 955 dataoff = self.start(trindex)
952 956
953 957 tr.add(self.datafile, dataoff)
954 958 df = self.opener(self.datafile, 'w')
955 959 calc = struct.calcsize(self.indexformat)
956 960 for r in xrange(self.count()):
957 961 start = self.start(r) + (r + 1) * calc
958 962 length = self.length(r)
959 963 fp.seek(start)
960 964 d = fp.read(length)
961 965 df.write(d)
962 966 fp.close()
963 967 df.close()
964 968 fp = self.opener(self.indexfile, 'w', atomictemp=True)
965 969 self.version &= ~(REVLOGNGINLINEDATA)
966 970 if self.count():
967 971 x = self.index[0]
968 972 e = struct.pack(self.indexformat, *x)[4:]
969 973 l = struct.pack(versionformat, self.version)
970 974 fp.write(l)
971 975 fp.write(e)
972 976
973 977 for i in xrange(1, self.count()):
974 978 x = self.index[i]
975 979 e = struct.pack(self.indexformat, *x)
976 980 fp.write(e)
977 981
978 982 # if we don't call rename, the temp file will never replace the
979 983 # real index
980 984 fp.rename()
981 985
982 986 tr.replace(self.indexfile, trindex * calc)
983 987 self.chunkcache = None
984 988
985 989 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
986 990 """add a revision to the log
987 991
988 992 text - the revision data to add
989 993 transaction - the transaction object used for rollback
990 994 link - the linkrev data to add
991 995 p1, p2 - the parent nodeids of the revision
992 996 d - an optional precomputed delta
993 997 """
994 998 if not self.inlinedata():
995 999 dfh = self.opener(self.datafile, "a")
996 1000 else:
997 1001 dfh = None
998 1002 ifh = self.opener(self.indexfile, "a+")
999 1003 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1000 1004
1001 1005 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1002 1006 if text is None: text = ""
1003 1007 if p1 is None: p1 = self.tip()
1004 1008 if p2 is None: p2 = nullid
1005 1009
1006 1010 node = hash(text, p1, p2)
1007 1011
1008 1012 if node in self.nodemap:
1009 1013 return node
1010 1014
1011 1015 n = self.count()
1012 1016 t = n - 1
1013 1017
1014 1018 if n:
1015 1019 base = self.base(t)
1016 1020 start = self.start(base)
1017 1021 end = self.end(t)
1018 1022 if not d:
1019 1023 prev = self.revision(self.tip())
1020 1024 d = self.diff(prev, text)
1021 1025 data = compress(d)
1022 1026 l = len(data[1]) + len(data[0])
1023 1027 dist = end - start + l
1024 1028
1025 1029 # full versions are inserted when the needed deltas
1026 1030 # become comparable to the uncompressed text
1027 1031 if not n or dist > len(text) * 2:
1028 1032 data = compress(text)
1029 1033 l = len(data[1]) + len(data[0])
1030 1034 base = n
1031 1035 else:
1032 1036 base = self.base(t)
1033 1037
1034 1038 offset = 0
1035 1039 if t >= 0:
1036 1040 offset = self.end(t)
1037 1041
1038 1042 if self.version == REVLOGV0:
1039 1043 e = (offset, l, base, link, p1, p2, node)
1040 1044 else:
1041 1045 e = (self.offset_type(offset, 0), l, len(text),
1042 1046 base, link, self.rev(p1), self.rev(p2), node)
1043 1047
1044 1048 self.index.append(e)
1045 1049 self.nodemap[node] = n
1046 1050 entry = struct.pack(self.indexformat, *e)
1047 1051
1048 1052 if not self.inlinedata():
1049 1053 transaction.add(self.datafile, offset)
1050 1054 transaction.add(self.indexfile, n * len(entry))
1051 1055 if data[0]:
1052 1056 dfh.write(data[0])
1053 1057 dfh.write(data[1])
1054 1058 dfh.flush()
1055 1059 else:
1056 1060 ifh.seek(0, 2)
1057 1061 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1058 1062
1059 1063 if len(self.index) == 1 and self.version != REVLOGV0:
1060 1064 l = struct.pack(versionformat, self.version)
1061 1065 ifh.write(l)
1062 1066 entry = entry[4:]
1063 1067
1064 1068 ifh.write(entry)
1065 1069
1066 1070 if self.inlinedata():
1067 1071 ifh.write(data[0])
1068 1072 ifh.write(data[1])
1069 1073 self.checkinlinesize(transaction, ifh)
1070 1074
1071 1075 self.cache = (node, n, text)
1072 1076 return node
1073 1077
1074 1078 def ancestor(self, a, b):
1075 1079 """calculate the least common ancestor of nodes a and b"""
1076 1080
1077 1081 def parents(rev):
1078 1082 return [p for p in self.parentrevs(rev) if p != nullrev]
1079 1083
1080 1084 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1081 1085 if c is None:
1082 1086 return nullid
1083 1087
1084 1088 return self.node(c)
1085 1089
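# Note: ancestor() works purely on revision numbers, stripping nullrev
# padding via the local parents() helper, and falls back to nullid when
# the two nodes share no history at all.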
1086 1090 def group(self, nodelist, lookup, infocollect=None):
1087 1091 """calculate a delta group
1088 1092
1089 1093 Given a list of changeset revs, return a set of deltas and
1090 1094 metadata corresponding to nodes. The first delta is
1091 1095 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1092 1096 have this parent as it has all history before these
1093 1097 changesets; the parent used is parents(nodes[0])[0].
1094 1098 """
1095 1099 revs = [self.rev(n) for n in nodelist]
1096 1100
1097 1101 # if we don't have any revisions touched by these changesets, bail
1098 1102 if not revs:
1099 1103 yield changegroup.closechunk()
1100 1104 return
1101 1105
1102 1106 # add the parent of the first rev
1103 1107 p = self.parents(self.node(revs[0]))[0]
1104 1108 revs.insert(0, self.rev(p))
1105 1109
1106 1110 # build deltas
1107 1111 for d in xrange(0, len(revs) - 1):
1108 1112 a, b = revs[d], revs[d + 1]
1109 1113 nb = self.node(b)
1110 1114
1111 1115 if infocollect is not None:
1112 1116 infocollect(nb)
1113 1117
1114 1118 d = self.revdiff(a, b)
1115 1119 p = self.parents(nb)
1116 1120 meta = nb + p[0] + p[1] + lookup(nb)
1117 1121 yield changegroup.genchunk("%s%s" % (meta, d))
1118 1122
1119 1123 yield changegroup.closechunk()
1120 1124
1121 1125 def addgroup(self, revs, linkmapper, transaction, unique=0):
1122 1126 """
1123 1127 add a delta group
1124 1128
1125 1129 Given a set of deltas, add them to the revision log. The
1126 1130 first delta is against its parent, which should be in our
1127 1131 log; the rest are against the previous delta.
1128 1132 """
1129 1133
1130 1134 # track the base of the current delta log
1131 1135 r = self.count()
1132 1136 t = r - 1
1133 1137 node = None
1134 1138
1135 1139 base = prev = nullrev
1136 1140 start = end = textlen = 0
1137 1141 if r:
1138 1142 end = self.end(t)
1139 1143
1140 1144 ifh = self.opener(self.indexfile, "a+")
1141 1145 ifh.seek(0, 2)
1142 1146 transaction.add(self.indexfile, ifh.tell(), self.count())
1143 1147 if self.inlinedata():
1144 1148 dfh = None
1145 1149 else:
1146 1150 transaction.add(self.datafile, end)
1147 1151 dfh = self.opener(self.datafile, "a")
1148 1152
1149 1153 # loop through our set of deltas
1150 1154 chain = None
1151 1155 for chunk in revs:
1152 1156 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1153 1157 link = linkmapper(cs)
1154 1158 if node in self.nodemap:
1155 1159 # this can happen if two branches make the same change
1156 1160 # if unique:
1157 1161 # raise RevlogError(_("already have %s") % hex(node[:4]))
1158 1162 chain = node
1159 1163 continue
1160 1164 delta = chunk[80:]
1161 1165
1162 1166 for p in (p1, p2):
1163 1167 if p not in self.nodemap:
1164 1168 raise LookupError(_("unknown parent %s") % short(p))
1165 1169
1166 1170 if not chain:
1167 1171 # retrieve the parent revision of the delta chain
1168 1172 chain = p1
1169 1173 if chain not in self.nodemap:
1170 1174 raise LookupError(_("unknown base %s") % short(chain[:4]))
1171 1175
1172 1176 # full versions are inserted when the needed deltas become
1173 1177 # comparable to the uncompressed text or when the previous
1174 1178 # version is not the one we have a delta against. We use
1175 1179 # the size of the previous full rev as a proxy for the
1176 1180 # current size.
1177 1181
1178 1182 if chain == prev:
1179 1183 tempd = compress(delta)
1180 1184 cdelta = tempd[0] + tempd[1]
1181 1185 textlen = mdiff.patchedsize(textlen, delta)
1182 1186
1183 1187 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1184 1188 # flush our writes here so we can read it in revision
1185 1189 if dfh:
1186 1190 dfh.flush()
1187 1191 ifh.flush()
1188 1192 text = self.revision(chain)
1189 1193 text = self.patches(text, [delta])
1190 1194 chk = self._addrevision(text, transaction, link, p1, p2, None,
1191 1195 ifh, dfh)
1192 1196 if not dfh and not self.inlinedata():
1193 1197 # addrevision switched from inline to conventional
1194 1198 # reopen the index
1195 1199 dfh = self.opener(self.datafile, "a")
1196 1200 ifh = self.opener(self.indexfile, "a")
1197 1201 if chk != node:
1198 1202 raise RevlogError(_("consistency error adding group"))
1199 1203 textlen = len(text)
1200 1204 else:
1201 1205 if self.version == REVLOGV0:
1202 1206 e = (end, len(cdelta), base, link, p1, p2, node)
1203 1207 else:
1204 1208 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1205 1209 link, self.rev(p1), self.rev(p2), node)
1206 1210 self.index.append(e)
1207 1211 self.nodemap[node] = r
1208 1212 if self.inlinedata():
1209 1213 ifh.write(struct.pack(self.indexformat, *e))
1210 1214 ifh.write(cdelta)
1211 1215 self.checkinlinesize(transaction, ifh)
1212 1216 if not self.inlinedata():
1213 1217 dfh = self.opener(self.datafile, "a")
1214 1218 ifh = self.opener(self.indexfile, "a")
1215 1219 else:
1216 1220 dfh.write(cdelta)
1217 1221 ifh.write(struct.pack(self.indexformat, *e))
1218 1222
1219 1223 t, r, chain, prev = r, r + 1, node, node
1220 1224 base = self.base(t)
1221 1225 start = self.start(base)
1222 1226 end = self.end(t)
1223 1227
1224 1228 return node
1225 1229
1226 1230 def strip(self, rev, minlink):
1227 1231 if self.count() == 0 or rev >= self.count():
1228 1232 return
1229 1233
1230 1234 if isinstance(self.index, lazyindex):
1231 1235 self.loadindexmap()
1232 1236
1233 1237 # When stripping away a revision, we need to make sure it
1234 1238 # does not actually belong to an older changeset.
1235 1239 # The minlink parameter defines the oldest revision
1236 1240 # we're allowed to strip away.
1237 1241 while minlink > self.index[rev][-4]:
1238 1242 rev += 1
1239 1243 if rev >= self.count():
1240 1244 return
1241 1245
1242 1246 # first truncate the files on disk
1243 1247 end = self.start(rev)
1244 1248 if not self.inlinedata():
1245 1249 df = self.opener(self.datafile, "a")
1246 1250 df.truncate(end)
1247 1251 end = rev * struct.calcsize(self.indexformat)
1248 1252 else:
1249 1253 end += rev * struct.calcsize(self.indexformat)
1250 1254
1251 1255 indexf = self.opener(self.indexfile, "a")
1252 1256 indexf.truncate(end)
1253 1257
1254 1258 # then reset internal state in memory to forget those revisions
1255 1259 self.cache = None
1256 1260 self.chunkcache = None
1257 1261 for x in xrange(rev, self.count()):
1258 1262 del self.nodemap[self.node(x)]
1259 1263
1260 1264 del self.index[rev:]
1261 1265
1262 1266 def checksize(self):
1263 1267 expected = 0
1264 1268 if self.count():
1265 1269 expected = self.end(self.count() - 1)
1266 1270
1267 1271 try:
1268 1272 f = self.opener(self.datafile)
1269 1273 f.seek(0, 2)
1270 1274 actual = f.tell()
1271 1275 dd = actual - expected
1272 1276 except IOError, inst:
1273 1277 if inst.errno != errno.ENOENT:
1274 1278 raise
1275 1279 dd = 0
1276 1280
1277 1281 try:
1278 1282 f = self.opener(self.indexfile)
1279 1283 f.seek(0, 2)
1280 1284 actual = f.tell()
1281 1285 s = struct.calcsize(self.indexformat)
1282 1286 i = actual / s
1283 1287 di = actual - (i * s)
1284 1288 if self.inlinedata():
1285 1289 databytes = 0
1286 1290 for r in xrange(self.count()):
1287 1291 databytes += self.length(r)
1288 1292 dd = 0
1289 1293 di = actual - self.count() * s - databytes
1290 1294 except IOError, inst:
1291 1295 if inst.errno != errno.ENOENT:
1292 1296 raise
1293 1297 di = 0
1294 1298
1295 1299 return (dd, di)
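# Note: checksize() returns (dd, di): dd is the number of unexpected bytes
# at the end of the data file and di the number of trailing partial-record
# bytes in the index; (0, 0) means both files match the recorded sizes.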
1296 1300
1297 1301