revlog: make revlogv0 loading more robust against corruption
Matt Mackall
r5544:686899a7 default
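The change below hardens revlogoldio.parseindex: when a v0 index entry is converted to the revlogv1 in-memory format, the parent-node lookups now use nodemap.get(..., nullrev) instead of direct indexing, so a parent hash that is missing from the node map (as can happen with a corrupt or truncated v0 index) degrades to the null revision rather than aborting the load with a KeyError. A minimal sketch of the behavioural difference, using made-up placeholder values rather than real revlog data:

# Hedged sketch (not the real revlog code): how the v0 parent lookup in
# parseindex behaves before and after this change. nullrev mirrors the
# revlog "no revision" sentinel; the node values are fabricated.
nullrev = -1
nodemap = {b"\x00" * 20: nullrev, b"a" * 20: 0}   # placeholder node map
corrupt_parent = b"b" * 20                        # hash absent from the map

# old behaviour: a damaged entry aborts index parsing
try:
    p1 = nodemap[corrupt_parent]
except KeyError:
    p1 = "KeyError: loading stops"

# new behaviour: the unknown parent maps to nullrev and parsing continues
p1 = nodemap.get(corrupt_parent, nullrev)
print(p1)   # -1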
@@ -1,1305 +1,1305 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22 _sha = sha.new
23 23
24 24 # revlog flags
25 25 REVLOGV0 = 0
26 26 REVLOGNG = 1
27 27 REVLOGNGINLINEDATA = (1 << 16)
28 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 31
32 32 class RevlogError(Exception):
33 33 pass
34 34 class LookupError(RevlogError):
35 35 pass
36 36
37 37 def getoffset(q):
38 38 return int(q >> 16)
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43 def offset_type(offset, type):
44 44 return long(long(offset) << 16 | type)
45 45
46 46 def hash(text, p1, p2):
47 47 """generate a hash from the given text and its parent hashes
48 48
49 49 This hash combines both the current file contents and its history
50 50 in a manner that makes it easy to distinguish nodes with the same
51 51 content in the revision graph.
52 52 """
53 53 l = [p1, p2]
54 54 l.sort()
55 55 s = _sha(l[0])
56 56 s.update(l[1])
57 57 s.update(text)
58 58 return s.digest()
59 59
60 60 def compress(text):
61 61 """ generate a possibly-compressed representation of text """
62 62 if not text:
63 63 return ("", text)
64 64 l = len(text)
65 65 bin = None
66 66 if l < 44:
67 67 pass
68 68 elif l > 1000000:
69 69 # zlib makes an internal copy, thus doubling memory usage for
70 70 # large files, so lets do this in pieces
71 71 z = zlib.compressobj()
72 72 p = []
73 73 pos = 0
74 74 while pos < l:
75 75 pos2 = pos + 2**20
76 76 p.append(z.compress(text[pos:pos2]))
77 77 pos = pos2
78 78 p.append(z.flush())
79 79 if sum(map(len, p)) < l:
80 80 bin = "".join(p)
81 81 else:
82 82 bin = _compress(text)
83 83 if bin is None or len(bin) > l:
84 84 if text[0] == '\0':
85 85 return ("", text)
86 86 return ('u', text)
87 87 return ("", bin)
88 88
89 89 def decompress(bin):
90 90 """ decompress the given input """
91 91 if not bin:
92 92 return bin
93 93 t = bin[0]
94 94 if t == '\0':
95 95 return bin
96 96 if t == 'x':
97 97 return _decompress(bin)
98 98 if t == 'u':
99 99 return bin[1:]
100 100 raise RevlogError(_("unknown compression type %r") % t)
101 101
102 102 class lazyparser(object):
103 103 """
104 104 this class avoids the need to parse the entirety of large indices
105 105 """
106 106
107 107 # lazyparser is not safe to use on windows if win32 extensions not
108 108 # available. it keeps file handle open, which make it not possible
109 109 # to break hardlinks on local cloned repos.
110 110 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
111 111 hasattr(util, 'win32api'))
112 112
113 113 def __init__(self, dataf, size):
114 114 self.dataf = dataf
115 115 self.s = struct.calcsize(indexformatng)
116 116 self.datasize = size
117 117 self.l = size/self.s
118 118 self.index = [None] * self.l
119 119 self.map = {nullid: nullrev}
120 120 self.allmap = 0
121 121 self.all = 0
122 122 self.mapfind_count = 0
123 123
124 124 def loadmap(self):
125 125 """
126 126 during a commit, we need to make sure the rev being added is
127 127 not a duplicate. This requires loading the entire index,
128 128 which is fairly slow. loadmap can load up just the node map,
129 129 which takes much less time.
130 130 """
131 131 if self.allmap:
132 132 return
133 133 end = self.datasize
134 134 self.allmap = 1
135 135 cur = 0
136 136 count = 0
137 137 blocksize = self.s * 256
138 138 self.dataf.seek(0)
139 139 while cur < end:
140 140 data = self.dataf.read(blocksize)
141 141 off = 0
142 142 for x in xrange(256):
143 143 n = data[off + ngshaoffset:off + ngshaoffset + 20]
144 144 self.map[n] = count
145 145 count += 1
146 146 if count >= self.l:
147 147 break
148 148 off += self.s
149 149 cur += blocksize
150 150
151 151 def loadblock(self, blockstart, blocksize, data=None):
152 152 if self.all:
153 153 return
154 154 if data is None:
155 155 self.dataf.seek(blockstart)
156 156 if blockstart + blocksize > self.datasize:
157 157 # the revlog may have grown since we've started running,
158 158 # but we don't have space in self.index for more entries.
159 159 # limit blocksize so that we don't get too much data.
160 160 blocksize = max(self.datasize - blockstart, 0)
161 161 data = self.dataf.read(blocksize)
162 162 lend = len(data) / self.s
163 163 i = blockstart / self.s
164 164 off = 0
165 165 # lazyindex supports __delitem__
166 166 if lend > len(self.index) - i:
167 167 lend = len(self.index) - i
168 168 for x in xrange(lend):
169 169 if self.index[i + x] == None:
170 170 b = data[off : off + self.s]
171 171 self.index[i + x] = b
172 172 n = b[ngshaoffset:ngshaoffset + 20]
173 173 self.map[n] = i + x
174 174 off += self.s
175 175
176 176 def findnode(self, node):
177 177 """search backwards through the index file for a specific node"""
178 178 if self.allmap:
179 179 return None
180 180
181 181 # hg log will cause many many searches for the manifest
182 182 # nodes. After we get called a few times, just load the whole
183 183 # thing.
184 184 if self.mapfind_count > 8:
185 185 self.loadmap()
186 186 if node in self.map:
187 187 return node
188 188 return None
189 189 self.mapfind_count += 1
190 190 last = self.l - 1
191 191 while self.index[last] != None:
192 192 if last == 0:
193 193 self.all = 1
194 194 self.allmap = 1
195 195 return None
196 196 last -= 1
197 197 end = (last + 1) * self.s
198 198 blocksize = self.s * 256
199 199 while end >= 0:
200 200 start = max(end - blocksize, 0)
201 201 self.dataf.seek(start)
202 202 data = self.dataf.read(end - start)
203 203 findend = end - start
204 204 while True:
205 205 # we're searching backwards, so we have to make sure
206 206 # we don't find a changeset where this node is a parent
207 207 off = data.find(node, 0, findend)
208 208 findend = off
209 209 if off >= 0:
210 210 i = off / self.s
211 211 off = i * self.s
212 212 n = data[off + ngshaoffset:off + ngshaoffset + 20]
213 213 if n == node:
214 214 self.map[n] = i + start / self.s
215 215 return node
216 216 else:
217 217 break
218 218 end -= blocksize
219 219 return None
220 220
221 221 def loadindex(self, i=None, end=None):
222 222 if self.all:
223 223 return
224 224 all = False
225 225 if i == None:
226 226 blockstart = 0
227 227 blocksize = (65536 / self.s) * self.s
228 228 end = self.datasize
229 229 all = True
230 230 else:
231 231 if end:
232 232 blockstart = i * self.s
233 233 end = end * self.s
234 234 blocksize = end - blockstart
235 235 else:
236 236 blockstart = (i & ~1023) * self.s
237 237 blocksize = self.s * 1024
238 238 end = blockstart + blocksize
239 239 while blockstart < end:
240 240 self.loadblock(blockstart, blocksize)
241 241 blockstart += blocksize
242 242 if all:
243 243 self.all = True
244 244
245 245 class lazyindex(object):
246 246 """a lazy version of the index array"""
247 247 def __init__(self, parser):
248 248 self.p = parser
249 249 def __len__(self):
250 250 return len(self.p.index)
251 251 def load(self, pos):
252 252 if pos < 0:
253 253 pos += len(self.p.index)
254 254 self.p.loadindex(pos)
255 255 return self.p.index[pos]
256 256 def __getitem__(self, pos):
257 257 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
258 258 def __setitem__(self, pos, item):
259 259 self.p.index[pos] = _pack(indexformatng, *item)
260 260 def __delitem__(self, pos):
261 261 del self.p.index[pos]
262 262 def insert(self, pos, e):
263 263 self.p.index.insert(pos, _pack(indexformatng, *e))
264 264 def append(self, e):
265 265 self.p.index.append(_pack(indexformatng, *e))
266 266
267 267 class lazymap(object):
268 268 """a lazy version of the node map"""
269 269 def __init__(self, parser):
270 270 self.p = parser
271 271 def load(self, key):
272 272 n = self.p.findnode(key)
273 273 if n == None:
274 274 raise KeyError(key)
275 275 def __contains__(self, key):
276 276 if key in self.p.map:
277 277 return True
278 278 self.p.loadmap()
279 279 return key in self.p.map
280 280 def __iter__(self):
281 281 yield nullid
282 282 for i in xrange(self.p.l):
283 283 ret = self.p.index[i]
284 284 if not ret:
285 285 self.p.loadindex(i)
286 286 ret = self.p.index[i]
287 287 if isinstance(ret, str):
288 288 ret = _unpack(indexformatng, ret)
289 289 yield ret[7]
290 290 def __getitem__(self, key):
291 291 try:
292 292 return self.p.map[key]
293 293 except KeyError:
294 294 try:
295 295 self.load(key)
296 296 return self.p.map[key]
297 297 except KeyError:
298 298 raise KeyError("node " + hex(key))
299 299 def __setitem__(self, key, val):
300 300 self.p.map[key] = val
301 301 def __delitem__(self, key):
302 302 del self.p.map[key]
303 303
304 304 indexformatv0 = ">4l20s20s20s"
305 305 v0shaoffset = 56
306 306
307 307 class revlogoldio(object):
308 308 def __init__(self):
309 309 self.size = struct.calcsize(indexformatv0)
310 310
311 311 def parseindex(self, fp, inline):
312 312 s = self.size
313 313 index = []
314 314 nodemap = {nullid: nullrev}
315 315 n = off = 0
316 316 data = fp.read()
317 317 l = len(data)
318 318 while off + s <= l:
319 319 cur = data[off:off + s]
320 320 off += s
321 321 e = _unpack(indexformatv0, cur)
322 322 # transform to revlogv1 format
323 323 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
324 nodemap[e[4]], nodemap[e[5]], e[6])
324 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
325 325 index.append(e2)
326 326 nodemap[e[6]] = n
327 327 n += 1
328 328
329 329 return index, nodemap, None
330 330
331 331 def packentry(self, entry, node, version, rev):
332 332 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
333 333 node(entry[5]), node(entry[6]), entry[7])
334 334 return _pack(indexformatv0, *e2)
335 335
336 336 # index ng:
337 337 # 6 bytes offset
338 338 # 2 bytes flags
339 339 # 4 bytes compressed length
340 340 # 4 bytes uncompressed length
341 341 # 4 bytes: base rev
342 342 # 4 bytes link rev
343 343 # 4 bytes parent 1 rev
344 344 # 4 bytes parent 2 rev
345 345 # 32 bytes: nodeid
346 346 indexformatng = ">Qiiiiii20s12x"
347 347 ngshaoffset = 32
348 348 versionformat = ">I"
349 349
350 350 class revlogio(object):
351 351 def __init__(self):
352 352 self.size = struct.calcsize(indexformatng)
353 353
354 354 def parseindex(self, fp, inline):
355 355 try:
356 356 size = util.fstat(fp).st_size
357 357 except AttributeError:
358 358 size = 0
359 359
360 360 if lazyparser.safe_to_use and not inline and size > 1000000:
361 361 # big index, let's parse it on demand
362 362 parser = lazyparser(fp, size)
363 363 index = lazyindex(parser)
364 364 nodemap = lazymap(parser)
365 365 e = list(index[0])
366 366 type = gettype(e[0])
367 367 e[0] = offset_type(0, type)
368 368 index[0] = e
369 369 return index, nodemap, None
370 370
371 371 s = self.size
372 372 cache = None
373 373 index = []
374 374 nodemap = {nullid: nullrev}
375 375 n = off = 0
376 376 # if we're not using lazymap, always read the whole index
377 377 data = fp.read()
378 378 l = len(data) - s
379 379 append = index.append
380 380 if inline:
381 381 cache = (0, data)
382 382 while off <= l:
383 383 e = _unpack(indexformatng, data[off:off + s])
384 384 nodemap[e[7]] = n
385 385 append(e)
386 386 n += 1
387 387 if e[1] < 0:
388 388 break
389 389 off += e[1] + s
390 390 else:
391 391 while off <= l:
392 392 e = _unpack(indexformatng, data[off:off + s])
393 393 nodemap[e[7]] = n
394 394 append(e)
395 395 n += 1
396 396 off += s
397 397
398 398 e = list(index[0])
399 399 type = gettype(e[0])
400 400 e[0] = offset_type(0, type)
401 401 index[0] = e
402 402
403 403 return index, nodemap, cache
404 404
405 405 def packentry(self, entry, node, version, rev):
406 406 p = _pack(indexformatng, *entry)
407 407 if rev == 0:
408 408 p = _pack(versionformat, version) + p[4:]
409 409 return p
410 410
411 411 class revlog(object):
412 412 """
413 413 the underlying revision storage object
414 414
415 415 A revlog consists of two parts, an index and the revision data.
416 416
417 417 The index is a file with a fixed record size containing
418 418 information on each revision, includings its nodeid (hash), the
419 419 nodeids of its parents, the position and offset of its data within
420 420 the data file, and the revision it's based on. Finally, each entry
421 421 contains a linkrev entry that can serve as a pointer to external
422 422 data.
423 423
424 424 The revision data itself is a linear collection of data chunks.
425 425 Each chunk represents a revision and is usually represented as a
426 426 delta against the previous chunk. To bound lookup time, runs of
427 427 deltas are limited to about 2 times the length of the original
428 428 version data. This makes retrieval of a version proportional to
429 429 its size, or O(1) relative to the number of revisions.
430 430
431 431 Both pieces of the revlog are written to in an append-only
432 432 fashion, which means we never need to rewrite a file to insert or
433 433 remove data, and can use some simple techniques to avoid the need
434 434 for locking while reading.
435 435 """
436 436 def __init__(self, opener, indexfile):
437 437 """
438 438 create a revlog object
439 439
440 440 opener is a function that abstracts the file opening operation
441 441 and can be used to implement COW semantics or the like.
442 442 """
443 443 self.indexfile = indexfile
444 444 self.datafile = indexfile[:-2] + ".d"
445 445 self.opener = opener
446 446 self._cache = None
447 447 self._chunkcache = None
448 448 self.nodemap = {nullid: nullrev}
449 449 self.index = []
450 450
451 451 v = REVLOG_DEFAULT_VERSION
452 452 if hasattr(opener, "defversion"):
453 453 v = opener.defversion
454 454 if v & REVLOGNG:
455 455 v |= REVLOGNGINLINEDATA
456 456
457 457 i = ""
458 458 try:
459 459 f = self.opener(self.indexfile)
460 460 i = f.read(4)
461 461 f.seek(0)
462 462 if len(i) > 0:
463 463 v = struct.unpack(versionformat, i)[0]
464 464 except IOError, inst:
465 465 if inst.errno != errno.ENOENT:
466 466 raise
467 467
468 468 self.version = v
469 469 self._inline = v & REVLOGNGINLINEDATA
470 470 flags = v & ~0xFFFF
471 471 fmt = v & 0xFFFF
472 472 if fmt == REVLOGV0 and flags:
473 473 raise RevlogError(_("index %s unknown flags %#04x for format v0")
474 474 % (self.indexfile, flags >> 16))
475 475 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
476 476 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
477 477 % (self.indexfile, flags >> 16))
478 478 elif fmt > REVLOGNG:
479 479 raise RevlogError(_("index %s unknown format %d")
480 480 % (self.indexfile, fmt))
481 481
482 482 self._io = revlogio()
483 483 if self.version == REVLOGV0:
484 484 self._io = revlogoldio()
485 485 if i:
486 486 d = self._io.parseindex(f, self._inline)
487 487 self.index, self.nodemap, self._chunkcache = d
488 488
489 489 # add the magic null revision at -1
490 490 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
491 491
492 492 def _loadindex(self, start, end):
493 493 """load a block of indexes all at once from the lazy parser"""
494 494 if isinstance(self.index, lazyindex):
495 495 self.index.p.loadindex(start, end)
496 496
497 497 def _loadindexmap(self):
498 498 """loads both the map and the index from the lazy parser"""
499 499 if isinstance(self.index, lazyindex):
500 500 p = self.index.p
501 501 p.loadindex()
502 502 self.nodemap = p.map
503 503
504 504 def _loadmap(self):
505 505 """loads the map from the lazy parser"""
506 506 if isinstance(self.nodemap, lazymap):
507 507 self.nodemap.p.loadmap()
508 508 self.nodemap = self.nodemap.p.map
509 509
510 510 def tip(self):
511 511 return self.node(len(self.index) - 2)
512 512 def count(self):
513 513 return len(self.index) - 1
514 514
515 515 def rev(self, node):
516 516 try:
517 517 return self.nodemap[node]
518 518 except KeyError:
519 519 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
520 520 def node(self, rev):
521 521 return self.index[rev][7]
522 522 def linkrev(self, node):
523 523 return self.index[self.rev(node)][4]
524 524 def parents(self, node):
525 525 d = self.index[self.rev(node)][5:7]
526 526 return (self.node(d[0]), self.node(d[1]))
527 527 def parentrevs(self, rev):
528 528 return self.index[rev][5:7]
529 529 def start(self, rev):
530 530 return int(self.index[rev][0] >> 16)
531 531 def end(self, rev):
532 532 return self.start(rev) + self.length(rev)
533 533 def length(self, rev):
534 534 return self.index[rev][1]
535 535 def base(self, rev):
536 536 return self.index[rev][3]
537 537
538 538 def size(self, rev):
539 539 """return the length of the uncompressed text for a given revision"""
540 540 l = self.index[rev][2]
541 541 if l >= 0:
542 542 return l
543 543
544 544 t = self.revision(self.node(rev))
545 545 return len(t)
546 546
547 547 # alternate implementation, The advantage to this code is it
548 548 # will be faster for a single revision. But, the results are not
549 549 # cached, so finding the size of every revision will be slower.
550 550 """
551 551 if self.cache and self.cache[1] == rev:
552 552 return len(self.cache[2])
553 553
554 554 base = self.base(rev)
555 555 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
556 556 base = self.cache[1]
557 557 text = self.cache[2]
558 558 else:
559 559 text = self.revision(self.node(base))
560 560
561 561 l = len(text)
562 562 for x in xrange(base + 1, rev + 1):
563 563 l = mdiff.patchedsize(l, self.chunk(x))
564 564 return l
565 565 """
566 566
567 567 def reachable(self, node, stop=None):
568 568 """return a hash of all nodes ancestral to a given node, including
569 569 the node itself, stopping when stop is matched"""
570 570 reachable = {}
571 571 visit = [node]
572 572 reachable[node] = 1
573 573 if stop:
574 574 stopn = self.rev(stop)
575 575 else:
576 576 stopn = 0
577 577 while visit:
578 578 n = visit.pop(0)
579 579 if n == stop:
580 580 continue
581 581 if n == nullid:
582 582 continue
583 583 for p in self.parents(n):
584 584 if self.rev(p) < stopn:
585 585 continue
586 586 if p not in reachable:
587 587 reachable[p] = 1
588 588 visit.append(p)
589 589 return reachable
590 590
591 591 def nodesbetween(self, roots=None, heads=None):
592 592 """Return a tuple containing three elements. Elements 1 and 2 contain
593 593 a final list bases and heads after all the unreachable ones have been
594 594 pruned. Element 0 contains a topologically sorted list of all
595 595
596 596 nodes that satisfy these constraints:
597 597 1. All nodes must be descended from a node in roots (the nodes on
598 598 roots are considered descended from themselves).
599 599 2. All nodes must also be ancestors of a node in heads (the nodes in
600 600 heads are considered to be their own ancestors).
601 601
602 602 If roots is unspecified, nullid is assumed as the only root.
603 603 If heads is unspecified, it is taken to be the output of the
604 604 heads method (i.e. a list of all nodes in the repository that
605 605 have no children)."""
606 606 nonodes = ([], [], [])
607 607 if roots is not None:
608 608 roots = list(roots)
609 609 if not roots:
610 610 return nonodes
611 611 lowestrev = min([self.rev(n) for n in roots])
612 612 else:
613 613 roots = [nullid] # Everybody's a descendent of nullid
614 614 lowestrev = nullrev
615 615 if (lowestrev == nullrev) and (heads is None):
616 616 # We want _all_ the nodes!
617 617 return ([self.node(r) for r in xrange(0, self.count())],
618 618 [nullid], list(self.heads()))
619 619 if heads is None:
620 620 # All nodes are ancestors, so the latest ancestor is the last
621 621 # node.
622 622 highestrev = self.count() - 1
623 623 # Set ancestors to None to signal that every node is an ancestor.
624 624 ancestors = None
625 625 # Set heads to an empty dictionary for later discovery of heads
626 626 heads = {}
627 627 else:
628 628 heads = list(heads)
629 629 if not heads:
630 630 return nonodes
631 631 ancestors = {}
632 632 # Turn heads into a dictionary so we can remove 'fake' heads.
633 633 # Also, later we will be using it to filter out the heads we can't
634 634 # find from roots.
635 635 heads = dict.fromkeys(heads, 0)
636 636 # Start at the top and keep marking parents until we're done.
637 637 nodestotag = heads.keys()
638 638 # Remember where the top was so we can use it as a limit later.
639 639 highestrev = max([self.rev(n) for n in nodestotag])
640 640 while nodestotag:
641 641 # grab a node to tag
642 642 n = nodestotag.pop()
643 643 # Never tag nullid
644 644 if n == nullid:
645 645 continue
646 646 # A node's revision number represents its place in a
647 647 # topologically sorted list of nodes.
648 648 r = self.rev(n)
649 649 if r >= lowestrev:
650 650 if n not in ancestors:
651 651 # If we are possibly a descendent of one of the roots
652 652 # and we haven't already been marked as an ancestor
653 653 ancestors[n] = 1 # Mark as ancestor
654 654 # Add non-nullid parents to list of nodes to tag.
655 655 nodestotag.extend([p for p in self.parents(n) if
656 656 p != nullid])
657 657 elif n in heads: # We've seen it before, is it a fake head?
658 658 # So it is, real heads should not be the ancestors of
659 659 # any other heads.
660 660 heads.pop(n)
661 661 if not ancestors:
662 662 return nonodes
663 663 # Now that we have our set of ancestors, we want to remove any
664 664 # roots that are not ancestors.
665 665
666 666 # If one of the roots was nullid, everything is included anyway.
667 667 if lowestrev > nullrev:
668 668 # But, since we weren't, let's recompute the lowest rev to not
669 669 # include roots that aren't ancestors.
670 670
671 671 # Filter out roots that aren't ancestors of heads
672 672 roots = [n for n in roots if n in ancestors]
673 673 # Recompute the lowest revision
674 674 if roots:
675 675 lowestrev = min([self.rev(n) for n in roots])
676 676 else:
677 677 # No more roots? Return empty list
678 678 return nonodes
679 679 else:
680 680 # We are descending from nullid, and don't need to care about
681 681 # any other roots.
682 682 lowestrev = nullrev
683 683 roots = [nullid]
684 684 # Transform our roots list into a 'set' (i.e. a dictionary where the
685 685 # values don't matter.
686 686 descendents = dict.fromkeys(roots, 1)
687 687 # Also, keep the original roots so we can filter out roots that aren't
688 688 # 'real' roots (i.e. are descended from other roots).
689 689 roots = descendents.copy()
690 690 # Our topologically sorted list of output nodes.
691 691 orderedout = []
692 692 # Don't start at nullid since we don't want nullid in our output list,
693 693 # and if nullid shows up in descedents, empty parents will look like
694 694 # they're descendents.
695 695 for r in xrange(max(lowestrev, 0), highestrev + 1):
696 696 n = self.node(r)
697 697 isdescendent = False
698 698 if lowestrev == nullrev: # Everybody is a descendent of nullid
699 699 isdescendent = True
700 700 elif n in descendents:
701 701 # n is already a descendent
702 702 isdescendent = True
703 703 # This check only needs to be done here because all the roots
704 704 # will start being marked is descendents before the loop.
705 705 if n in roots:
706 706 # If n was a root, check if it's a 'real' root.
707 707 p = tuple(self.parents(n))
708 708 # If any of its parents are descendents, it's not a root.
709 709 if (p[0] in descendents) or (p[1] in descendents):
710 710 roots.pop(n)
711 711 else:
712 712 p = tuple(self.parents(n))
713 713 # A node is a descendent if either of its parents are
714 714 # descendents. (We seeded the dependents list with the roots
715 715 # up there, remember?)
716 716 if (p[0] in descendents) or (p[1] in descendents):
717 717 descendents[n] = 1
718 718 isdescendent = True
719 719 if isdescendent and ((ancestors is None) or (n in ancestors)):
720 720 # Only include nodes that are both descendents and ancestors.
721 721 orderedout.append(n)
722 722 if (ancestors is not None) and (n in heads):
723 723 # We're trying to figure out which heads are reachable
724 724 # from roots.
725 725 # Mark this head as having been reached
726 726 heads[n] = 1
727 727 elif ancestors is None:
728 728 # Otherwise, we're trying to discover the heads.
729 729 # Assume this is a head because if it isn't, the next step
730 730 # will eventually remove it.
731 731 heads[n] = 1
732 732 # But, obviously its parents aren't.
733 733 for p in self.parents(n):
734 734 heads.pop(p, None)
735 735 heads = [n for n in heads.iterkeys() if heads[n] != 0]
736 736 roots = roots.keys()
737 737 assert orderedout
738 738 assert roots
739 739 assert heads
740 740 return (orderedout, roots, heads)
741 741
742 742 def heads(self, start=None, stop=None):
743 743 """return the list of all nodes that have no children
744 744
745 745 if start is specified, only heads that are descendants of
746 746 start will be returned
747 747 if stop is specified, it will consider all the revs from stop
748 748 as if they had no children
749 749 """
750 750 if start is None and stop is None:
751 751 count = self.count()
752 752 if not count:
753 753 return [nullid]
754 754 ishead = [1] * (count + 1)
755 755 index = self.index
756 756 for r in xrange(count):
757 757 e = index[r]
758 758 ishead[e[5]] = ishead[e[6]] = 0
759 759 return [self.node(r) for r in xrange(count) if ishead[r]]
760 760
761 761 if start is None:
762 762 start = nullid
763 763 if stop is None:
764 764 stop = []
765 765 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
766 766 startrev = self.rev(start)
767 767 reachable = {startrev: 1}
768 768 heads = {startrev: 1}
769 769
770 770 parentrevs = self.parentrevs
771 771 for r in xrange(startrev + 1, self.count()):
772 772 for p in parentrevs(r):
773 773 if p in reachable:
774 774 if r not in stoprevs:
775 775 reachable[r] = 1
776 776 heads[r] = 1
777 777 if p in heads and p not in stoprevs:
778 778 del heads[p]
779 779
780 780 return [self.node(r) for r in heads]
781 781
782 782 def children(self, node):
783 783 """find the children of a given node"""
784 784 c = []
785 785 p = self.rev(node)
786 786 for r in range(p + 1, self.count()):
787 787 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
788 788 if prevs:
789 789 for pr in prevs:
790 790 if pr == p:
791 791 c.append(self.node(r))
792 792 elif p == nullrev:
793 793 c.append(self.node(r))
794 794 return c
795 795
796 796 def _match(self, id):
797 797 if isinstance(id, (long, int)):
798 798 # rev
799 799 return self.node(id)
800 800 if len(id) == 20:
801 801 # possibly a binary node
802 802 # odds of a binary node being all hex in ASCII are 1 in 10**25
803 803 try:
804 804 node = id
805 805 r = self.rev(node) # quick search the index
806 806 return node
807 807 except LookupError:
808 808 pass # may be partial hex id
809 809 try:
810 810 # str(rev)
811 811 rev = int(id)
812 812 if str(rev) != id:
813 813 raise ValueError
814 814 if rev < 0:
815 815 rev = self.count() + rev
816 816 if rev < 0 or rev >= self.count():
817 817 raise ValueError
818 818 return self.node(rev)
819 819 except (ValueError, OverflowError):
820 820 pass
821 821 if len(id) == 40:
822 822 try:
823 823 # a full hex nodeid?
824 824 node = bin(id)
825 825 r = self.rev(node)
826 826 return node
827 827 except TypeError:
828 828 pass
829 829
830 830 def _partialmatch(self, id):
831 831 if len(id) < 40:
832 832 try:
833 833 # hex(node)[:...]
834 834 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
835 835 node = None
836 836 for n in self.nodemap:
837 837 if n.startswith(bin_id) and hex(n).startswith(id):
838 838 if node is not None:
839 839 raise LookupError(_("Ambiguous identifier"))
840 840 node = n
841 841 if node is not None:
842 842 return node
843 843 except TypeError:
844 844 pass
845 845
846 846 def lookup(self, id):
847 847 """locate a node based on:
848 848 - revision number or str(revision number)
849 849 - nodeid or subset of hex nodeid
850 850 """
851 851 n = self._match(id)
852 852 if n is not None:
853 853 return n
854 854 n = self._partialmatch(id)
855 855 if n:
856 856 return n
857 857
858 858 raise LookupError(_("No match found"))
859 859
860 860 def cmp(self, node, text):
861 861 """compare text with a given file revision"""
862 862 p1, p2 = self.parents(node)
863 863 return hash(text, p1, p2) != node
864 864
865 865 def chunk(self, rev, df=None):
866 866 def loadcache(df):
867 867 if not df:
868 868 if self._inline:
869 869 df = self.opener(self.indexfile)
870 870 else:
871 871 df = self.opener(self.datafile)
872 872 df.seek(start)
873 873 self._chunkcache = (start, df.read(cache_length))
874 874
875 875 start, length = self.start(rev), self.length(rev)
876 876 if self._inline:
877 877 start += (rev + 1) * self._io.size
878 878 end = start + length
879 879
880 880 offset = 0
881 881 if not self._chunkcache:
882 882 cache_length = max(65536, length)
883 883 loadcache(df)
884 884 else:
885 885 cache_start = self._chunkcache[0]
886 886 cache_length = len(self._chunkcache[1])
887 887 cache_end = cache_start + cache_length
888 888 if start >= cache_start and end <= cache_end:
889 889 # it is cached
890 890 offset = start - cache_start
891 891 else:
892 892 cache_length = max(65536, length)
893 893 loadcache(df)
894 894
895 895 # avoid copying large chunks
896 896 c = self._chunkcache[1]
897 897 if cache_length != length:
898 898 c = c[offset:offset + length]
899 899
900 900 return decompress(c)
901 901
902 902 def delta(self, node):
903 903 """return or calculate a delta between a node and its predecessor"""
904 904 r = self.rev(node)
905 905 return self.revdiff(r - 1, r)
906 906
907 907 def revdiff(self, rev1, rev2):
908 908 """return or calculate a delta between two revisions"""
909 909 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
910 910 return self.chunk(rev2)
911 911
912 912 return mdiff.textdiff(self.revision(self.node(rev1)),
913 913 self.revision(self.node(rev2)))
914 914
915 915 def revision(self, node):
916 916 """return an uncompressed revision of a given"""
917 917 if node == nullid:
918 918 return ""
919 919 if self._cache and self._cache[0] == node:
920 920 return str(self._cache[2])
921 921
922 922 # look up what we need to read
923 923 text = None
924 924 rev = self.rev(node)
925 925 base = self.base(rev)
926 926
927 927 # check rev flags
928 928 if self.index[rev][0] & 0xFFFF:
929 929 raise RevlogError(_('incompatible revision flag %x') %
930 930 (self.index[rev][0] & 0xFFFF))
931 931
932 932 if self._inline:
933 933 # we probably have the whole chunk cached
934 934 df = None
935 935 else:
936 936 df = self.opener(self.datafile)
937 937
938 938 # do we have useful data cached?
939 939 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
940 940 base = self._cache[1]
941 941 text = str(self._cache[2])
942 942 self._loadindex(base, rev + 1)
943 943 else:
944 944 self._loadindex(base, rev + 1)
945 945 text = self.chunk(base, df=df)
946 946
947 947 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
948 948 text = mdiff.patches(text, bins)
949 949 p1, p2 = self.parents(node)
950 950 if node != hash(text, p1, p2):
951 951 raise RevlogError(_("integrity check failed on %s:%d")
952 952 % (self.datafile, rev))
953 953
954 954 self._cache = (node, rev, text)
955 955 return text
956 956
957 957 def checkinlinesize(self, tr, fp=None):
958 958 if not self._inline:
959 959 return
960 960 if not fp:
961 961 fp = self.opener(self.indexfile, 'r')
962 962 fp.seek(0, 2)
963 963 size = fp.tell()
964 964 if size < 131072:
965 965 return
966 966 trinfo = tr.find(self.indexfile)
967 967 if trinfo == None:
968 968 raise RevlogError(_("%s not found in the transaction")
969 969 % self.indexfile)
970 970
971 971 trindex = trinfo[2]
972 972 dataoff = self.start(trindex)
973 973
974 974 tr.add(self.datafile, dataoff)
975 975 df = self.opener(self.datafile, 'w')
976 976 calc = self._io.size
977 977 for r in xrange(self.count()):
978 978 start = self.start(r) + (r + 1) * calc
979 979 length = self.length(r)
980 980 fp.seek(start)
981 981 d = fp.read(length)
982 982 df.write(d)
983 983 fp.close()
984 984 df.close()
985 985 fp = self.opener(self.indexfile, 'w', atomictemp=True)
986 986 self.version &= ~(REVLOGNGINLINEDATA)
987 987 self._inline = False
988 988 for i in xrange(self.count()):
989 989 e = self._io.packentry(self.index[i], self.node, self.version, i)
990 990 fp.write(e)
991 991
992 992 # if we don't call rename, the temp file will never replace the
993 993 # real index
994 994 fp.rename()
995 995
996 996 tr.replace(self.indexfile, trindex * calc)
997 997 self._chunkcache = None
998 998
999 999 def addrevision(self, text, transaction, link, p1, p2, d=None):
1000 1000 """add a revision to the log
1001 1001
1002 1002 text - the revision data to add
1003 1003 transaction - the transaction object used for rollback
1004 1004 link - the linkrev data to add
1005 1005 p1, p2 - the parent nodeids of the revision
1006 1006 d - an optional precomputed delta
1007 1007 """
1008 1008 dfh = None
1009 1009 if not self._inline:
1010 1010 dfh = self.opener(self.datafile, "a")
1011 1011 ifh = self.opener(self.indexfile, "a+")
1012 1012 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1013 1013
1014 1014 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1015 1015 node = hash(text, p1, p2)
1016 1016 if node in self.nodemap:
1017 1017 return node
1018 1018
1019 1019 curr = self.count()
1020 1020 prev = curr - 1
1021 1021 base = self.base(prev)
1022 1022 offset = self.end(prev)
1023 1023
1024 1024 if curr:
1025 1025 if not d:
1026 1026 ptext = self.revision(self.node(prev))
1027 1027 d = mdiff.textdiff(ptext, text)
1028 1028 data = compress(d)
1029 1029 l = len(data[1]) + len(data[0])
1030 1030 dist = l + offset - self.start(base)
1031 1031
1032 1032 # full versions are inserted when the needed deltas
1033 1033 # become comparable to the uncompressed text
1034 1034 if not curr or dist > len(text) * 2:
1035 1035 data = compress(text)
1036 1036 l = len(data[1]) + len(data[0])
1037 1037 base = curr
1038 1038
1039 1039 e = (offset_type(offset, 0), l, len(text),
1040 1040 base, link, self.rev(p1), self.rev(p2), node)
1041 1041 self.index.insert(-1, e)
1042 1042 self.nodemap[node] = curr
1043 1043
1044 1044 entry = self._io.packentry(e, self.node, self.version, curr)
1045 1045 if not self._inline:
1046 1046 transaction.add(self.datafile, offset)
1047 1047 transaction.add(self.indexfile, curr * len(entry))
1048 1048 if data[0]:
1049 1049 dfh.write(data[0])
1050 1050 dfh.write(data[1])
1051 1051 dfh.flush()
1052 1052 ifh.write(entry)
1053 1053 else:
1054 1054 offset += curr * self._io.size
1055 1055 transaction.add(self.indexfile, offset, curr)
1056 1056 ifh.write(entry)
1057 1057 ifh.write(data[0])
1058 1058 ifh.write(data[1])
1059 1059 self.checkinlinesize(transaction, ifh)
1060 1060
1061 1061 self._cache = (node, curr, text)
1062 1062 return node
1063 1063
1064 1064 def ancestor(self, a, b):
1065 1065 """calculate the least common ancestor of nodes a and b"""
1066 1066
1067 1067 def parents(rev):
1068 1068 return [p for p in self.parentrevs(rev) if p != nullrev]
1069 1069
1070 1070 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1071 1071 if c is None:
1072 1072 return nullid
1073 1073
1074 1074 return self.node(c)
1075 1075
1076 1076 def group(self, nodelist, lookup, infocollect=None):
1077 1077 """calculate a delta group
1078 1078
1079 1079 Given a list of changeset revs, return a set of deltas and
1080 1080 metadata corresponding to nodes. the first delta is
1081 1081 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1082 1082 have this parent as it has all history before these
1083 1083 changesets. parent is parent[0]
1084 1084 """
1085 1085 revs = [self.rev(n) for n in nodelist]
1086 1086
1087 1087 # if we don't have any revisions touched by these changesets, bail
1088 1088 if not revs:
1089 1089 yield changegroup.closechunk()
1090 1090 return
1091 1091
1092 1092 # add the parent of the first rev
1093 1093 p = self.parents(self.node(revs[0]))[0]
1094 1094 revs.insert(0, self.rev(p))
1095 1095
1096 1096 # build deltas
1097 1097 for d in xrange(0, len(revs) - 1):
1098 1098 a, b = revs[d], revs[d + 1]
1099 1099 nb = self.node(b)
1100 1100
1101 1101 if infocollect is not None:
1102 1102 infocollect(nb)
1103 1103
1104 1104 p = self.parents(nb)
1105 1105 meta = nb + p[0] + p[1] + lookup(nb)
1106 1106 if a == -1:
1107 1107 d = self.revision(nb)
1108 1108 meta += mdiff.trivialdiffheader(len(d))
1109 1109 else:
1110 1110 d = self.revdiff(a, b)
1111 1111 yield changegroup.chunkheader(len(meta) + len(d))
1112 1112 yield meta
1113 1113 if len(d) > 2**20:
1114 1114 pos = 0
1115 1115 while pos < len(d):
1116 1116 pos2 = pos + 2 ** 18
1117 1117 yield d[pos:pos2]
1118 1118 pos = pos2
1119 1119 else:
1120 1120 yield d
1121 1121
1122 1122 yield changegroup.closechunk()
1123 1123
1124 1124 def addgroup(self, revs, linkmapper, transaction, unique=0):
1125 1125 """
1126 1126 add a delta group
1127 1127
1128 1128 given a set of deltas, add them to the revision log. the
1129 1129 first delta is against its parent, which should be in our
1130 1130 log, the rest are against the previous delta.
1131 1131 """
1132 1132
1133 1133 #track the base of the current delta log
1134 1134 r = self.count()
1135 1135 t = r - 1
1136 1136 node = None
1137 1137
1138 1138 base = prev = nullrev
1139 1139 start = end = textlen = 0
1140 1140 if r:
1141 1141 end = self.end(t)
1142 1142
1143 1143 ifh = self.opener(self.indexfile, "a+")
1144 1144 isize = r * self._io.size
1145 1145 if self._inline:
1146 1146 transaction.add(self.indexfile, end + isize, r)
1147 1147 dfh = None
1148 1148 else:
1149 1149 transaction.add(self.indexfile, isize, r)
1150 1150 transaction.add(self.datafile, end)
1151 1151 dfh = self.opener(self.datafile, "a")
1152 1152
1153 1153 # loop through our set of deltas
1154 1154 chain = None
1155 1155 for chunk in revs:
1156 1156 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1157 1157 link = linkmapper(cs)
1158 1158 if node in self.nodemap:
1159 1159 # this can happen if two branches make the same change
1160 1160 # if unique:
1161 1161 # raise RevlogError(_("already have %s") % hex(node[:4]))
1162 1162 chain = node
1163 1163 continue
1164 1164 delta = buffer(chunk, 80)
1165 1165 del chunk
1166 1166
1167 1167 for p in (p1, p2):
1168 1168 if not p in self.nodemap:
1169 1169 raise LookupError(_("unknown parent %s") % short(p))
1170 1170
1171 1171 if not chain:
1172 1172 # retrieve the parent revision of the delta chain
1173 1173 chain = p1
1174 1174 if not chain in self.nodemap:
1175 1175 raise LookupError(_("unknown base %s") % short(chain[:4]))
1176 1176
1177 1177 # full versions are inserted when the needed deltas become
1178 1178 # comparable to the uncompressed text or when the previous
1179 1179 # version is not the one we have a delta against. We use
1180 1180 # the size of the previous full rev as a proxy for the
1181 1181 # current size.
1182 1182
1183 1183 if chain == prev:
1184 1184 cdelta = compress(delta)
1185 1185 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1186 1186 textlen = mdiff.patchedsize(textlen, delta)
1187 1187
1188 1188 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1189 1189 # flush our writes here so we can read it in revision
1190 1190 if dfh:
1191 1191 dfh.flush()
1192 1192 ifh.flush()
1193 1193 text = self.revision(chain)
1194 1194 if len(text) == 0:
1195 1195 # skip over trivial delta header
1196 1196 text = buffer(delta, 12)
1197 1197 else:
1198 1198 text = mdiff.patches(text, [delta])
1199 1199 del delta
1200 1200 chk = self._addrevision(text, transaction, link, p1, p2, None,
1201 1201 ifh, dfh)
1202 1202 if not dfh and not self._inline:
1203 1203 # addrevision switched from inline to conventional
1204 1204 # reopen the index
1205 1205 dfh = self.opener(self.datafile, "a")
1206 1206 ifh = self.opener(self.indexfile, "a")
1207 1207 if chk != node:
1208 1208 raise RevlogError(_("consistency error adding group"))
1209 1209 textlen = len(text)
1210 1210 else:
1211 1211 e = (offset_type(end, 0), cdeltalen, textlen, base,
1212 1212 link, self.rev(p1), self.rev(p2), node)
1213 1213 self.index.insert(-1, e)
1214 1214 self.nodemap[node] = r
1215 1215 entry = self._io.packentry(e, self.node, self.version, r)
1216 1216 if self._inline:
1217 1217 ifh.write(entry)
1218 1218 ifh.write(cdelta[0])
1219 1219 ifh.write(cdelta[1])
1220 1220 self.checkinlinesize(transaction, ifh)
1221 1221 if not self._inline:
1222 1222 dfh = self.opener(self.datafile, "a")
1223 1223 ifh = self.opener(self.indexfile, "a")
1224 1224 else:
1225 1225 dfh.write(cdelta[0])
1226 1226 dfh.write(cdelta[1])
1227 1227 ifh.write(entry)
1228 1228
1229 1229 t, r, chain, prev = r, r + 1, node, node
1230 1230 base = self.base(t)
1231 1231 start = self.start(base)
1232 1232 end = self.end(t)
1233 1233
1234 1234 return node
1235 1235
1236 1236 def strip(self, rev, minlink):
1237 1237 if self.count() == 0 or rev >= self.count():
1238 1238 return
1239 1239
1240 1240 if isinstance(self.index, lazyindex):
1241 1241 self._loadindexmap()
1242 1242
1243 1243 # When stripping away a revision, we need to make sure it
1244 1244 # does not actually belong to an older changeset.
1245 1245 # The minlink parameter defines the oldest revision
1246 1246 # we're allowed to strip away.
1247 1247 while minlink > self.index[rev][4]:
1248 1248 rev += 1
1249 1249 if rev >= self.count():
1250 1250 return
1251 1251
1252 1252 # first truncate the files on disk
1253 1253 end = self.start(rev)
1254 1254 if not self._inline:
1255 1255 df = self.opener(self.datafile, "a")
1256 1256 df.truncate(end)
1257 1257 end = rev * self._io.size
1258 1258 else:
1259 1259 end += rev * self._io.size
1260 1260
1261 1261 indexf = self.opener(self.indexfile, "a")
1262 1262 indexf.truncate(end)
1263 1263
1264 1264 # then reset internal state in memory to forget those revisions
1265 1265 self._cache = None
1266 1266 self._chunkcache = None
1267 1267 for x in xrange(rev, self.count()):
1268 1268 del self.nodemap[self.node(x)]
1269 1269
1270 1270 del self.index[rev:-1]
1271 1271
1272 1272 def checksize(self):
1273 1273 expected = 0
1274 1274 if self.count():
1275 1275 expected = max(0, self.end(self.count() - 1))
1276 1276
1277 1277 try:
1278 1278 f = self.opener(self.datafile)
1279 1279 f.seek(0, 2)
1280 1280 actual = f.tell()
1281 1281 dd = actual - expected
1282 1282 except IOError, inst:
1283 1283 if inst.errno != errno.ENOENT:
1284 1284 raise
1285 1285 dd = 0
1286 1286
1287 1287 try:
1288 1288 f = self.opener(self.indexfile)
1289 1289 f.seek(0, 2)
1290 1290 actual = f.tell()
1291 1291 s = self._io.size
1292 1292 i = max(0, actual / s)
1293 1293 di = actual - (i * s)
1294 1294 if self._inline:
1295 1295 databytes = 0
1296 1296 for r in xrange(self.count()):
1297 1297 databytes += max(0, self.length(r))
1298 1298 dd = 0
1299 1299 di = actual - self.count() * s - databytes
1300 1300 except IOError, inst:
1301 1301 if inst.errno != errno.ENOENT:
1302 1302 raise
1303 1303 di = 0
1304 1304
1305 1305 return (dd, di)
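For reference, the v0 index entries handled by parseindex above are fixed-size records laid out by indexformatv0 = ">4l20s20s20s": four big-endian 32-bit integers (offset, compressed length, base revision and link revision, as the conversion code in parseindex suggests) followed by three 20-byte binary nodes (parent 1, parent 2, this entry's node). A standalone, hedged sketch of packing and unpacking one such record with the standard struct module, using fabricated values purely for illustration:

import struct

indexformatv0 = ">4l20s20s20s"                 # same format string as in the file above
entry_size = struct.calcsize(indexformatv0)    # 76 bytes per v0 entry

# Fabricated record: offset 0, compressed length 11, base rev 0, link rev 0,
# two null parents and a made-up 20-byte node id.
nullid = b"\x00" * 20
fake_node = b"N" * 20
record = struct.pack(indexformatv0, 0, 11, 0, 0, nullid, nullid, fake_node)

offset, comp_len, base, link, p1, p2, node = struct.unpack(indexformatv0, record)
print(entry_size, offset, comp_len, base, link, node == fake_node)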