raise RevlogError when parser can't parse the revlog index...
Benoit Boissinot
r8016:baaa832f default
@@ -1,1360 +1,1363 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import bin, hex, nullid, nullrev, short
14 14 from i18n import _
15 15 import changegroup, errno, ancestor, mdiff, parsers
16 16 import struct, util, zlib, error
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22 _sha = util.sha1
23 23
24 24 # revlog flags
25 25 REVLOGV0 = 0
26 26 REVLOGNG = 1
27 27 REVLOGNGINLINEDATA = (1 << 16)
28 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 31
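A minimal sketch of how the version word above decomposes, mirroring the masking done later in revlog.__init__: the low 16 bits carry the format, the high bits carry the flags.

    v = REVLOG_DEFAULT_VERSION
    assert v & 0xFFFF == REVLOGNG             # format half
    assert v & ~0xFFFF == REVLOGNGINLINEDATA  # flag half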
32 32 RevlogError = error.RevlogError
33 33 LookupError = error.LookupError
34 34
35 35 def getoffset(q):
36 36 return int(q >> 16)
37 37
38 38 def gettype(q):
39 39 return int(q & 0xFFFF)
40 40
41 41 def offset_type(offset, type):
42 42 return long(long(offset) << 16 | type)
43 43
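A quick round-trip sketch of the three helpers above, which multiplex a 48-bit offset and a 16-bit type into the single 64-bit field stored in each index entry:

    q = offset_type(1234, 1)
    assert getoffset(q) == 1234
    assert gettype(q) == 1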
44 44 def hash(text, p1, p2):
45 45 """generate a hash from the given text and its parent hashes
46 46
47 47 This hash combines both the current file contents and its history
48 48 in a manner that makes it easy to distinguish nodes with the same
49 49 content in the revision graph.
50 50 """
51 51 l = [p1, p2]
52 52 l.sort()
53 53 s = _sha(l[0])
54 54 s.update(l[1])
55 55 s.update(text)
56 56 return s.digest()
57 57
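An illustrative sketch (arbitrary text, not part of this changeset) of the property the docstring describes: identical content with different parents yields different nodes, and parent order is irrelevant because the parents are sorted first.

    a = hash("same text", nullid, nullid)
    b = hash("same text", a, nullid)
    assert a != b                               # same content, different history
    assert hash("x", a, b) == hash("x", b, a)   # parent order does not matter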
58 58 def compress(text):
59 59 """ generate a possibly-compressed representation of text """
60 60 if not text:
61 61 return ("", text)
62 62 l = len(text)
63 63 bin = None
64 64 if l < 44:
65 65 pass
66 66 elif l > 1000000:
67 67 # zlib makes an internal copy, thus doubling memory usage for
68 68 # large files, so let's do this in pieces
69 69 z = zlib.compressobj()
70 70 p = []
71 71 pos = 0
72 72 while pos < l:
73 73 pos2 = pos + 2**20
74 74 p.append(z.compress(text[pos:pos2]))
75 75 pos = pos2
76 76 p.append(z.flush())
77 77 if sum(map(len, p)) < l:
78 78 bin = "".join(p)
79 79 else:
80 80 bin = _compress(text)
81 81 if bin is None or len(bin) > l:
82 82 if text[0] == '\0':
83 83 return ("", text)
84 84 return ('u', text)
85 85 return ("", bin)
86 86
87 87 def decompress(bin):
88 88 """ decompress the given input """
89 89 if not bin:
90 90 return bin
91 91 t = bin[0]
92 92 if t == '\0':
93 93 return bin
94 94 if t == 'x':
95 95 return _decompress(bin)
96 96 if t == 'u':
97 97 return bin[1:]
98 98 raise RevlogError(_("unknown compression type %r") % t)
99 99
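A round-trip sketch of the tagging convention shared by compress and decompress above: zlib streams start with 'x', verbatim text is prefixed with 'u', and data already beginning with a NUL byte is stored untouched.

    for text in ("x" * 100, "ab", "\0binary"):  # sample inputs, one per branch
        hdr, data = compress(text)
        assert decompress(hdr + data) == text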
100 100 class lazyparser(object):
101 101 """
102 102 this class avoids the need to parse the entirety of large indices
103 103 """
104 104
105 105 # lazyparser is not safe to use on windows if the win32 extensions are
106 106 # not available. It keeps the file handle open, which makes it impossible
107 107 # to break hardlinks on local cloned repos.
108 108
109 109 def __init__(self, dataf, size):
110 110 self.dataf = dataf
111 111 self.s = struct.calcsize(indexformatng)
112 112 self.datasize = size
113 113 self.l = size/self.s
114 114 self.index = [None] * self.l
115 115 self.map = {nullid: nullrev}
116 116 self.allmap = 0
117 117 self.all = 0
118 118 self.mapfind_count = 0
119 119
120 120 def loadmap(self):
121 121 """
122 122 during a commit, we need to make sure the rev being added is
123 123 not a duplicate. This requires loading the entire index,
124 124 which is fairly slow. loadmap can load up just the node map,
125 125 which takes much less time.
126 126 """
127 127 if self.allmap:
128 128 return
129 129 end = self.datasize
130 130 self.allmap = 1
131 131 cur = 0
132 132 count = 0
133 133 blocksize = self.s * 256
134 134 self.dataf.seek(0)
135 135 while cur < end:
136 136 data = self.dataf.read(blocksize)
137 137 off = 0
138 138 for x in xrange(256):
139 139 n = data[off + ngshaoffset:off + ngshaoffset + 20]
140 140 self.map[n] = count
141 141 count += 1
142 142 if count >= self.l:
143 143 break
144 144 off += self.s
145 145 cur += blocksize
146 146
147 147 def loadblock(self, blockstart, blocksize, data=None):
148 148 if self.all:
149 149 return
150 150 if data is None:
151 151 self.dataf.seek(blockstart)
152 152 if blockstart + blocksize > self.datasize:
153 153 # the revlog may have grown since we've started running,
154 154 # but we don't have space in self.index for more entries.
155 155 # limit blocksize so that we don't get too much data.
156 156 blocksize = max(self.datasize - blockstart, 0)
157 157 data = self.dataf.read(blocksize)
158 158 lend = len(data) / self.s
159 159 i = blockstart / self.s
160 160 off = 0
161 161 # lazyindex supports __delitem__
162 162 if lend > len(self.index) - i:
163 163 lend = len(self.index) - i
164 164 for x in xrange(lend):
165 165 if self.index[i + x] is None:
166 166 b = data[off : off + self.s]
167 167 self.index[i + x] = b
168 168 n = b[ngshaoffset:ngshaoffset + 20]
169 169 self.map[n] = i + x
170 170 off += self.s
171 171
172 172 def findnode(self, node):
173 173 """search backwards through the index file for a specific node"""
174 174 if self.allmap:
175 175 return None
176 176
177 177 # hg log will cause many many searches for the manifest
178 178 # nodes. After we get called a few times, just load the whole
179 179 # thing.
180 180 if self.mapfind_count > 8:
181 181 self.loadmap()
182 182 if node in self.map:
183 183 return node
184 184 return None
185 185 self.mapfind_count += 1
186 186 last = self.l - 1
187 187 while self.index[last] is not None:
188 188 if last == 0:
189 189 self.all = 1
190 190 self.allmap = 1
191 191 return None
192 192 last -= 1
193 193 end = (last + 1) * self.s
194 194 blocksize = self.s * 256
195 195 while end >= 0:
196 196 start = max(end - blocksize, 0)
197 197 self.dataf.seek(start)
198 198 data = self.dataf.read(end - start)
199 199 findend = end - start
200 200 while True:
201 201 # we're searching backwards, so we have to make sure
202 202 # we don't find a changeset where this node is a parent
203 203 off = data.find(node, 0, findend)
204 204 findend = off
205 205 if off >= 0:
206 206 i = off / self.s
207 207 off = i * self.s
208 208 n = data[off + ngshaoffset:off + ngshaoffset + 20]
209 209 if n == node:
210 210 self.map[n] = i + start / self.s
211 211 return node
212 212 else:
213 213 break
214 214 end -= blocksize
215 215 return None
216 216
217 217 def loadindex(self, i=None, end=None):
218 218 if self.all:
219 219 return
220 220 all = False
221 221 if i is None:
222 222 blockstart = 0
223 223 blocksize = (65536 / self.s) * self.s
224 224 end = self.datasize
225 225 all = True
226 226 else:
227 227 if end:
228 228 blockstart = i * self.s
229 229 end = end * self.s
230 230 blocksize = end - blockstart
231 231 else:
232 232 blockstart = (i & ~1023) * self.s
233 233 blocksize = self.s * 1024
234 234 end = blockstart + blocksize
235 235 while blockstart < end:
236 236 self.loadblock(blockstart, blocksize)
237 237 blockstart += blocksize
238 238 if all:
239 239 self.all = True
240 240
241 241 class lazyindex(object):
242 242 """a lazy version of the index array"""
243 243 def __init__(self, parser):
244 244 self.p = parser
245 245 def __len__(self):
246 246 return len(self.p.index)
247 247 def load(self, pos):
248 248 if pos < 0:
249 249 pos += len(self.p.index)
250 250 self.p.loadindex(pos)
251 251 return self.p.index[pos]
252 252 def __getitem__(self, pos):
253 253 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
254 254 def __setitem__(self, pos, item):
255 255 self.p.index[pos] = _pack(indexformatng, *item)
256 256 def __delitem__(self, pos):
257 257 del self.p.index[pos]
258 258 def insert(self, pos, e):
259 259 self.p.index.insert(pos, _pack(indexformatng, *e))
260 260 def append(self, e):
261 261 self.p.index.append(_pack(indexformatng, *e))
262 262
263 263 class lazymap(object):
264 264 """a lazy version of the node map"""
265 265 def __init__(self, parser):
266 266 self.p = parser
267 267 def load(self, key):
268 268 n = self.p.findnode(key)
269 269 if n is None:
270 270 raise KeyError(key)
271 271 def __contains__(self, key):
272 272 if key in self.p.map:
273 273 return True
274 274 self.p.loadmap()
275 275 return key in self.p.map
276 276 def __iter__(self):
277 277 yield nullid
278 278 for i in xrange(self.p.l):
279 279 ret = self.p.index[i]
280 280 if not ret:
281 281 self.p.loadindex(i)
282 282 ret = self.p.index[i]
283 283 if isinstance(ret, str):
284 284 ret = _unpack(indexformatng, ret)
285 285 yield ret[7]
286 286 def __getitem__(self, key):
287 287 try:
288 288 return self.p.map[key]
289 289 except KeyError:
290 290 try:
291 291 self.load(key)
292 292 return self.p.map[key]
293 293 except KeyError:
294 294 raise KeyError("node " + hex(key))
295 295 def __setitem__(self, key, val):
296 296 self.p.map[key] = val
297 297 def __delitem__(self, key):
298 298 del self.p.map[key]
299 299
300 300 indexformatv0 = ">4l20s20s20s"
301 301 v0shaoffset = 56
302 302
303 303 class revlogoldio(object):
304 304 def __init__(self):
305 305 self.size = struct.calcsize(indexformatv0)
306 306
307 307 def parseindex(self, fp, inline):
308 308 s = self.size
309 309 index = []
310 310 nodemap = {nullid: nullrev}
311 311 n = off = 0
312 312 data = fp.read()
313 313 l = len(data)
314 314 while off + s <= l:
315 315 cur = data[off:off + s]
316 316 off += s
317 317 e = _unpack(indexformatv0, cur)
318 318 # transform to revlogv1 format
319 319 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
320 320 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
321 321 index.append(e2)
322 322 nodemap[e[6]] = n
323 323 n += 1
324 324
325 325 return index, nodemap, None
326 326
327 327 def packentry(self, entry, node, version, rev):
328 328 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
329 329 node(entry[5]), node(entry[6]), entry[7])
330 330 return _pack(indexformatv0, *e2)
331 331
332 332 # index ng:
333 333 #  6 bytes: offset
334 334 #  2 bytes: flags
335 335 #  4 bytes: compressed length
336 336 #  4 bytes: uncompressed length
337 337 #  4 bytes: base rev
338 338 #  4 bytes: link rev
339 339 #  4 bytes: parent 1 rev
340 340 #  4 bytes: parent 2 rev
341 341 # 32 bytes: nodeid
342 342 indexformatng = ">Qiiiiii20s12x"
343 343 ngshaoffset = 32
344 344 versionformat = ">I"
345 345
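A sketch of packing a single index-ng entry with the layout documented above; the first field carries the offset and flags together, as built by offset_type.

    e = (offset_type(0, 0),  # 6-byte offset + 2-byte flags
         11, 11,             # compressed and uncompressed lengths
         0, 0,               # base rev, link rev
         -1, -1,             # parent revs (nullrev)
         "\0" * 20)          # nodeid, zero-padded to 32 bytes by the 12x
    entry = _pack(indexformatng, *e)
    assert len(entry) == struct.calcsize(indexformatng) == 64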
346 346 class revlogio(object):
347 347 def __init__(self):
348 348 self.size = struct.calcsize(indexformatng)
349 349
350 350 def parseindex(self, fp, inline):
351 351 try:
352 352 size = util.fstat(fp).st_size
353 353 except AttributeError:
354 354 size = 0
355 355
356 356 if util.openhardlinks() and not inline and size > 1000000:
357 357 # big index, let's parse it on demand
358 358 parser = lazyparser(fp, size)
359 359 index = lazyindex(parser)
360 360 nodemap = lazymap(parser)
361 361 e = list(index[0])
362 362 type = gettype(e[0])
363 363 e[0] = offset_type(0, type)
364 364 index[0] = e
365 365 return index, nodemap, None
366 366
367 367 data = fp.read()
368 368 # call the C implementation to parse the index data
369 369 index, nodemap, cache = parsers.parse_index(data, inline)
370 370 return index, nodemap, cache
371 371
372 372 def packentry(self, entry, node, version, rev):
373 373 p = _pack(indexformatng, *entry)
374 374 if rev == 0:
375 375 p = _pack(versionformat, version) + p[4:]
376 376 return p
377 377
378 378 class revlog(object):
379 379 """
380 380 the underlying revision storage object
381 381
382 382 A revlog consists of two parts, an index and the revision data.
383 383
384 384 The index is a file with a fixed record size containing
385 385 information on each revision, including its nodeid (hash), the
386 386 nodeids of its parents, the position and offset of its data within
387 387 the data file, and the revision it's based on. Finally, each entry
388 388 contains a linkrev entry that can serve as a pointer to external
389 389 data.
390 390
391 391 The revision data itself is a linear collection of data chunks.
392 392 Each chunk represents a revision and is usually represented as a
393 393 delta against the previous chunk. To bound lookup time, runs of
394 394 deltas are limited to about 2 times the length of the original
395 395 version data. This makes retrieval of a version proportional to
396 396 its size, or O(1) relative to the number of revisions.
397 397
398 398 Both pieces of the revlog are written to in an append-only
399 399 fashion, which means we never need to rewrite a file to insert or
400 400 remove data, and can use some simple techniques to avoid the need
401 401 for locking while reading.
402 402 """
403 403 def __init__(self, opener, indexfile):
404 404 """
405 405 create a revlog object
406 406
407 407 opener is a function that abstracts the file opening operation
408 408 and can be used to implement COW semantics or the like.
409 409 """
410 410 self.indexfile = indexfile
411 411 self.datafile = indexfile[:-2] + ".d"
412 412 self.opener = opener
413 413 self._cache = None
414 414 self._chunkcache = None
415 415 self.nodemap = {nullid: nullrev}
416 416 self.index = []
417 417
418 418 v = REVLOG_DEFAULT_VERSION
419 419 if hasattr(opener, "defversion"):
420 420 v = opener.defversion
421 421 if v & REVLOGNG:
422 422 v |= REVLOGNGINLINEDATA
423 423
424 424 i = ""
425 425 try:
426 426 f = self.opener(self.indexfile)
427 427 i = f.read(4)
428 428 f.seek(0)
429 429 if len(i) > 0:
430 430 v = struct.unpack(versionformat, i)[0]
431 431 except IOError, inst:
432 432 if inst.errno != errno.ENOENT:
433 433 raise
434 434
435 435 self.version = v
436 436 self._inline = v & REVLOGNGINLINEDATA
437 437 flags = v & ~0xFFFF
438 438 fmt = v & 0xFFFF
439 439 if fmt == REVLOGV0 and flags:
440 440 raise RevlogError(_("index %s unknown flags %#04x for format v0")
441 441 % (self.indexfile, flags >> 16))
442 442 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
443 443 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
444 444 % (self.indexfile, flags >> 16))
445 445 elif fmt > REVLOGNG:
446 446 raise RevlogError(_("index %s unknown format %d")
447 447 % (self.indexfile, fmt))
448 448
449 449 self._io = revlogio()
450 450 if self.version == REVLOGV0:
451 451 self._io = revlogoldio()
452 452 if i:
453 d = self._io.parseindex(f, self._inline)
453 try:
454 d = self._io.parseindex(f, self._inline)
455 except (ValueError, IndexError):
456 raise RevlogError(_("index %s is corrupted") % self.indexfile)
454 457 self.index, self.nodemap, self._chunkcache = d
455 458
456 459 # add the magic null revision at -1 (if it hasn't been done already)
457 460 if (self.index == [] or isinstance(self.index, lazyindex) or
458 461 self.index[-1][7] != nullid):
459 462 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
460 463
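The effect of the new guard, sketched from a hypothetical caller (the opener and path here are illustrative): a truncated or garbled index now surfaces as RevlogError rather than a bare ValueError or IndexError escaping from the parser.

    try:
        r = revlog(opener, "data/a.i")   # opener as described in __init__
    except error.RevlogError, inst:
        print inst                       # index data/a.i is corrupted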
461 464 def _loadindex(self, start, end):
462 465 """load a block of indexes all at once from the lazy parser"""
463 466 if isinstance(self.index, lazyindex):
464 467 self.index.p.loadindex(start, end)
465 468
466 469 def _loadindexmap(self):
467 470 """loads both the map and the index from the lazy parser"""
468 471 if isinstance(self.index, lazyindex):
469 472 p = self.index.p
470 473 p.loadindex()
471 474 self.nodemap = p.map
472 475
473 476 def _loadmap(self):
474 477 """loads the map from the lazy parser"""
475 478 if isinstance(self.nodemap, lazymap):
476 479 self.nodemap.p.loadmap()
477 480 self.nodemap = self.nodemap.p.map
478 481
479 482 def tip(self):
480 483 return self.node(len(self.index) - 2)
481 484 def __len__(self):
482 485 return len(self.index) - 1
483 486 def __iter__(self):
484 487 for i in xrange(len(self)):
485 488 yield i
486 489 def rev(self, node):
487 490 try:
488 491 return self.nodemap[node]
489 492 except KeyError:
490 493 raise LookupError(node, self.indexfile, _('no node'))
491 494 def node(self, rev):
492 495 return self.index[rev][7]
493 496 def linkrev(self, rev):
494 497 return self.index[rev][4]
495 498 def parents(self, node):
496 499 i = self.index
497 500 d = i[self.rev(node)]
498 501 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
499 502 def parentrevs(self, rev):
500 503 return self.index[rev][5:7]
501 504 def start(self, rev):
502 505 return int(self.index[rev][0] >> 16)
503 506 def end(self, rev):
504 507 return self.start(rev) + self.length(rev)
505 508 def length(self, rev):
506 509 return self.index[rev][1]
507 510 def base(self, rev):
508 511 return self.index[rev][3]
509 512
510 513 def size(self, rev):
511 514 """return the length of the uncompressed text for a given revision"""
512 515 l = self.index[rev][2]
513 516 if l >= 0:
514 517 return l
515 518
516 519 t = self.revision(self.node(rev))
517 520 return len(t)
518 521
519 522 # alternate implementation, The advantage to this code is it
520 523 # will be faster for a single revision. But, the results are not
521 524 # cached, so finding the size of every revision will be slower.
522 525 """
523 526 if self.cache and self.cache[1] == rev:
524 527 return len(self.cache[2])
525 528
526 529 base = self.base(rev)
527 530 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
528 531 base = self.cache[1]
529 532 text = self.cache[2]
530 533 else:
531 534 text = self.revision(self.node(base))
532 535
533 536 l = len(text)
534 537 for x in xrange(base + 1, rev + 1):
535 538 l = mdiff.patchedsize(l, self.chunk(x))
536 539 return l
537 540 """
538 541
539 542 def reachable(self, node, stop=None):
540 543 """return a hash of all nodes ancestral to a given node, including
541 544 the node itself, stopping when stop is matched"""
542 545 reachable = {}
543 546 visit = [node]
544 547 reachable[node] = 1
545 548 if stop:
546 549 stopn = self.rev(stop)
547 550 else:
548 551 stopn = 0
549 552 while visit:
550 553 n = visit.pop(0)
551 554 if n == stop:
552 555 continue
553 556 if n == nullid:
554 557 continue
555 558 for p in self.parents(n):
556 559 if self.rev(p) < stopn:
557 560 continue
558 561 if p not in reachable:
559 562 reachable[p] = 1
560 563 visit.append(p)
561 564 return reachable
562 565
563 566 def ancestors(self, *revs):
564 567 'Generate the ancestors of revs using a breadth-first visit'
565 568 visit = list(revs)
566 569 seen = util.set([nullrev])
567 570 while visit:
568 571 for parent in self.parentrevs(visit.pop(0)):
569 572 if parent not in seen:
570 573 visit.append(parent)
571 574 seen.add(parent)
572 575 yield parent
573 576
574 577 def descendants(self, *revs):
575 578 'Generate the descendants of revs in topological order'
576 579 seen = util.set(revs)
577 580 for i in xrange(min(revs) + 1, len(self)):
578 581 for x in self.parentrevs(i):
579 582 if x != nullrev and x in seen:
580 583 seen.add(i)
581 584 yield i
582 585 break
583 586
584 587 def findmissing(self, common=None, heads=None):
585 588 '''
586 589 returns the topologically sorted list of nodes from the set:
587 590 missing = (ancestors(heads) \ ancestors(common))
588 591
589 592 where ancestors() is the set of ancestors from heads, heads included
590 593
591 594 if heads is None, the heads of the revlog are used
592 595 if common is None, nullid is assumed to be a common node
593 596 '''
594 597 if common is None:
595 598 common = [nullid]
596 599 if heads is None:
597 600 heads = self.heads()
598 601
599 602 common = [self.rev(n) for n in common]
600 603 heads = [self.rev(n) for n in heads]
601 604
602 605 # we want the ancestors, but inclusive
603 606 has = dict.fromkeys(self.ancestors(*common))
604 607 has[nullrev] = None
605 608 for r in common:
606 609 has[r] = None
607 610
608 611 # take all ancestors from heads that aren't in has
609 612 missing = {}
610 613 visit = [r for r in heads if r not in has]
611 614 while visit:
612 615 r = visit.pop(0)
613 616 if r in missing:
614 617 continue
615 618 else:
616 619 missing[r] = None
617 620 for p in self.parentrevs(r):
618 621 if p not in has:
619 622 visit.append(p)
620 623 missing = missing.keys()
621 624 missing.sort()
622 625 return [self.node(r) for r in missing]
623 626
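A self-contained toy sketch (integers stand in for nodes; a plain set is used for brevity) of the identity findmissing implements, with ancestors taken inclusively:

    parents = {0: [], 1: [0], 2: [1], 3: [1]}
    def anc(ns):                          # inclusive ancestor set
        seen, stack = set(), list(ns)
        while stack:
            n = stack.pop()
            if n not in seen:
                seen.add(n)
                stack.extend(parents[n])
        return seen
    assert sorted(anc([2, 3]) - anc([2])) == [3]   # heads=[2, 3], common=[2]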
624 627 def nodesbetween(self, roots=None, heads=None):
625 628 """Return a tuple containing three elements. Elements 1 and 2 contain
626 629 a final list bases and heads after all the unreachable ones have been
627 630 pruned. Element 0 contains a topologically sorted list of all
628 631 nodes that satisfy these constraints:
630 633 1. All nodes must be descended from a node in roots (the nodes on
631 634 roots are considered descended from themselves).
632 635 2. All nodes must also be ancestors of a node in heads (the nodes in
633 636 heads are considered to be their own ancestors).
634 637
635 638 If roots is unspecified, nullid is assumed as the only root.
636 639 If heads is unspecified, it is taken to be the output of the
637 640 heads method (i.e. a list of all nodes in the repository that
638 641 have no children)."""
639 642 nonodes = ([], [], [])
640 643 if roots is not None:
641 644 roots = list(roots)
642 645 if not roots:
643 646 return nonodes
644 647 lowestrev = min([self.rev(n) for n in roots])
645 648 else:
646 649 roots = [nullid] # Everybody's a descendent of nullid
647 650 lowestrev = nullrev
648 651 if (lowestrev == nullrev) and (heads is None):
649 652 # We want _all_ the nodes!
650 653 return ([self.node(r) for r in self], [nullid], list(self.heads()))
651 654 if heads is None:
652 655 # All nodes are ancestors, so the latest ancestor is the last
653 656 # node.
654 657 highestrev = len(self) - 1
655 658 # Set ancestors to None to signal that every node is an ancestor.
656 659 ancestors = None
657 660 # Set heads to an empty dictionary for later discovery of heads
658 661 heads = {}
659 662 else:
660 663 heads = list(heads)
661 664 if not heads:
662 665 return nonodes
663 666 ancestors = {}
664 667 # Turn heads into a dictionary so we can remove 'fake' heads.
665 668 # Also, later we will be using it to filter out the heads we can't
666 669 # find from roots.
667 670 heads = dict.fromkeys(heads, 0)
668 671 # Start at the top and keep marking parents until we're done.
669 672 nodestotag = heads.keys()
670 673 # Remember where the top was so we can use it as a limit later.
671 674 highestrev = max([self.rev(n) for n in nodestotag])
672 675 while nodestotag:
673 676 # grab a node to tag
674 677 n = nodestotag.pop()
675 678 # Never tag nullid
676 679 if n == nullid:
677 680 continue
678 681 # A node's revision number represents its place in a
679 682 # topologically sorted list of nodes.
680 683 r = self.rev(n)
681 684 if r >= lowestrev:
682 685 if n not in ancestors:
683 686 # If we are possibly a descendent of one of the roots
684 687 # and we haven't already been marked as an ancestor
685 688 ancestors[n] = 1 # Mark as ancestor
686 689 # Add non-nullid parents to list of nodes to tag.
687 690 nodestotag.extend([p for p in self.parents(n) if
688 691 p != nullid])
689 692 elif n in heads: # We've seen it before, is it a fake head?
690 693 # So it is, real heads should not be the ancestors of
691 694 # any other heads.
692 695 heads.pop(n)
693 696 if not ancestors:
694 697 return nonodes
695 698 # Now that we have our set of ancestors, we want to remove any
696 699 # roots that are not ancestors.
697 700
698 701 # If one of the roots was nullid, everything is included anyway.
699 702 if lowestrev > nullrev:
700 703 # But, since we weren't, let's recompute the lowest rev to not
701 704 # include roots that aren't ancestors.
702 705
703 706 # Filter out roots that aren't ancestors of heads
704 707 roots = [n for n in roots if n in ancestors]
705 708 # Recompute the lowest revision
706 709 if roots:
707 710 lowestrev = min([self.rev(n) for n in roots])
708 711 else:
709 712 # No more roots? Return empty list
710 713 return nonodes
711 714 else:
712 715 # We are descending from nullid, and don't need to care about
713 716 # any other roots.
714 717 lowestrev = nullrev
715 718 roots = [nullid]
716 719 # Transform our roots list into a 'set' (i.e. a dictionary where the
717 720 # values don't matter).
718 721 descendents = dict.fromkeys(roots, 1)
719 722 # Also, keep the original roots so we can filter out roots that aren't
720 723 # 'real' roots (i.e. are descended from other roots).
721 724 roots = descendents.copy()
722 725 # Our topologically sorted list of output nodes.
723 726 orderedout = []
724 727 # Don't start at nullid since we don't want nullid in our output list,
725 728 # and if nullid shows up in descendents, empty parents will look like
726 729 # they're descendents.
727 730 for r in xrange(max(lowestrev, 0), highestrev + 1):
728 731 n = self.node(r)
729 732 isdescendent = False
730 733 if lowestrev == nullrev: # Everybody is a descendent of nullid
731 734 isdescendent = True
732 735 elif n in descendents:
733 736 # n is already a descendent
734 737 isdescendent = True
735 738 # This check only needs to be done here because all the roots
736 739 # will start being marked as descendents before the loop.
737 740 if n in roots:
738 741 # If n was a root, check if it's a 'real' root.
739 742 p = tuple(self.parents(n))
740 743 # If any of its parents are descendents, it's not a root.
741 744 if (p[0] in descendents) or (p[1] in descendents):
742 745 roots.pop(n)
743 746 else:
744 747 p = tuple(self.parents(n))
745 748 # A node is a descendent if either of its parents are
746 749 # descendents. (We seeded the descendents dict with the roots
747 750 # up there, remember?)
748 751 if (p[0] in descendents) or (p[1] in descendents):
749 752 descendents[n] = 1
750 753 isdescendent = True
751 754 if isdescendent and ((ancestors is None) or (n in ancestors)):
752 755 # Only include nodes that are both descendents and ancestors.
753 756 orderedout.append(n)
754 757 if (ancestors is not None) and (n in heads):
755 758 # We're trying to figure out which heads are reachable
756 759 # from roots.
757 760 # Mark this head as having been reached
758 761 heads[n] = 1
759 762 elif ancestors is None:
760 763 # Otherwise, we're trying to discover the heads.
761 764 # Assume this is a head because if it isn't, the next step
762 765 # will eventually remove it.
763 766 heads[n] = 1
764 767 # But, obviously its parents aren't.
765 768 for p in self.parents(n):
766 769 heads.pop(p, None)
767 770 heads = [n for n in heads.iterkeys() if heads[n] != 0]
768 771 roots = roots.keys()
769 772 assert orderedout
770 773 assert roots
771 774 assert heads
772 775 return (orderedout, roots, heads)
773 776
774 777 def heads(self, start=None, stop=None):
775 778 """return the list of all nodes that have no children
776 779
777 780 if start is specified, only heads that are descendants of
778 781 start will be returned
779 782 if stop is specified, it will consider all the revs from stop
780 783 as if they had no children
781 784 """
782 785 if start is None and stop is None:
783 786 count = len(self)
784 787 if not count:
785 788 return [nullid]
786 789 ishead = [1] * (count + 1)
787 790 index = self.index
788 791 for r in xrange(count):
789 792 e = index[r]
790 793 ishead[e[5]] = ishead[e[6]] = 0
791 794 return [self.node(r) for r in xrange(count) if ishead[r]]
792 795
793 796 if start is None:
794 797 start = nullid
795 798 if stop is None:
796 799 stop = []
797 800 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
798 801 startrev = self.rev(start)
799 802 reachable = {startrev: 1}
800 803 heads = {startrev: 1}
801 804
802 805 parentrevs = self.parentrevs
803 806 for r in xrange(startrev + 1, len(self)):
804 807 for p in parentrevs(r):
805 808 if p in reachable:
806 809 if r not in stoprevs:
807 810 reachable[r] = 1
808 811 heads[r] = 1
809 812 if p in heads and p not in stoprevs:
810 813 del heads[p]
811 814
812 815 return [self.node(r) for r in heads]
813 816
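A toy sketch of the no-argument fast path above: every revision starts as a candidate head and is cleared the moment it appears as a parent (toyparents is illustrative, not from the source).

    toyparents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}   # revs 1 and 2 fork from 0
    ishead = [1] * 3
    for r in range(3):
        for p in toyparents[r]:
            if p >= 0:
                ishead[p] = 0
    assert [r for r in range(3) if ishead[r]] == [1, 2]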
814 817 def children(self, node):
815 818 """find the children of a given node"""
816 819 c = []
817 820 p = self.rev(node)
818 821 for r in xrange(p + 1, len(self)):
819 822 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
820 823 if prevs:
821 824 for pr in prevs:
822 825 if pr == p:
823 826 c.append(self.node(r))
824 827 elif p == nullrev:
825 828 c.append(self.node(r))
826 829 return c
827 830
828 831 def _match(self, id):
829 832 if isinstance(id, (long, int)):
830 833 # rev
831 834 return self.node(id)
832 835 if len(id) == 20:
833 836 # possibly a binary node
834 837 # odds of a binary node being all hex in ASCII are 1 in 10**25
835 838 try:
836 839 node = id
837 840 r = self.rev(node) # quick search the index
838 841 return node
839 842 except LookupError:
840 843 pass # may be partial hex id
841 844 try:
842 845 # str(rev)
843 846 rev = int(id)
844 847 if str(rev) != id:
845 848 raise ValueError
846 849 if rev < 0:
847 850 rev = len(self) + rev
848 851 if rev < 0 or rev >= len(self):
849 852 raise ValueError
850 853 return self.node(rev)
851 854 except (ValueError, OverflowError):
852 855 pass
853 856 if len(id) == 40:
854 857 try:
855 858 # a full hex nodeid?
856 859 node = bin(id)
857 860 r = self.rev(node)
858 861 return node
859 862 except (TypeError, LookupError):
860 863 pass
861 864
862 865 def _partialmatch(self, id):
863 866 if len(id) < 40:
864 867 try:
865 868 # hex(node)[:...]
866 869 l = len(id) / 2 # grab an even number of digits
867 870 bin_id = bin(id[:l*2])
868 871 nl = [n for n in self.nodemap if n[:l] == bin_id]
869 872 nl = [n for n in nl if hex(n).startswith(id)]
870 873 if len(nl) > 0:
871 874 if len(nl) == 1:
872 875 return nl[0]
873 876 raise LookupError(id, self.indexfile,
874 877 _('ambiguous identifier'))
875 878 return None
876 879 except TypeError:
877 880 pass
878 881
879 882 def lookup(self, id):
880 883 """locate a node based on:
881 884 - revision number or str(revision number)
882 885 - nodeid or subset of hex nodeid
883 886 """
884 887 n = self._match(id)
885 888 if n is not None:
886 889 return n
887 890 n = self._partialmatch(id)
888 891 if n:
889 892 return n
890 893
891 894 raise LookupError(id, self.indexfile, _('no match found'))
892 895
893 896 def cmp(self, node, text):
894 897 """compare text with a given file revision; return True if they differ"""
895 898 p1, p2 = self.parents(node)
896 899 return hash(text, p1, p2) != node
897 900
898 901 def chunk(self, rev, df=None):
899 902 def loadcache(df):
900 903 if not df:
901 904 if self._inline:
902 905 df = self.opener(self.indexfile)
903 906 else:
904 907 df = self.opener(self.datafile)
905 908 df.seek(start)
906 909 self._chunkcache = (start, df.read(cache_length))
907 910
908 911 start, length = self.start(rev), self.length(rev)
909 912 if self._inline:
910 913 start += (rev + 1) * self._io.size
911 914 end = start + length
912 915
913 916 offset = 0
914 917 if not self._chunkcache:
915 918 cache_length = max(65536, length)
916 919 loadcache(df)
917 920 else:
918 921 cache_start = self._chunkcache[0]
919 922 cache_length = len(self._chunkcache[1])
920 923 cache_end = cache_start + cache_length
921 924 if start >= cache_start and end <= cache_end:
922 925 # it is cached
923 926 offset = start - cache_start
924 927 else:
925 928 cache_length = max(65536, length)
926 929 loadcache(df)
927 930
928 931 # avoid copying large chunks
929 932 c = self._chunkcache[1]
930 933 if cache_length != length:
931 934 c = c[offset:offset + length]
932 935
933 936 return decompress(c)
934 937
935 938 def revdiff(self, rev1, rev2):
936 939 """return or calculate a delta between two revisions"""
937 940 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
938 941 return self.chunk(rev2)
939 942
940 943 return mdiff.textdiff(self.revision(self.node(rev1)),
941 944 self.revision(self.node(rev2)))
942 945
943 946 def revision(self, node):
944 947 """return an uncompressed revision of a given node"""
945 948 if node == nullid:
946 949 return ""
947 950 if self._cache and self._cache[0] == node:
948 951 return str(self._cache[2])
949 952
950 953 # look up what we need to read
951 954 text = None
952 955 rev = self.rev(node)
953 956 base = self.base(rev)
954 957
955 958 # check rev flags
956 959 if self.index[rev][0] & 0xFFFF:
957 960 raise RevlogError(_('incompatible revision flag %x') %
958 961 (self.index[rev][0] & 0xFFFF))
959 962
960 963 df = None
961 964
962 965 # do we have useful data cached?
963 966 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
964 967 base = self._cache[1]
965 968 text = str(self._cache[2])
966 969 self._loadindex(base, rev + 1)
967 970 if not self._inline and rev > base + 1:
968 971 df = self.opener(self.datafile)
969 972 else:
970 973 self._loadindex(base, rev + 1)
971 974 if not self._inline and rev > base:
972 975 df = self.opener(self.datafile)
973 976 text = self.chunk(base, df=df)
974 977
975 978 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
976 979 text = mdiff.patches(text, bins)
977 980 p1, p2 = self.parents(node)
978 981 if node != hash(text, p1, p2):
979 982 raise RevlogError(_("integrity check failed on %s:%d")
980 983 % (self.datafile, rev))
981 984
982 985 self._cache = (node, rev, text)
983 986 return text
984 987
985 988 def checkinlinesize(self, tr, fp=None):
986 989 if not self._inline:
987 990 return
988 991 if not fp:
989 992 fp = self.opener(self.indexfile, 'r')
990 993 fp.seek(0, 2)
991 994 size = fp.tell()
992 995 if size < 131072:
993 996 return
994 997 trinfo = tr.find(self.indexfile)
995 998 if trinfo is None:
996 999 raise RevlogError(_("%s not found in the transaction")
997 1000 % self.indexfile)
998 1001
999 1002 trindex = trinfo[2]
1000 1003 dataoff = self.start(trindex)
1001 1004
1002 1005 tr.add(self.datafile, dataoff)
1003 1006 df = self.opener(self.datafile, 'w')
1004 1007 try:
1005 1008 calc = self._io.size
1006 1009 for r in self:
1007 1010 start = self.start(r) + (r + 1) * calc
1008 1011 length = self.length(r)
1009 1012 fp.seek(start)
1010 1013 d = fp.read(length)
1011 1014 df.write(d)
1012 1015 finally:
1013 1016 df.close()
1014 1017
1015 1018 fp.close()
1016 1019 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1017 1020 self.version &= ~(REVLOGNGINLINEDATA)
1018 1021 self._inline = False
1019 1022 for i in self:
1020 1023 e = self._io.packentry(self.index[i], self.node, self.version, i)
1021 1024 fp.write(e)
1022 1025
1023 1026 # if we don't call rename, the temp file will never replace the
1024 1027 # real index
1025 1028 fp.rename()
1026 1029
1027 1030 tr.replace(self.indexfile, trindex * calc)
1028 1031 self._chunkcache = None
1029 1032
1030 1033 def addrevision(self, text, transaction, link, p1, p2, d=None):
1031 1034 """add a revision to the log
1032 1035
1033 1036 text - the revision data to add
1034 1037 transaction - the transaction object used for rollback
1035 1038 link - the linkrev data to add
1036 1039 p1, p2 - the parent nodeids of the revision
1037 1040 d - an optional precomputed delta
1038 1041 """
1039 1042 dfh = None
1040 1043 if not self._inline:
1041 1044 dfh = self.opener(self.datafile, "a")
1042 1045 ifh = self.opener(self.indexfile, "a+")
1043 1046 try:
1044 1047 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1045 1048 finally:
1046 1049 if dfh:
1047 1050 dfh.close()
1048 1051 ifh.close()
1049 1052
1050 1053 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1051 1054 node = hash(text, p1, p2)
1052 1055 if node in self.nodemap:
1053 1056 return node
1054 1057
1055 1058 curr = len(self)
1056 1059 prev = curr - 1
1057 1060 base = self.base(prev)
1058 1061 offset = self.end(prev)
1059 1062
1060 1063 if curr:
1061 1064 if not d:
1062 1065 ptext = self.revision(self.node(prev))
1063 1066 d = mdiff.textdiff(ptext, text)
1064 1067 data = compress(d)
1065 1068 l = len(data[1]) + len(data[0])
1066 1069 dist = l + offset - self.start(base)
1067 1070
1068 1071 # full versions are inserted when the needed deltas
1069 1072 # become comparable to the uncompressed text
1070 1073 if not curr or dist > len(text) * 2:
1071 1074 data = compress(text)
1072 1075 l = len(data[1]) + len(data[0])
1073 1076 base = curr
1074 1077
1075 1078 e = (offset_type(offset, 0), l, len(text),
1076 1079 base, link, self.rev(p1), self.rev(p2), node)
1077 1080 self.index.insert(-1, e)
1078 1081 self.nodemap[node] = curr
1079 1082
1080 1083 entry = self._io.packentry(e, self.node, self.version, curr)
1081 1084 if not self._inline:
1082 1085 transaction.add(self.datafile, offset)
1083 1086 transaction.add(self.indexfile, curr * len(entry))
1084 1087 if data[0]:
1085 1088 dfh.write(data[0])
1086 1089 dfh.write(data[1])
1087 1090 dfh.flush()
1088 1091 ifh.write(entry)
1089 1092 else:
1090 1093 offset += curr * self._io.size
1091 1094 transaction.add(self.indexfile, offset, curr)
1092 1095 ifh.write(entry)
1093 1096 ifh.write(data[0])
1094 1097 ifh.write(data[1])
1095 1098 self.checkinlinesize(transaction, ifh)
1096 1099
1097 1100 self._cache = (node, curr, text)
1098 1101 return node
1099 1102
1100 1103 def ancestor(self, a, b):
1101 1104 """calculate the least common ancestor of nodes a and b"""
1102 1105
1103 1106 def parents(rev):
1104 1107 return [p for p in self.parentrevs(rev) if p != nullrev]
1105 1108
1106 1109 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1107 1110 if c is None:
1108 1111 return nullid
1109 1112
1110 1113 return self.node(c)
1111 1114
1112 1115 def group(self, nodelist, lookup, infocollect=None):
1113 1116 """calculate a delta group
1114 1117
1115 1118 Given a list of changeset revs, return a set of deltas and
1116 1119 metadata corresponding to nodes. The first delta is
1117 1120 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1118 1121 have this parent, as it has all history before these
1119 1122 changesets. The parent used is parent[0].
1120 1123 """
1121 1124 revs = [self.rev(n) for n in nodelist]
1122 1125
1123 1126 # if we don't have any revisions touched by these changesets, bail
1124 1127 if not revs:
1125 1128 yield changegroup.closechunk()
1126 1129 return
1127 1130
1128 1131 # add the parent of the first rev
1129 1132 p = self.parents(self.node(revs[0]))[0]
1130 1133 revs.insert(0, self.rev(p))
1131 1134
1132 1135 # build deltas
1133 1136 for d in xrange(0, len(revs) - 1):
1134 1137 a, b = revs[d], revs[d + 1]
1135 1138 nb = self.node(b)
1136 1139
1137 1140 if infocollect is not None:
1138 1141 infocollect(nb)
1139 1142
1140 1143 p = self.parents(nb)
1141 1144 meta = nb + p[0] + p[1] + lookup(nb)
1142 1145 if a == -1:
1143 1146 d = self.revision(nb)
1144 1147 meta += mdiff.trivialdiffheader(len(d))
1145 1148 else:
1146 1149 d = self.revdiff(a, b)
1147 1150 yield changegroup.chunkheader(len(meta) + len(d))
1148 1151 yield meta
1149 1152 if len(d) > 2**20:
1150 1153 pos = 0
1151 1154 while pos < len(d):
1152 1155 pos2 = pos + 2 ** 18
1153 1156 yield d[pos:pos2]
1154 1157 pos = pos2
1155 1158 else:
1156 1159 yield d
1157 1160
1158 1161 yield changegroup.closechunk()
1159 1162
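For reference, a sketch of one chunk as emitted above and decoded by addgroup below; chunk is assumed to be a single payload read from the changegroup stream.

    meta = chunk[:80]                     # node + p1 + p2 + linknode, 4 x 20 bytes
    node, p1, p2, cs = struct.unpack("20s20s20s20s", meta)
    delta = buffer(chunk, 80)             # the remainder is the delta itself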
1160 1163 def addgroup(self, revs, linkmapper, transaction):
1161 1164 """
1162 1165 add a delta group
1163 1166
1164 1167 Given a set of deltas, add them to the revision log. The
1165 1168 first delta is against its parent, which should be in our
1166 1169 log; the rest are against the previous delta.
1167 1170 """
1168 1171
1169 1172 # track the base of the current delta log
1170 1173 r = len(self)
1171 1174 t = r - 1
1172 1175 node = None
1173 1176
1174 1177 base = prev = nullrev
1175 1178 start = end = textlen = 0
1176 1179 if r:
1177 1180 end = self.end(t)
1178 1181
1179 1182 ifh = self.opener(self.indexfile, "a+")
1180 1183 isize = r * self._io.size
1181 1184 if self._inline:
1182 1185 transaction.add(self.indexfile, end + isize, r)
1183 1186 dfh = None
1184 1187 else:
1185 1188 transaction.add(self.indexfile, isize, r)
1186 1189 transaction.add(self.datafile, end)
1187 1190 dfh = self.opener(self.datafile, "a")
1188 1191
1189 1192 try:
1190 1193 # loop through our set of deltas
1191 1194 chain = None
1192 1195 for chunk in revs:
1193 1196 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1194 1197 link = linkmapper(cs)
1195 1198 if node in self.nodemap:
1196 1199 # this can happen if two branches make the same change
1197 1200 chain = node
1198 1201 continue
1199 1202 delta = buffer(chunk, 80)
1200 1203 del chunk
1201 1204
1202 1205 for p in (p1, p2):
1203 1206 if p not in self.nodemap:
1204 1207 raise LookupError(p, self.indexfile, _('unknown parent'))
1205 1208
1206 1209 if not chain:
1207 1210 # retrieve the parent revision of the delta chain
1208 1211 chain = p1
1209 1212 if chain not in self.nodemap:
1210 1213 raise LookupError(chain, self.indexfile, _('unknown base'))
1211 1214
1212 1215 # full versions are inserted when the needed deltas become
1213 1216 # comparable to the uncompressed text or when the previous
1214 1217 # version is not the one we have a delta against. We use
1215 1218 # the size of the previous full rev as a proxy for the
1216 1219 # current size.
1217 1220
1218 1221 if chain == prev:
1219 1222 cdelta = compress(delta)
1220 1223 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1221 1224 textlen = mdiff.patchedsize(textlen, delta)
1222 1225
1223 1226 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1224 1227 # flush our writes here so we can read it in revision
1225 1228 if dfh:
1226 1229 dfh.flush()
1227 1230 ifh.flush()
1228 1231 text = self.revision(chain)
1229 1232 if len(text) == 0:
1230 1233 # skip over trivial delta header
1231 1234 text = buffer(delta, 12)
1232 1235 else:
1233 1236 text = mdiff.patches(text, [delta])
1234 1237 del delta
1235 1238 chk = self._addrevision(text, transaction, link, p1, p2, None,
1236 1239 ifh, dfh)
1237 1240 if not dfh and not self._inline:
1238 1241 # addrevision switched from inline to conventional
1239 1242 # reopen the index
1240 1243 dfh = self.opener(self.datafile, "a")
1241 1244 ifh = self.opener(self.indexfile, "a")
1242 1245 if chk != node:
1243 1246 raise RevlogError(_("consistency error adding group"))
1244 1247 textlen = len(text)
1245 1248 else:
1246 1249 e = (offset_type(end, 0), cdeltalen, textlen, base,
1247 1250 link, self.rev(p1), self.rev(p2), node)
1248 1251 self.index.insert(-1, e)
1249 1252 self.nodemap[node] = r
1250 1253 entry = self._io.packentry(e, self.node, self.version, r)
1251 1254 if self._inline:
1252 1255 ifh.write(entry)
1253 1256 ifh.write(cdelta[0])
1254 1257 ifh.write(cdelta[1])
1255 1258 self.checkinlinesize(transaction, ifh)
1256 1259 if not self._inline:
1257 1260 dfh = self.opener(self.datafile, "a")
1258 1261 ifh = self.opener(self.indexfile, "a")
1259 1262 else:
1260 1263 dfh.write(cdelta[0])
1261 1264 dfh.write(cdelta[1])
1262 1265 ifh.write(entry)
1263 1266
1264 1267 t, r, chain, prev = r, r + 1, node, node
1265 1268 base = self.base(t)
1266 1269 start = self.start(base)
1267 1270 end = self.end(t)
1268 1271 finally:
1269 1272 if dfh:
1270 1273 dfh.close()
1271 1274 ifh.close()
1272 1275
1273 1276 return node
1274 1277
1275 1278 def strip(self, minlink):
1276 1279 """truncate the revlog on the first revision with a linkrev >= minlink
1277 1280
1278 1281 This function is called when we're stripping revision minlink and
1279 1282 its descendants from the repository.
1280 1283
1281 1284 We have to remove all revisions with linkrev >= minlink, because
1282 1285 the equivalent changelog revisions will be renumbered after the
1283 1286 strip.
1284 1287
1285 1288 So we truncate the revlog on the first of these revisions, and
1286 1289 trust that the caller has saved the revisions that shouldn't be
1287 1290 removed and that it'll readd them after this truncation.
1288 1291 """
1289 1292 if len(self) == 0:
1290 1293 return
1291 1294
1292 1295 if isinstance(self.index, lazyindex):
1293 1296 self._loadindexmap()
1294 1297
1295 1298 for rev in self:
1296 1299 if self.index[rev][4] >= minlink:
1297 1300 break
1298 1301 else:
1299 1302 return
1300 1303
1301 1304 # first truncate the files on disk
1302 1305 end = self.start(rev)
1303 1306 if not self._inline:
1304 1307 df = self.opener(self.datafile, "a")
1305 1308 df.truncate(end)
1306 1309 end = rev * self._io.size
1307 1310 else:
1308 1311 end += rev * self._io.size
1309 1312
1310 1313 indexf = self.opener(self.indexfile, "a")
1311 1314 indexf.truncate(end)
1312 1315
1313 1316 # then reset internal state in memory to forget those revisions
1314 1317 self._cache = None
1315 1318 self._chunkcache = None
1316 1319 for x in xrange(rev, len(self)):
1317 1320 del self.nodemap[self.node(x)]
1318 1321
1319 1322 del self.index[rev:-1]
1320 1323
1321 1324 def checksize(self):
1322 1325 expected = 0
1323 1326 if len(self):
1324 1327 expected = max(0, self.end(len(self) - 1))
1325 1328
1326 1329 try:
1327 1330 f = self.opener(self.datafile)
1328 1331 f.seek(0, 2)
1329 1332 actual = f.tell()
1330 1333 dd = actual - expected
1331 1334 except IOError, inst:
1332 1335 if inst.errno != errno.ENOENT:
1333 1336 raise
1334 1337 dd = 0
1335 1338
1336 1339 try:
1337 1340 f = self.opener(self.indexfile)
1338 1341 f.seek(0, 2)
1339 1342 actual = f.tell()
1340 1343 s = self._io.size
1341 1344 i = max(0, actual / s)
1342 1345 di = actual - (i * s)
1343 1346 if self._inline:
1344 1347 databytes = 0
1345 1348 for r in self:
1346 1349 databytes += max(0, self.length(r))
1347 1350 dd = 0
1348 1351 di = actual - len(self) * s - databytes
1349 1352 except IOError, inst:
1350 1353 if inst.errno != errno.ENOENT:
1351 1354 raise
1352 1355 di = 0
1353 1356
1354 1357 return (dd, di)
1355 1358
1356 1359 def files(self):
1357 1360 res = [self.indexfile]
1358 1361 if not self._inline:
1359 1362 res.append(self.datafile)
1360 1363 return res
@@ -1,26 +1,45 @@
1 1 #!/bin/sh
2 2
3 3 echo % prepare repo
4 hg init
4 hg init a
5 cd a
5 6 echo "some text" > FOO.txt
6 7 echo "another text" > bar.txt
7 8 echo "more text" > QUICK.txt
8 9 hg add
9 10 hg ci -d '0 0' -mtest1
10 11
11 12 echo
12 13 echo % verify
13 14 hg verify
14 15
15 16 echo
16 17 echo % introduce some bugs in repo
17 18 cd .hg/store/data
18 19 mv _f_o_o.txt.i X_f_o_o.txt.i
19 20 mv bar.txt.i xbar.txt.i
20 21 rm _q_u_i_c_k.txt.i
21 22
22 23 echo
23 24 echo % verify
24 25 hg verify
25 26
27 cd ..
28
29 echo % test revlog corruption
30 hg init b
31 cd b
32
33 touch a
34 hg add a
35 hg ci -m a
36
37 echo 'corrupted' > b
38 head -c 20 .hg/store/data/a.i > start
39 cat start b > .hg/store/data/a.i
40
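The two lines above keep the first 20 bytes of the index, which include the 4-byte version header, so revlog.__init__ still reads a plausible version word before parsers.parse_index fails on the spliced garbage. An equivalent sketch in Python:

    data = open(".hg/store/data/a.i", "rb").read()
    open(".hg/store/data/a.i", "wb").write(data[:20] + "corrupted\n")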
41 echo
42 echo % verify
43 hg verify
44
26 45 exit 0
@@ -1,31 +1,44 @@
1 1 % prepare repo
2 2 adding FOO.txt
3 3 adding QUICK.txt
4 4 adding bar.txt
5 5
6 6 % verify
7 7 checking changesets
8 8 checking manifests
9 9 crosschecking files in changesets and manifests
10 10 checking files
11 11 3 files, 1 changesets, 3 total revisions
12 12
13 13 % introduce some bugs in repo
14 14
15 15 % verify
16 16 checking changesets
17 17 checking manifests
18 18 crosschecking files in changesets and manifests
19 19 checking files
20 20 data/FOO.txt.i@0: missing revlog!
21 21 0: empty or missing FOO.txt
22 22 FOO.txt@0: f62022d3d590 in manifests not found
23 23 data/QUICK.txt.i@0: missing revlog!
24 24 0: empty or missing QUICK.txt
25 25 QUICK.txt@0: 88b857db8eba in manifests not found
26 26 data/bar.txt.i@0: missing revlog!
27 27 0: empty or missing bar.txt
28 28 bar.txt@0: 256559129457 in manifests not found
29 29 3 files, 1 changesets, 0 total revisions
30 30 9 integrity errors encountered!
31 31 (first damaged changeset appears to be 0)
32 % test revlog corruption
33
34 % verify
35 checking changesets
36 checking manifests
37 crosschecking files in changesets and manifests
38 checking files
39 a@0: broken revlog! (index data/a.i is corrupted)
40 warning: orphan revlog 'data/a.i'
41 1 files, 1 changesets, 0 total revisions
42 1 warnings encountered!
43 1 integrity errors encountered!
44 (first damaged changeset appears to be 0)