revlog: faster hash computation when one of the parent nodes is null...
Nicolas Dumazet
r7883:c63c30ae default
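The change in the diff below touches revlog.hash(): instead of always sorting the two parent nodes and feeding both into a fresh SHA-1 object, it precomputes the hasher state for nullid once at module load and clones it with .copy() whenever the second parent is null, which is the common case for revisions with a single parent. A minimal standalone sketch of the idea, using hashlib directly instead of Mercurial's util.sha1 wrapper and Python 2 byte strings to match the code below (the names fasthash and _nullhash are illustrative, not part of the patch):

    import hashlib

    nullid = "\0" * 20                 # the null node id, as defined in mercurial.node
    _nullhash = hashlib.sha1(nullid)   # SHA-1 state seeded with nullid, computed once

    def fasthash(text, p1, p2=nullid):
        """Combine text with its parent hashes, order-independently."""
        if p2 == nullid:
            # copying a hasher is cheaper than re-hashing the 20 null bytes;
            # nullid sorts before any other node id, so the order is unchanged
            s = _nullhash.copy()
            s.update(p1)
        else:
            l = sorted([p1, p2])
            s = hashlib.sha1(l[0])
            s.update(l[1])
        s.update(text)
        return s.digest()

This produces the same digest as the old sort-and-hash path, because nullid (twenty zero bytes) always sorts first; the saving on the single-parent path is simply one list sort and one 20-byte update per call.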
@@ -1,1361 +1,1370 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 # import stuff from node for others to import from revlog
14 14 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 15 from i18n import _
16 16 import changegroup, errno, ancestor, mdiff, parsers
17 17 import struct, util, zlib, error
18 18
19 19 _pack = struct.pack
20 20 _unpack = struct.unpack
21 21 _compress = zlib.compress
22 22 _decompress = zlib.decompress
23 23 _sha = util.sha1
24 24
25 25 # revlog flags
26 26 REVLOGV0 = 0
27 27 REVLOGNG = 1
28 28 REVLOGNGINLINEDATA = (1 << 16)
29 29 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 30 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 32
33 33 RevlogError = error.RevlogError
34 34 LookupError = error.LookupError
35 35
36 36 def getoffset(q):
37 37 return int(q >> 16)
38 38
39 39 def gettype(q):
40 40 return int(q & 0xFFFF)
41 41
42 42 def offset_type(offset, type):
43 43 return long(long(offset) << 16 | type)
44 44
45 nullhash = _sha(nullid)
46
45 47 def hash(text, p1, p2):
46 48 """generate a hash from the given text and its parent hashes
47 49
48 50 This hash combines both the current file contents and its history
49 51 in a manner that makes it easy to distinguish nodes with the same
50 52 content in the revision graph.
51 53 """
52 l = [p1, p2]
53 l.sort()
54 s = _sha(l[0])
55 s.update(l[1])
54 # As of now, if one of the parent nodes is null, p2 is null
55 if p2 == nullid:
56 # deep copy of a hash is faster than creating one
57 s = nullhash.copy()
58 s.update(p1)
59 else:
60 # neither of the parent nodes is nullid
61 l = [p1, p2]
62 l.sort()
63 s = _sha(l[0])
64 s.update(l[1])
56 65 s.update(text)
57 66 return s.digest()
58 67
59 68 def compress(text):
60 69 """ generate a possibly-compressed representation of text """
61 70 if not text:
62 71 return ("", text)
63 72 l = len(text)
64 73 bin = None
65 74 if l < 44:
66 75 pass
67 76 elif l > 1000000:
68 77 # zlib makes an internal copy, thus doubling memory usage for
69 78 # large files, so let's do this in pieces
70 79 z = zlib.compressobj()
71 80 p = []
72 81 pos = 0
73 82 while pos < l:
74 83 pos2 = pos + 2**20
75 84 p.append(z.compress(text[pos:pos2]))
76 85 pos = pos2
77 86 p.append(z.flush())
78 87 if sum(map(len, p)) < l:
79 88 bin = "".join(p)
80 89 else:
81 90 bin = _compress(text)
82 91 if bin is None or len(bin) > l:
83 92 if text[0] == '\0':
84 93 return ("", text)
85 94 return ('u', text)
86 95 return ("", bin)
87 96
88 97 def decompress(bin):
89 98 """ decompress the given input """
90 99 if not bin:
91 100 return bin
92 101 t = bin[0]
93 102 if t == '\0':
94 103 return bin
95 104 if t == 'x':
96 105 return _decompress(bin)
97 106 if t == 'u':
98 107 return bin[1:]
99 108 raise RevlogError(_("unknown compression type %r") % t)
100 109
101 110 class lazyparser(object):
102 111 """
103 112 this class avoids the need to parse the entirety of large indices
104 113 """
105 114
106 115 # lazyparser is not safe to use on windows if win32 extensions not
107 116 # available. it keeps a file handle open, which makes it impossible
108 117 # to break hardlinks on local cloned repos.
109 118
110 119 def __init__(self, dataf, size):
111 120 self.dataf = dataf
112 121 self.s = struct.calcsize(indexformatng)
113 122 self.datasize = size
114 123 self.l = size/self.s
115 124 self.index = [None] * self.l
116 125 self.map = {nullid: nullrev}
117 126 self.allmap = 0
118 127 self.all = 0
119 128 self.mapfind_count = 0
120 129
121 130 def loadmap(self):
122 131 """
123 132 during a commit, we need to make sure the rev being added is
124 133 not a duplicate. This requires loading the entire index,
125 134 which is fairly slow. loadmap can load up just the node map,
126 135 which takes much less time.
127 136 """
128 137 if self.allmap:
129 138 return
130 139 end = self.datasize
131 140 self.allmap = 1
132 141 cur = 0
133 142 count = 0
134 143 blocksize = self.s * 256
135 144 self.dataf.seek(0)
136 145 while cur < end:
137 146 data = self.dataf.read(blocksize)
138 147 off = 0
139 148 for x in xrange(256):
140 149 n = data[off + ngshaoffset:off + ngshaoffset + 20]
141 150 self.map[n] = count
142 151 count += 1
143 152 if count >= self.l:
144 153 break
145 154 off += self.s
146 155 cur += blocksize
147 156
148 157 def loadblock(self, blockstart, blocksize, data=None):
149 158 if self.all:
150 159 return
151 160 if data is None:
152 161 self.dataf.seek(blockstart)
153 162 if blockstart + blocksize > self.datasize:
154 163 # the revlog may have grown since we've started running,
155 164 # but we don't have space in self.index for more entries.
156 165 # limit blocksize so that we don't get too much data.
157 166 blocksize = max(self.datasize - blockstart, 0)
158 167 data = self.dataf.read(blocksize)
159 168 lend = len(data) / self.s
160 169 i = blockstart / self.s
161 170 off = 0
162 171 # lazyindex supports __delitem__
163 172 if lend > len(self.index) - i:
164 173 lend = len(self.index) - i
165 174 for x in xrange(lend):
166 175 if self.index[i + x] == None:
167 176 b = data[off : off + self.s]
168 177 self.index[i + x] = b
169 178 n = b[ngshaoffset:ngshaoffset + 20]
170 179 self.map[n] = i + x
171 180 off += self.s
172 181
173 182 def findnode(self, node):
174 183 """search backwards through the index file for a specific node"""
175 184 if self.allmap:
176 185 return None
177 186
178 187 # hg log will cause many many searches for the manifest
179 188 # nodes. After we get called a few times, just load the whole
180 189 # thing.
181 190 if self.mapfind_count > 8:
182 191 self.loadmap()
183 192 if node in self.map:
184 193 return node
185 194 return None
186 195 self.mapfind_count += 1
187 196 last = self.l - 1
188 197 while self.index[last] != None:
189 198 if last == 0:
190 199 self.all = 1
191 200 self.allmap = 1
192 201 return None
193 202 last -= 1
194 203 end = (last + 1) * self.s
195 204 blocksize = self.s * 256
196 205 while end >= 0:
197 206 start = max(end - blocksize, 0)
198 207 self.dataf.seek(start)
199 208 data = self.dataf.read(end - start)
200 209 findend = end - start
201 210 while True:
202 211 # we're searching backwards, so we have to make sure
203 212 # we don't find a changeset where this node is a parent
204 213 off = data.find(node, 0, findend)
205 214 findend = off
206 215 if off >= 0:
207 216 i = off / self.s
208 217 off = i * self.s
209 218 n = data[off + ngshaoffset:off + ngshaoffset + 20]
210 219 if n == node:
211 220 self.map[n] = i + start / self.s
212 221 return node
213 222 else:
214 223 break
215 224 end -= blocksize
216 225 return None
217 226
218 227 def loadindex(self, i=None, end=None):
219 228 if self.all:
220 229 return
221 230 all = False
222 231 if i == None:
223 232 blockstart = 0
224 233 blocksize = (65536 / self.s) * self.s
225 234 end = self.datasize
226 235 all = True
227 236 else:
228 237 if end:
229 238 blockstart = i * self.s
230 239 end = end * self.s
231 240 blocksize = end - blockstart
232 241 else:
233 242 blockstart = (i & ~1023) * self.s
234 243 blocksize = self.s * 1024
235 244 end = blockstart + blocksize
236 245 while blockstart < end:
237 246 self.loadblock(blockstart, blocksize)
238 247 blockstart += blocksize
239 248 if all:
240 249 self.all = True
241 250
242 251 class lazyindex(object):
243 252 """a lazy version of the index array"""
244 253 def __init__(self, parser):
245 254 self.p = parser
246 255 def __len__(self):
247 256 return len(self.p.index)
248 257 def load(self, pos):
249 258 if pos < 0:
250 259 pos += len(self.p.index)
251 260 self.p.loadindex(pos)
252 261 return self.p.index[pos]
253 262 def __getitem__(self, pos):
254 263 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
255 264 def __setitem__(self, pos, item):
256 265 self.p.index[pos] = _pack(indexformatng, *item)
257 266 def __delitem__(self, pos):
258 267 del self.p.index[pos]
259 268 def insert(self, pos, e):
260 269 self.p.index.insert(pos, _pack(indexformatng, *e))
261 270 def append(self, e):
262 271 self.p.index.append(_pack(indexformatng, *e))
263 272
264 273 class lazymap(object):
265 274 """a lazy version of the node map"""
266 275 def __init__(self, parser):
267 276 self.p = parser
268 277 def load(self, key):
269 278 n = self.p.findnode(key)
270 279 if n == None:
271 280 raise KeyError(key)
272 281 def __contains__(self, key):
273 282 if key in self.p.map:
274 283 return True
275 284 self.p.loadmap()
276 285 return key in self.p.map
277 286 def __iter__(self):
278 287 yield nullid
279 288 for i in xrange(self.p.l):
280 289 ret = self.p.index[i]
281 290 if not ret:
282 291 self.p.loadindex(i)
283 292 ret = self.p.index[i]
284 293 if isinstance(ret, str):
285 294 ret = _unpack(indexformatng, ret)
286 295 yield ret[7]
287 296 def __getitem__(self, key):
288 297 try:
289 298 return self.p.map[key]
290 299 except KeyError:
291 300 try:
292 301 self.load(key)
293 302 return self.p.map[key]
294 303 except KeyError:
295 304 raise KeyError("node " + hex(key))
296 305 def __setitem__(self, key, val):
297 306 self.p.map[key] = val
298 307 def __delitem__(self, key):
299 308 del self.p.map[key]
300 309
301 310 indexformatv0 = ">4l20s20s20s"
302 311 v0shaoffset = 56
303 312
304 313 class revlogoldio(object):
305 314 def __init__(self):
306 315 self.size = struct.calcsize(indexformatv0)
307 316
308 317 def parseindex(self, fp, inline):
309 318 s = self.size
310 319 index = []
311 320 nodemap = {nullid: nullrev}
312 321 n = off = 0
313 322 data = fp.read()
314 323 l = len(data)
315 324 while off + s <= l:
316 325 cur = data[off:off + s]
317 326 off += s
318 327 e = _unpack(indexformatv0, cur)
319 328 # transform to revlogv1 format
320 329 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
321 330 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
322 331 index.append(e2)
323 332 nodemap[e[6]] = n
324 333 n += 1
325 334
326 335 return index, nodemap, None
327 336
328 337 def packentry(self, entry, node, version, rev):
329 338 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
330 339 node(entry[5]), node(entry[6]), entry[7])
331 340 return _pack(indexformatv0, *e2)
332 341
333 342 # index ng:
334 343 # 6 bytes offset
335 344 # 2 bytes flags
336 345 # 4 bytes compressed length
337 346 # 4 bytes uncompressed length
338 347 # 4 bytes: base rev
339 348 # 4 bytes link rev
340 349 # 4 bytes parent 1 rev
341 350 # 4 bytes parent 2 rev
342 351 # 32 bytes: nodeid
343 352 indexformatng = ">Qiiiiii20s12x"
344 353 ngshaoffset = 32
345 354 versionformat = ">I"
346 355
347 356 class revlogio(object):
348 357 def __init__(self):
349 358 self.size = struct.calcsize(indexformatng)
350 359
351 360 def parseindex(self, fp, inline):
352 361 try:
353 362 size = util.fstat(fp).st_size
354 363 except AttributeError:
355 364 size = 0
356 365
357 366 if util.openhardlinks() and not inline and size > 1000000:
358 367 # big index, let's parse it on demand
359 368 parser = lazyparser(fp, size)
360 369 index = lazyindex(parser)
361 370 nodemap = lazymap(parser)
362 371 e = list(index[0])
363 372 type = gettype(e[0])
364 373 e[0] = offset_type(0, type)
365 374 index[0] = e
366 375 return index, nodemap, None
367 376
368 377 data = fp.read()
369 378 # call the C implementation to parse the index data
370 379 index, nodemap, cache = parsers.parse_index(data, inline)
371 380 return index, nodemap, cache
372 381
373 382 def packentry(self, entry, node, version, rev):
374 383 p = _pack(indexformatng, *entry)
375 384 if rev == 0:
376 385 p = _pack(versionformat, version) + p[4:]
377 386 return p
378 387
379 388 class revlog(object):
380 389 """
381 390 the underlying revision storage object
382 391
383 392 A revlog consists of two parts, an index and the revision data.
384 393
385 394 The index is a file with a fixed record size containing
386 395 information on each revision, including its nodeid (hash), the
387 396 nodeids of its parents, the position and offset of its data within
388 397 the data file, and the revision it's based on. Finally, each entry
389 398 contains a linkrev entry that can serve as a pointer to external
390 399 data.
391 400
392 401 The revision data itself is a linear collection of data chunks.
393 402 Each chunk represents a revision and is usually represented as a
394 403 delta against the previous chunk. To bound lookup time, runs of
395 404 deltas are limited to about 2 times the length of the original
396 405 version data. This makes retrieval of a version proportional to
397 406 its size, or O(1) relative to the number of revisions.
398 407
399 408 Both pieces of the revlog are written to in an append-only
400 409 fashion, which means we never need to rewrite a file to insert or
401 410 remove data, and can use some simple techniques to avoid the need
402 411 for locking while reading.
403 412 """
404 413 def __init__(self, opener, indexfile):
405 414 """
406 415 create a revlog object
407 416
408 417 opener is a function that abstracts the file opening operation
409 418 and can be used to implement COW semantics or the like.
410 419 """
411 420 self.indexfile = indexfile
412 421 self.datafile = indexfile[:-2] + ".d"
413 422 self.opener = opener
414 423 self._cache = None
415 424 self._chunkcache = None
416 425 self.nodemap = {nullid: nullrev}
417 426 self.index = []
418 427
419 428 v = REVLOG_DEFAULT_VERSION
420 429 if hasattr(opener, "defversion"):
421 430 v = opener.defversion
422 431 if v & REVLOGNG:
423 432 v |= REVLOGNGINLINEDATA
424 433
425 434 i = ""
426 435 try:
427 436 f = self.opener(self.indexfile)
428 437 i = f.read(4)
429 438 f.seek(0)
430 439 if len(i) > 0:
431 440 v = struct.unpack(versionformat, i)[0]
432 441 except IOError, inst:
433 442 if inst.errno != errno.ENOENT:
434 443 raise
435 444
436 445 self.version = v
437 446 self._inline = v & REVLOGNGINLINEDATA
438 447 flags = v & ~0xFFFF
439 448 fmt = v & 0xFFFF
440 449 if fmt == REVLOGV0 and flags:
441 450 raise RevlogError(_("index %s unknown flags %#04x for format v0")
442 451 % (self.indexfile, flags >> 16))
443 452 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
444 453 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
445 454 % (self.indexfile, flags >> 16))
446 455 elif fmt > REVLOGNG:
447 456 raise RevlogError(_("index %s unknown format %d")
448 457 % (self.indexfile, fmt))
449 458
450 459 self._io = revlogio()
451 460 if self.version == REVLOGV0:
452 461 self._io = revlogoldio()
453 462 if i:
454 463 d = self._io.parseindex(f, self._inline)
455 464 self.index, self.nodemap, self._chunkcache = d
456 465
457 466 # add the magic null revision at -1 (if it hasn't been done already)
458 467 if (self.index == [] or isinstance(self.index, lazyindex) or
459 468 self.index[-1][7] != nullid) :
460 469 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
461 470
462 471 def _loadindex(self, start, end):
463 472 """load a block of indexes all at once from the lazy parser"""
464 473 if isinstance(self.index, lazyindex):
465 474 self.index.p.loadindex(start, end)
466 475
467 476 def _loadindexmap(self):
468 477 """loads both the map and the index from the lazy parser"""
469 478 if isinstance(self.index, lazyindex):
470 479 p = self.index.p
471 480 p.loadindex()
472 481 self.nodemap = p.map
473 482
474 483 def _loadmap(self):
475 484 """loads the map from the lazy parser"""
476 485 if isinstance(self.nodemap, lazymap):
477 486 self.nodemap.p.loadmap()
478 487 self.nodemap = self.nodemap.p.map
479 488
480 489 def tip(self):
481 490 return self.node(len(self.index) - 2)
482 491 def __len__(self):
483 492 return len(self.index) - 1
484 493 def __iter__(self):
485 494 for i in xrange(len(self)):
486 495 yield i
487 496 def rev(self, node):
488 497 try:
489 498 return self.nodemap[node]
490 499 except KeyError:
491 500 raise LookupError(node, self.indexfile, _('no node'))
492 501 def node(self, rev):
493 502 return self.index[rev][7]
494 503 def linkrev(self, rev):
495 504 return self.index[rev][4]
496 505 def parents(self, node):
497 506 i = self.index
498 507 d = i[self.rev(node)]
499 508 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
500 509 def parentrevs(self, rev):
501 510 return self.index[rev][5:7]
502 511 def start(self, rev):
503 512 return int(self.index[rev][0] >> 16)
504 513 def end(self, rev):
505 514 return self.start(rev) + self.length(rev)
506 515 def length(self, rev):
507 516 return self.index[rev][1]
508 517 def base(self, rev):
509 518 return self.index[rev][3]
510 519
511 520 def size(self, rev):
512 521 """return the length of the uncompressed text for a given revision"""
513 522 l = self.index[rev][2]
514 523 if l >= 0:
515 524 return l
516 525
517 526 t = self.revision(self.node(rev))
518 527 return len(t)
519 528
520 529 # alternate implementation. The advantage of this code is that it
521 530 # will be faster for a single revision. But, the results are not
522 531 # cached, so finding the size of every revision will be slower.
523 532 """
524 533 if self.cache and self.cache[1] == rev:
525 534 return len(self.cache[2])
526 535
527 536 base = self.base(rev)
528 537 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
529 538 base = self.cache[1]
530 539 text = self.cache[2]
531 540 else:
532 541 text = self.revision(self.node(base))
533 542
534 543 l = len(text)
535 544 for x in xrange(base + 1, rev + 1):
536 545 l = mdiff.patchedsize(l, self.chunk(x))
537 546 return l
538 547 """
539 548
540 549 def reachable(self, node, stop=None):
541 550 """return a hash of all nodes ancestral to a given node, including
542 551 the node itself, stopping when stop is matched"""
543 552 reachable = {}
544 553 visit = [node]
545 554 reachable[node] = 1
546 555 if stop:
547 556 stopn = self.rev(stop)
548 557 else:
549 558 stopn = 0
550 559 while visit:
551 560 n = visit.pop(0)
552 561 if n == stop:
553 562 continue
554 563 if n == nullid:
555 564 continue
556 565 for p in self.parents(n):
557 566 if self.rev(p) < stopn:
558 567 continue
559 568 if p not in reachable:
560 569 reachable[p] = 1
561 570 visit.append(p)
562 571 return reachable
563 572
564 573 def ancestors(self, *revs):
565 574 'Generate the ancestors of revs using a breadth-first visit'
566 575 visit = list(revs)
567 576 seen = util.set([nullrev])
568 577 while visit:
569 578 for parent in self.parentrevs(visit.pop(0)):
570 579 if parent not in seen:
571 580 visit.append(parent)
572 581 seen.add(parent)
573 582 yield parent
574 583
575 584 def descendants(self, *revs):
576 585 'Generate the descendants of revs in topological order'
577 586 seen = util.set(revs)
578 587 for i in xrange(min(revs) + 1, len(self)):
579 588 for x in self.parentrevs(i):
580 589 if x != nullrev and x in seen:
581 590 seen.add(i)
582 591 yield i
583 592 break
584 593
585 594 def findmissing(self, common=None, heads=None):
586 595 '''
587 596 returns the topologically sorted list of nodes from the set:
588 597 missing = (ancestors(heads) \ ancestors(common))
589 598
590 599 where ancestors() is the set of ancestors from heads, heads included
591 600
592 601 if heads is None, the heads of the revlog are used
593 602 if common is None, nullid is assumed to be a common node
594 603 '''
595 604 if common is None:
596 605 common = [nullid]
597 606 if heads is None:
598 607 heads = self.heads()
599 608
600 609 common = [self.rev(n) for n in common]
601 610 heads = [self.rev(n) for n in heads]
602 611
603 612 # we want the ancestors, but inclusive
604 613 has = dict.fromkeys(self.ancestors(*common))
605 614 has[nullrev] = None
606 615 for r in common:
607 616 has[r] = None
608 617
609 618 # take all ancestors from heads that aren't in has
610 619 missing = {}
611 620 visit = [r for r in heads if r not in has]
612 621 while visit:
613 622 r = visit.pop(0)
614 623 if r in missing:
615 624 continue
616 625 else:
617 626 missing[r] = None
618 627 for p in self.parentrevs(r):
619 628 if p not in has:
620 629 visit.append(p)
621 630 missing = missing.keys()
622 631 missing.sort()
623 632 return [self.node(r) for r in missing]
624 633
625 634 def nodesbetween(self, roots=None, heads=None):
626 635 """Return a tuple containing three elements. Elements 1 and 2 contain
627 636 the final lists of bases and heads after all the unreachable ones have been
628 637 pruned. Element 0 contains a topologically sorted list of all
629 638
630 639 nodes that satisfy these constraints:
631 640 1. All nodes must be descended from a node in roots (the nodes on
632 641 roots are considered descended from themselves).
633 642 2. All nodes must also be ancestors of a node in heads (the nodes in
634 643 heads are considered to be their own ancestors).
635 644
636 645 If roots is unspecified, nullid is assumed as the only root.
637 646 If heads is unspecified, it is taken to be the output of the
638 647 heads method (i.e. a list of all nodes in the repository that
639 648 have no children)."""
640 649 nonodes = ([], [], [])
641 650 if roots is not None:
642 651 roots = list(roots)
643 652 if not roots:
644 653 return nonodes
645 654 lowestrev = min([self.rev(n) for n in roots])
646 655 else:
647 656 roots = [nullid] # Everybody's a descendent of nullid
648 657 lowestrev = nullrev
649 658 if (lowestrev == nullrev) and (heads is None):
650 659 # We want _all_ the nodes!
651 660 return ([self.node(r) for r in self], [nullid], list(self.heads()))
652 661 if heads is None:
653 662 # All nodes are ancestors, so the latest ancestor is the last
654 663 # node.
655 664 highestrev = len(self) - 1
656 665 # Set ancestors to None to signal that every node is an ancestor.
657 666 ancestors = None
658 667 # Set heads to an empty dictionary for later discovery of heads
659 668 heads = {}
660 669 else:
661 670 heads = list(heads)
662 671 if not heads:
663 672 return nonodes
664 673 ancestors = {}
665 674 # Turn heads into a dictionary so we can remove 'fake' heads.
666 675 # Also, later we will be using it to filter out the heads we can't
667 676 # find from roots.
668 677 heads = dict.fromkeys(heads, 0)
669 678 # Start at the top and keep marking parents until we're done.
670 679 nodestotag = heads.keys()
671 680 # Remember where the top was so we can use it as a limit later.
672 681 highestrev = max([self.rev(n) for n in nodestotag])
673 682 while nodestotag:
674 683 # grab a node to tag
675 684 n = nodestotag.pop()
676 685 # Never tag nullid
677 686 if n == nullid:
678 687 continue
679 688 # A node's revision number represents its place in a
680 689 # topologically sorted list of nodes.
681 690 r = self.rev(n)
682 691 if r >= lowestrev:
683 692 if n not in ancestors:
684 693 # If we are possibly a descendent of one of the roots
685 694 # and we haven't already been marked as an ancestor
686 695 ancestors[n] = 1 # Mark as ancestor
687 696 # Add non-nullid parents to list of nodes to tag.
688 697 nodestotag.extend([p for p in self.parents(n) if
689 698 p != nullid])
690 699 elif n in heads: # We've seen it before, is it a fake head?
691 700 # So it is, real heads should not be the ancestors of
692 701 # any other heads.
693 702 heads.pop(n)
694 703 if not ancestors:
695 704 return nonodes
696 705 # Now that we have our set of ancestors, we want to remove any
697 706 # roots that are not ancestors.
698 707
699 708 # If one of the roots was nullid, everything is included anyway.
700 709 if lowestrev > nullrev:
701 710 # But, since we weren't, let's recompute the lowest rev to not
702 711 # include roots that aren't ancestors.
703 712
704 713 # Filter out roots that aren't ancestors of heads
705 714 roots = [n for n in roots if n in ancestors]
706 715 # Recompute the lowest revision
707 716 if roots:
708 717 lowestrev = min([self.rev(n) for n in roots])
709 718 else:
710 719 # No more roots? Return empty list
711 720 return nonodes
712 721 else:
713 722 # We are descending from nullid, and don't need to care about
714 723 # any other roots.
715 724 lowestrev = nullrev
716 725 roots = [nullid]
717 726 # Transform our roots list into a 'set' (i.e. a dictionary where the
718 727 # values don't matter).
719 728 descendents = dict.fromkeys(roots, 1)
720 729 # Also, keep the original roots so we can filter out roots that aren't
721 730 # 'real' roots (i.e. are descended from other roots).
722 731 roots = descendents.copy()
723 732 # Our topologically sorted list of output nodes.
724 733 orderedout = []
725 734 # Don't start at nullid since we don't want nullid in our output list,
726 735 # and if nullid shows up in descedents, empty parents will look like
727 736 # they're descendents.
728 737 for r in xrange(max(lowestrev, 0), highestrev + 1):
729 738 n = self.node(r)
730 739 isdescendent = False
731 740 if lowestrev == nullrev: # Everybody is a descendent of nullid
732 741 isdescendent = True
733 742 elif n in descendents:
734 743 # n is already a descendent
735 744 isdescendent = True
736 745 # This check only needs to be done here because all the roots
737 746 # will start being marked as descendents before the loop.
738 747 if n in roots:
739 748 # If n was a root, check if it's a 'real' root.
740 749 p = tuple(self.parents(n))
741 750 # If any of its parents are descendents, it's not a root.
742 751 if (p[0] in descendents) or (p[1] in descendents):
743 752 roots.pop(n)
744 753 else:
745 754 p = tuple(self.parents(n))
746 755 # A node is a descendent if either of its parents are
747 756 # descendents. (We seeded the dependents list with the roots
748 757 # up there, remember?)
749 758 if (p[0] in descendents) or (p[1] in descendents):
750 759 descendents[n] = 1
751 760 isdescendent = True
752 761 if isdescendent and ((ancestors is None) or (n in ancestors)):
753 762 # Only include nodes that are both descendents and ancestors.
754 763 orderedout.append(n)
755 764 if (ancestors is not None) and (n in heads):
756 765 # We're trying to figure out which heads are reachable
757 766 # from roots.
758 767 # Mark this head as having been reached
759 768 heads[n] = 1
760 769 elif ancestors is None:
761 770 # Otherwise, we're trying to discover the heads.
762 771 # Assume this is a head because if it isn't, the next step
763 772 # will eventually remove it.
764 773 heads[n] = 1
765 774 # But, obviously its parents aren't.
766 775 for p in self.parents(n):
767 776 heads.pop(p, None)
768 777 heads = [n for n in heads.iterkeys() if heads[n] != 0]
769 778 roots = roots.keys()
770 779 assert orderedout
771 780 assert roots
772 781 assert heads
773 782 return (orderedout, roots, heads)
774 783
775 784 def heads(self, start=None, stop=None):
776 785 """return the list of all nodes that have no children
777 786
778 787 if start is specified, only heads that are descendants of
779 788 start will be returned
780 789 if stop is specified, it will consider all the revs from stop
781 790 as if they had no children
782 791 """
783 792 if start is None and stop is None:
784 793 count = len(self)
785 794 if not count:
786 795 return [nullid]
787 796 ishead = [1] * (count + 1)
788 797 index = self.index
789 798 for r in xrange(count):
790 799 e = index[r]
791 800 ishead[e[5]] = ishead[e[6]] = 0
792 801 return [self.node(r) for r in xrange(count) if ishead[r]]
793 802
794 803 if start is None:
795 804 start = nullid
796 805 if stop is None:
797 806 stop = []
798 807 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
799 808 startrev = self.rev(start)
800 809 reachable = {startrev: 1}
801 810 heads = {startrev: 1}
802 811
803 812 parentrevs = self.parentrevs
804 813 for r in xrange(startrev + 1, len(self)):
805 814 for p in parentrevs(r):
806 815 if p in reachable:
807 816 if r not in stoprevs:
808 817 reachable[r] = 1
809 818 heads[r] = 1
810 819 if p in heads and p not in stoprevs:
811 820 del heads[p]
812 821
813 822 return [self.node(r) for r in heads]
814 823
815 824 def children(self, node):
816 825 """find the children of a given node"""
817 826 c = []
818 827 p = self.rev(node)
819 828 for r in range(p + 1, len(self)):
820 829 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
821 830 if prevs:
822 831 for pr in prevs:
823 832 if pr == p:
824 833 c.append(self.node(r))
825 834 elif p == nullrev:
826 835 c.append(self.node(r))
827 836 return c
828 837
829 838 def _match(self, id):
830 839 if isinstance(id, (long, int)):
831 840 # rev
832 841 return self.node(id)
833 842 if len(id) == 20:
834 843 # possibly a binary node
835 844 # odds of a binary node being all hex in ASCII are 1 in 10**25
836 845 try:
837 846 node = id
838 847 self.rev(node) # quick search the index
839 848 return node
840 849 except LookupError:
841 850 pass # may be partial hex id
842 851 try:
843 852 # str(rev)
844 853 rev = int(id)
845 854 if str(rev) != id:
846 855 raise ValueError
847 856 if rev < 0:
848 857 rev = len(self) + rev
849 858 if rev < 0 or rev >= len(self):
850 859 raise ValueError
851 860 return self.node(rev)
852 861 except (ValueError, OverflowError):
853 862 pass
854 863 if len(id) == 40:
855 864 try:
856 865 # a full hex nodeid?
857 866 node = bin(id)
858 867 self.rev(node)
859 868 return node
860 869 except (TypeError, LookupError):
861 870 pass
862 871
863 872 def _partialmatch(self, id):
864 873 if len(id) < 40:
865 874 try:
866 875 # hex(node)[:...]
867 876 l = len(id) / 2 # grab an even number of digits
868 877 bin_id = bin(id[:l*2])
869 878 nl = [n for n in self.nodemap if n[:l] == bin_id]
870 879 nl = [n for n in nl if hex(n).startswith(id)]
871 880 if len(nl) > 0:
872 881 if len(nl) == 1:
873 882 return nl[0]
874 883 raise LookupError(id, self.indexfile,
875 884 _('ambiguous identifier'))
876 885 return None
877 886 except TypeError:
878 887 pass
879 888
880 889 def lookup(self, id):
881 890 """locate a node based on:
882 891 - revision number or str(revision number)
883 892 - nodeid or subset of hex nodeid
884 893 """
885 894 n = self._match(id)
886 895 if n is not None:
887 896 return n
888 897 n = self._partialmatch(id)
889 898 if n:
890 899 return n
891 900
892 901 raise LookupError(id, self.indexfile, _('no match found'))
893 902
894 903 def cmp(self, node, text):
895 904 """compare text with a given file revision"""
896 905 p1, p2 = self.parents(node)
897 906 return hash(text, p1, p2) != node
898 907
899 908 def chunk(self, rev, df=None):
900 909 def loadcache(df):
901 910 if not df:
902 911 if self._inline:
903 912 df = self.opener(self.indexfile)
904 913 else:
905 914 df = self.opener(self.datafile)
906 915 df.seek(start)
907 916 self._chunkcache = (start, df.read(cache_length))
908 917
909 918 start, length = self.start(rev), self.length(rev)
910 919 if self._inline:
911 920 start += (rev + 1) * self._io.size
912 921 end = start + length
913 922
914 923 offset = 0
915 924 if not self._chunkcache:
916 925 cache_length = max(65536, length)
917 926 loadcache(df)
918 927 else:
919 928 cache_start = self._chunkcache[0]
920 929 cache_length = len(self._chunkcache[1])
921 930 cache_end = cache_start + cache_length
922 931 if start >= cache_start and end <= cache_end:
923 932 # it is cached
924 933 offset = start - cache_start
925 934 else:
926 935 cache_length = max(65536, length)
927 936 loadcache(df)
928 937
929 938 # avoid copying large chunks
930 939 c = self._chunkcache[1]
931 940 if cache_length != length:
932 941 c = c[offset:offset + length]
933 942
934 943 return decompress(c)
935 944
936 945 def revdiff(self, rev1, rev2):
937 946 """return or calculate a delta between two revisions"""
938 947 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
939 948 return self.chunk(rev2)
940 949
941 950 return mdiff.textdiff(self.revision(self.node(rev1)),
942 951 self.revision(self.node(rev2)))
943 952
944 953 def revision(self, node):
945 954 """return an uncompressed revision of a given node"""
946 955 if node == nullid:
947 956 return ""
948 957 if self._cache and self._cache[0] == node:
949 958 return str(self._cache[2])
950 959
951 960 # look up what we need to read
952 961 text = None
953 962 rev = self.rev(node)
954 963 base = self.base(rev)
955 964
956 965 # check rev flags
957 966 if self.index[rev][0] & 0xFFFF:
958 967 raise RevlogError(_('incompatible revision flag %x') %
959 968 (self.index[rev][0] & 0xFFFF))
960 969
961 970 df = None
962 971
963 972 # do we have useful data cached?
964 973 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
965 974 base = self._cache[1]
966 975 text = str(self._cache[2])
967 976 self._loadindex(base, rev + 1)
968 977 if not self._inline and rev > base + 1:
969 978 df = self.opener(self.datafile)
970 979 else:
971 980 self._loadindex(base, rev + 1)
972 981 if not self._inline and rev > base:
973 982 df = self.opener(self.datafile)
974 983 text = self.chunk(base, df=df)
975 984
976 985 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
977 986 text = mdiff.patches(text, bins)
978 987 p1, p2 = self.parents(node)
979 988 if node != hash(text, p1, p2):
980 989 raise RevlogError(_("integrity check failed on %s:%d")
981 990 % (self.datafile, rev))
982 991
983 992 self._cache = (node, rev, text)
984 993 return text
985 994
986 995 def checkinlinesize(self, tr, fp=None):
987 996 if not self._inline:
988 997 return
989 998 if not fp:
990 999 fp = self.opener(self.indexfile, 'r')
991 1000 fp.seek(0, 2)
992 1001 size = fp.tell()
993 1002 if size < 131072:
994 1003 return
995 1004 trinfo = tr.find(self.indexfile)
996 1005 if trinfo == None:
997 1006 raise RevlogError(_("%s not found in the transaction")
998 1007 % self.indexfile)
999 1008
1000 1009 trindex = trinfo[2]
1001 1010 dataoff = self.start(trindex)
1002 1011
1003 1012 tr.add(self.datafile, dataoff)
1004 1013 df = self.opener(self.datafile, 'w')
1005 1014 try:
1006 1015 calc = self._io.size
1007 1016 for r in self:
1008 1017 start = self.start(r) + (r + 1) * calc
1009 1018 length = self.length(r)
1010 1019 fp.seek(start)
1011 1020 d = fp.read(length)
1012 1021 df.write(d)
1013 1022 finally:
1014 1023 df.close()
1015 1024
1016 1025 fp.close()
1017 1026 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1018 1027 self.version &= ~(REVLOGNGINLINEDATA)
1019 1028 self._inline = False
1020 1029 for i in self:
1021 1030 e = self._io.packentry(self.index[i], self.node, self.version, i)
1022 1031 fp.write(e)
1023 1032
1024 1033 # if we don't call rename, the temp file will never replace the
1025 1034 # real index
1026 1035 fp.rename()
1027 1036
1028 1037 tr.replace(self.indexfile, trindex * calc)
1029 1038 self._chunkcache = None
1030 1039
1031 1040 def addrevision(self, text, transaction, link, p1, p2, d=None):
1032 1041 """add a revision to the log
1033 1042
1034 1043 text - the revision data to add
1035 1044 transaction - the transaction object used for rollback
1036 1045 link - the linkrev data to add
1037 1046 p1, p2 - the parent nodeids of the revision
1038 1047 d - an optional precomputed delta
1039 1048 """
1040 1049 dfh = None
1041 1050 if not self._inline:
1042 1051 dfh = self.opener(self.datafile, "a")
1043 1052 ifh = self.opener(self.indexfile, "a+")
1044 1053 try:
1045 1054 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1046 1055 finally:
1047 1056 if dfh:
1048 1057 dfh.close()
1049 1058 ifh.close()
1050 1059
1051 1060 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1052 1061 node = hash(text, p1, p2)
1053 1062 if node in self.nodemap:
1054 1063 return node
1055 1064
1056 1065 curr = len(self)
1057 1066 prev = curr - 1
1058 1067 base = self.base(prev)
1059 1068 offset = self.end(prev)
1060 1069
1061 1070 if curr:
1062 1071 if not d:
1063 1072 ptext = self.revision(self.node(prev))
1064 1073 d = mdiff.textdiff(ptext, text)
1065 1074 data = compress(d)
1066 1075 l = len(data[1]) + len(data[0])
1067 1076 dist = l + offset - self.start(base)
1068 1077
1069 1078 # full versions are inserted when the needed deltas
1070 1079 # become comparable to the uncompressed text
1071 1080 if not curr or dist > len(text) * 2:
1072 1081 data = compress(text)
1073 1082 l = len(data[1]) + len(data[0])
1074 1083 base = curr
1075 1084
1076 1085 e = (offset_type(offset, 0), l, len(text),
1077 1086 base, link, self.rev(p1), self.rev(p2), node)
1078 1087 self.index.insert(-1, e)
1079 1088 self.nodemap[node] = curr
1080 1089
1081 1090 entry = self._io.packentry(e, self.node, self.version, curr)
1082 1091 if not self._inline:
1083 1092 transaction.add(self.datafile, offset)
1084 1093 transaction.add(self.indexfile, curr * len(entry))
1085 1094 if data[0]:
1086 1095 dfh.write(data[0])
1087 1096 dfh.write(data[1])
1088 1097 dfh.flush()
1089 1098 ifh.write(entry)
1090 1099 else:
1091 1100 offset += curr * self._io.size
1092 1101 transaction.add(self.indexfile, offset, curr)
1093 1102 ifh.write(entry)
1094 1103 ifh.write(data[0])
1095 1104 ifh.write(data[1])
1096 1105 self.checkinlinesize(transaction, ifh)
1097 1106
1098 1107 self._cache = (node, curr, text)
1099 1108 return node
1100 1109
1101 1110 def ancestor(self, a, b):
1102 1111 """calculate the least common ancestor of nodes a and b"""
1103 1112
1104 1113 def parents(rev):
1105 1114 return [p for p in self.parentrevs(rev) if p != nullrev]
1106 1115
1107 1116 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1108 1117 if c is None:
1109 1118 return nullid
1110 1119
1111 1120 return self.node(c)
1112 1121
1113 1122 def group(self, nodelist, lookup, infocollect=None):
1114 1123 """calculate a delta group
1115 1124
1116 1125 Given a list of changeset revs, return a set of deltas and
1117 1126 metadata corresponding to nodes. the first delta is
1118 1127 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1119 1128 have this parent as it has all history before these
1120 1129 changesets. parent is parent[0]
1121 1130 """
1122 1131 revs = [self.rev(n) for n in nodelist]
1123 1132
1124 1133 # if we don't have any revisions touched by these changesets, bail
1125 1134 if not revs:
1126 1135 yield changegroup.closechunk()
1127 1136 return
1128 1137
1129 1138 # add the parent of the first rev
1130 1139 p = self.parents(self.node(revs[0]))[0]
1131 1140 revs.insert(0, self.rev(p))
1132 1141
1133 1142 # build deltas
1134 1143 for d in xrange(0, len(revs) - 1):
1135 1144 a, b = revs[d], revs[d + 1]
1136 1145 nb = self.node(b)
1137 1146
1138 1147 if infocollect is not None:
1139 1148 infocollect(nb)
1140 1149
1141 1150 p = self.parents(nb)
1142 1151 meta = nb + p[0] + p[1] + lookup(nb)
1143 1152 if a == -1:
1144 1153 d = self.revision(nb)
1145 1154 meta += mdiff.trivialdiffheader(len(d))
1146 1155 else:
1147 1156 d = self.revdiff(a, b)
1148 1157 yield changegroup.chunkheader(len(meta) + len(d))
1149 1158 yield meta
1150 1159 if len(d) > 2**20:
1151 1160 pos = 0
1152 1161 while pos < len(d):
1153 1162 pos2 = pos + 2 ** 18
1154 1163 yield d[pos:pos2]
1155 1164 pos = pos2
1156 1165 else:
1157 1166 yield d
1158 1167
1159 1168 yield changegroup.closechunk()
1160 1169
1161 1170 def addgroup(self, revs, linkmapper, transaction):
1162 1171 """
1163 1172 add a delta group
1164 1173
1165 1174 given a set of deltas, add them to the revision log. the
1166 1175 first delta is against its parent, which should be in our
1167 1176 log, the rest are against the previous delta.
1168 1177 """
1169 1178
1170 1179 #track the base of the current delta log
1171 1180 r = len(self)
1172 1181 t = r - 1
1173 1182 node = None
1174 1183
1175 1184 base = prev = nullrev
1176 1185 start = end = textlen = 0
1177 1186 if r:
1178 1187 end = self.end(t)
1179 1188
1180 1189 ifh = self.opener(self.indexfile, "a+")
1181 1190 isize = r * self._io.size
1182 1191 if self._inline:
1183 1192 transaction.add(self.indexfile, end + isize, r)
1184 1193 dfh = None
1185 1194 else:
1186 1195 transaction.add(self.indexfile, isize, r)
1187 1196 transaction.add(self.datafile, end)
1188 1197 dfh = self.opener(self.datafile, "a")
1189 1198
1190 1199 try:
1191 1200 # loop through our set of deltas
1192 1201 chain = None
1193 1202 for chunk in revs:
1194 1203 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1195 1204 link = linkmapper(cs)
1196 1205 if node in self.nodemap:
1197 1206 # this can happen if two branches make the same change
1198 1207 chain = node
1199 1208 continue
1200 1209 delta = buffer(chunk, 80)
1201 1210 del chunk
1202 1211
1203 1212 for p in (p1, p2):
1204 1213 if not p in self.nodemap:
1205 1214 raise LookupError(p, self.indexfile, _('unknown parent'))
1206 1215
1207 1216 if not chain:
1208 1217 # retrieve the parent revision of the delta chain
1209 1218 chain = p1
1210 1219 if not chain in self.nodemap:
1211 1220 raise LookupError(chain, self.indexfile, _('unknown base'))
1212 1221
1213 1222 # full versions are inserted when the needed deltas become
1214 1223 # comparable to the uncompressed text or when the previous
1215 1224 # version is not the one we have a delta against. We use
1216 1225 # the size of the previous full rev as a proxy for the
1217 1226 # current size.
1218 1227
1219 1228 if chain == prev:
1220 1229 cdelta = compress(delta)
1221 1230 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1222 1231 textlen = mdiff.patchedsize(textlen, delta)
1223 1232
1224 1233 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1225 1234 # flush our writes here so we can read it in revision
1226 1235 if dfh:
1227 1236 dfh.flush()
1228 1237 ifh.flush()
1229 1238 text = self.revision(chain)
1230 1239 if len(text) == 0:
1231 1240 # skip over trivial delta header
1232 1241 text = buffer(delta, 12)
1233 1242 else:
1234 1243 text = mdiff.patches(text, [delta])
1235 1244 del delta
1236 1245 chk = self._addrevision(text, transaction, link, p1, p2, None,
1237 1246 ifh, dfh)
1238 1247 if not dfh and not self._inline:
1239 1248 # addrevision switched from inline to conventional
1240 1249 # reopen the index
1241 1250 dfh = self.opener(self.datafile, "a")
1242 1251 ifh = self.opener(self.indexfile, "a")
1243 1252 if chk != node:
1244 1253 raise RevlogError(_("consistency error adding group"))
1245 1254 textlen = len(text)
1246 1255 else:
1247 1256 e = (offset_type(end, 0), cdeltalen, textlen, base,
1248 1257 link, self.rev(p1), self.rev(p2), node)
1249 1258 self.index.insert(-1, e)
1250 1259 self.nodemap[node] = r
1251 1260 entry = self._io.packentry(e, self.node, self.version, r)
1252 1261 if self._inline:
1253 1262 ifh.write(entry)
1254 1263 ifh.write(cdelta[0])
1255 1264 ifh.write(cdelta[1])
1256 1265 self.checkinlinesize(transaction, ifh)
1257 1266 if not self._inline:
1258 1267 dfh = self.opener(self.datafile, "a")
1259 1268 ifh = self.opener(self.indexfile, "a")
1260 1269 else:
1261 1270 dfh.write(cdelta[0])
1262 1271 dfh.write(cdelta[1])
1263 1272 ifh.write(entry)
1264 1273
1265 1274 t, r, chain, prev = r, r + 1, node, node
1266 1275 base = self.base(t)
1267 1276 start = self.start(base)
1268 1277 end = self.end(t)
1269 1278 finally:
1270 1279 if dfh:
1271 1280 dfh.close()
1272 1281 ifh.close()
1273 1282
1274 1283 return node
1275 1284
1276 1285 def strip(self, minlink):
1277 1286 """truncate the revlog on the first revision with a linkrev >= minlink
1278 1287
1279 1288 This function is called when we're stripping revision minlink and
1280 1289 its descendants from the repository.
1281 1290
1282 1291 We have to remove all revisions with linkrev >= minlink, because
1283 1292 the equivalent changelog revisions will be renumbered after the
1284 1293 strip.
1285 1294
1286 1295 So we truncate the revlog on the first of these revisions, and
1287 1296 trust that the caller has saved the revisions that shouldn't be
1288 1297 removed and that it'll readd them after this truncation.
1289 1298 """
1290 1299 if len(self) == 0:
1291 1300 return
1292 1301
1293 1302 if isinstance(self.index, lazyindex):
1294 1303 self._loadindexmap()
1295 1304
1296 1305 for rev in self:
1297 1306 if self.index[rev][4] >= minlink:
1298 1307 break
1299 1308 else:
1300 1309 return
1301 1310
1302 1311 # first truncate the files on disk
1303 1312 end = self.start(rev)
1304 1313 if not self._inline:
1305 1314 df = self.opener(self.datafile, "a")
1306 1315 df.truncate(end)
1307 1316 end = rev * self._io.size
1308 1317 else:
1309 1318 end += rev * self._io.size
1310 1319
1311 1320 indexf = self.opener(self.indexfile, "a")
1312 1321 indexf.truncate(end)
1313 1322
1314 1323 # then reset internal state in memory to forget those revisions
1315 1324 self._cache = None
1316 1325 self._chunkcache = None
1317 1326 for x in xrange(rev, len(self)):
1318 1327 del self.nodemap[self.node(x)]
1319 1328
1320 1329 del self.index[rev:-1]
1321 1330
1322 1331 def checksize(self):
1323 1332 expected = 0
1324 1333 if len(self):
1325 1334 expected = max(0, self.end(len(self) - 1))
1326 1335
1327 1336 try:
1328 1337 f = self.opener(self.datafile)
1329 1338 f.seek(0, 2)
1330 1339 actual = f.tell()
1331 1340 dd = actual - expected
1332 1341 except IOError, inst:
1333 1342 if inst.errno != errno.ENOENT:
1334 1343 raise
1335 1344 dd = 0
1336 1345
1337 1346 try:
1338 1347 f = self.opener(self.indexfile)
1339 1348 f.seek(0, 2)
1340 1349 actual = f.tell()
1341 1350 s = self._io.size
1342 1351 i = max(0, actual / s)
1343 1352 di = actual - (i * s)
1344 1353 if self._inline:
1345 1354 databytes = 0
1346 1355 for r in self:
1347 1356 databytes += max(0, self.length(r))
1348 1357 dd = 0
1349 1358 di = actual - len(self) * s - databytes
1350 1359 except IOError, inst:
1351 1360 if inst.errno != errno.ENOENT:
1352 1361 raise
1353 1362 di = 0
1354 1363
1355 1364 return (dd, di)
1356 1365
1357 1366 def files(self):
1358 1367 res = [ self.indexfile ]
1359 1368 if not self._inline:
1360 1369 res.append(self.datafile)
1361 1370 return res
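For reference, the revlogNG index layout described in the comment block above the indexformatng definition (6 bytes offset, 2 bytes flags, compressed and uncompressed lengths, base rev, link rev, two parent revs, and a 20-byte node id padded to 32 bytes) packs into 64 bytes per entry, with the data offset and flags sharing the leading 64-bit word via offset_type(). A small self-contained sketch with made-up field values (not taken from any real revlog) showing how the struct string and the offset/type helpers fit together:

    import struct

    indexformatng = ">Qiiiiii20s12x"   # same struct string as in the diff above

    def offset_type(offset, type):
        # pack the data-file offset into the high bits and the flags into the low 16
        return (offset << 16) | type

    node = "\xab" * 20                 # hypothetical 20-byte node id
    # (offset+flags, compressed len, uncompressed len, base rev, link rev, p1 rev, p2 rev, node)
    entry = (offset_type(4096, 0), 120, 300, 3, 7, 6, -1, node)

    packed = struct.pack(indexformatng, *entry)
    assert len(packed) == struct.calcsize(indexformatng) == 64

    fields = struct.unpack(indexformatng, packed)
    assert fields[0] >> 16 == 4096     # what getoffset() extracts
    assert fields[0] & 0xFFFF == 0     # what gettype() extracts

Because every entry is a fixed 64 bytes, code such as lazyparser and strip() above can locate the record for revision r by plain arithmetic on r * self._io.size.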