revlog: fix reading of larger revlog indices on Windows
Matt Mackall
r8558:5726bb29 default
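revlog.__init__ prereads at most _prereadsize (1 MiB) of the index file. Before this change, the only path that went back to the file for more data was the lazyparser branch in revlogio.parseindex, and that branch is guarded by util.openhardlinks(), which is typically false on Windows without the win32 extensions (see the lazyparser comment in the diff). When that guard failed, or for inline revlogs, an index of 1 MiB or more was handed to parsers.parse_index truncated at the preread boundary; the v0 parser had a similarly inverted guard, reading the rest of the file only when len(data) < _prereadsize, which is exactly the case where nothing is left to read. The fix keys both parsers off len(data) == _prereadsize (the preread buffer was filled, so the index may continue) and reads the remainder with fp.read() whenever the lazy parser is not used. A minimal sketch of the corrected boundary check follows the diff.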
@@ -1,1389 +1,1385 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 33
34 34 _prereadsize = 1048576
35 35
36 36 RevlogError = error.RevlogError
37 37 LookupError = error.LookupError
38 38
39 39 def getoffset(q):
40 40 return int(q >> 16)
41 41
42 42 def gettype(q):
43 43 return int(q & 0xFFFF)
44 44
45 45 def offset_type(offset, type):
46 46 return long(long(offset) << 16 | type)
47 47
48 48 nullhash = _sha(nullid)
49 49
50 50 def hash(text, p1, p2):
51 51 """generate a hash from the given text and its parent hashes
52 52
53 53 This hash combines both the current file contents and its history
54 54 in a manner that makes it easy to distinguish nodes with the same
55 55 content in the revision graph.
56 56 """
57 57 # As of now, if one of the parent node is null, p2 is null
58 58 if p2 == nullid:
59 59 # deep copy of a hash is faster than creating one
60 60 s = nullhash.copy()
61 61 s.update(p1)
62 62 else:
63 63 # none of the parent nodes are nullid
64 64 l = [p1, p2]
65 65 l.sort()
66 66 s = _sha(l[0])
67 67 s.update(l[1])
68 68 s.update(text)
69 69 return s.digest()
70 70
71 71 def compress(text):
72 72 """ generate a possibly-compressed representation of text """
73 73 if not text:
74 74 return ("", text)
75 75 l = len(text)
76 76 bin = None
77 77 if l < 44:
78 78 pass
79 79 elif l > 1000000:
80 80 # zlib makes an internal copy, thus doubling memory usage for
81 81 # large files, so lets do this in pieces
82 82 z = zlib.compressobj()
83 83 p = []
84 84 pos = 0
85 85 while pos < l:
86 86 pos2 = pos + 2**20
87 87 p.append(z.compress(text[pos:pos2]))
88 88 pos = pos2
89 89 p.append(z.flush())
90 90 if sum(map(len, p)) < l:
91 91 bin = "".join(p)
92 92 else:
93 93 bin = _compress(text)
94 94 if bin is None or len(bin) > l:
95 95 if text[0] == '\0':
96 96 return ("", text)
97 97 return ('u', text)
98 98 return ("", bin)
99 99
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 return _decompress(bin)
109 109 if t == 'u':
110 110 return bin[1:]
111 111 raise RevlogError(_("unknown compression type %r") % t)
112 112
113 113 class lazyparser(object):
114 114 """
115 115 this class avoids the need to parse the entirety of large indices
116 116 """
117 117
118 118 # lazyparser is not safe to use on windows if win32 extensions not
119 119 # available. it keeps file handle open, which make it not possible
120 120 # to break hardlinks on local cloned repos.
121 121
122 122 def __init__(self, dataf, size):
123 123 self.dataf = dataf
124 124 self.s = struct.calcsize(indexformatng)
125 125 self.datasize = size
126 126 self.l = size/self.s
127 127 self.index = [None] * self.l
128 128 self.map = {nullid: nullrev}
129 129 self.allmap = 0
130 130 self.all = 0
131 131 self.mapfind_count = 0
132 132
133 133 def loadmap(self):
134 134 """
135 135 during a commit, we need to make sure the rev being added is
136 136 not a duplicate. This requires loading the entire index,
137 137 which is fairly slow. loadmap can load up just the node map,
138 138 which takes much less time.
139 139 """
140 140 if self.allmap:
141 141 return
142 142 end = self.datasize
143 143 self.allmap = 1
144 144 cur = 0
145 145 count = 0
146 146 blocksize = self.s * 256
147 147 self.dataf.seek(0)
148 148 while cur < end:
149 149 data = self.dataf.read(blocksize)
150 150 off = 0
151 151 for x in xrange(256):
152 152 n = data[off + ngshaoffset:off + ngshaoffset + 20]
153 153 self.map[n] = count
154 154 count += 1
155 155 if count >= self.l:
156 156 break
157 157 off += self.s
158 158 cur += blocksize
159 159
160 160 def loadblock(self, blockstart, blocksize, data=None):
161 161 if self.all:
162 162 return
163 163 if data is None:
164 164 self.dataf.seek(blockstart)
165 165 if blockstart + blocksize > self.datasize:
166 166 # the revlog may have grown since we've started running,
167 167 # but we don't have space in self.index for more entries.
168 168 # limit blocksize so that we don't get too much data.
169 169 blocksize = max(self.datasize - blockstart, 0)
170 170 data = self.dataf.read(blocksize)
171 171 lend = len(data) / self.s
172 172 i = blockstart / self.s
173 173 off = 0
174 174 # lazyindex supports __delitem__
175 175 if lend > len(self.index) - i:
176 176 lend = len(self.index) - i
177 177 for x in xrange(lend):
178 178 if self.index[i + x] is None:
179 179 b = data[off : off + self.s]
180 180 self.index[i + x] = b
181 181 n = b[ngshaoffset:ngshaoffset + 20]
182 182 self.map[n] = i + x
183 183 off += self.s
184 184
185 185 def findnode(self, node):
186 186 """search backwards through the index file for a specific node"""
187 187 if self.allmap:
188 188 return None
189 189
190 190 # hg log will cause many many searches for the manifest
191 191 # nodes. After we get called a few times, just load the whole
192 192 # thing.
193 193 if self.mapfind_count > 8:
194 194 self.loadmap()
195 195 if node in self.map:
196 196 return node
197 197 return None
198 198 self.mapfind_count += 1
199 199 last = self.l - 1
200 200 while self.index[last] != None:
201 201 if last == 0:
202 202 self.all = 1
203 203 self.allmap = 1
204 204 return None
205 205 last -= 1
206 206 end = (last + 1) * self.s
207 207 blocksize = self.s * 256
208 208 while end >= 0:
209 209 start = max(end - blocksize, 0)
210 210 self.dataf.seek(start)
211 211 data = self.dataf.read(end - start)
212 212 findend = end - start
213 213 while True:
214 214 # we're searching backwards, so we have to make sure
215 215 # we don't find a changeset where this node is a parent
216 216 off = data.find(node, 0, findend)
217 217 findend = off
218 218 if off >= 0:
219 219 i = off / self.s
220 220 off = i * self.s
221 221 n = data[off + ngshaoffset:off + ngshaoffset + 20]
222 222 if n == node:
223 223 self.map[n] = i + start / self.s
224 224 return node
225 225 else:
226 226 break
227 227 end -= blocksize
228 228 return None
229 229
230 230 def loadindex(self, i=None, end=None):
231 231 if self.all:
232 232 return
233 233 all = False
234 234 if i is None:
235 235 blockstart = 0
236 236 blocksize = (65536 / self.s) * self.s
237 237 end = self.datasize
238 238 all = True
239 239 else:
240 240 if end:
241 241 blockstart = i * self.s
242 242 end = end * self.s
243 243 blocksize = end - blockstart
244 244 else:
245 245 blockstart = (i & ~1023) * self.s
246 246 blocksize = self.s * 1024
247 247 end = blockstart + blocksize
248 248 while blockstart < end:
249 249 self.loadblock(blockstart, blocksize)
250 250 blockstart += blocksize
251 251 if all:
252 252 self.all = True
253 253
254 254 class lazyindex(object):
255 255 """a lazy version of the index array"""
256 256 def __init__(self, parser):
257 257 self.p = parser
258 258 def __len__(self):
259 259 return len(self.p.index)
260 260 def load(self, pos):
261 261 if pos < 0:
262 262 pos += len(self.p.index)
263 263 self.p.loadindex(pos)
264 264 return self.p.index[pos]
265 265 def __getitem__(self, pos):
266 266 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
267 267 def __setitem__(self, pos, item):
268 268 self.p.index[pos] = _pack(indexformatng, *item)
269 269 def __delitem__(self, pos):
270 270 del self.p.index[pos]
271 271 def insert(self, pos, e):
272 272 self.p.index.insert(pos, _pack(indexformatng, *e))
273 273 def append(self, e):
274 274 self.p.index.append(_pack(indexformatng, *e))
275 275
276 276 class lazymap(object):
277 277 """a lazy version of the node map"""
278 278 def __init__(self, parser):
279 279 self.p = parser
280 280 def load(self, key):
281 281 n = self.p.findnode(key)
282 282 if n is None:
283 283 raise KeyError(key)
284 284 def __contains__(self, key):
285 285 if key in self.p.map:
286 286 return True
287 287 self.p.loadmap()
288 288 return key in self.p.map
289 289 def __iter__(self):
290 290 yield nullid
291 291 for i in xrange(self.p.l):
292 292 ret = self.p.index[i]
293 293 if not ret:
294 294 self.p.loadindex(i)
295 295 ret = self.p.index[i]
296 296 if isinstance(ret, str):
297 297 ret = _unpack(indexformatng, ret)
298 298 yield ret[7]
299 299 def __getitem__(self, key):
300 300 try:
301 301 return self.p.map[key]
302 302 except KeyError:
303 303 try:
304 304 self.load(key)
305 305 return self.p.map[key]
306 306 except KeyError:
307 307 raise KeyError("node " + hex(key))
308 308 def __setitem__(self, key, val):
309 309 self.p.map[key] = val
310 310 def __delitem__(self, key):
311 311 del self.p.map[key]
312 312
313 313 indexformatv0 = ">4l20s20s20s"
314 314 v0shaoffset = 56
315 315
316 316 class revlogoldio(object):
317 317 def __init__(self):
318 318 self.size = struct.calcsize(indexformatv0)
319 319
320 320 def parseindex(self, fp, data, inline):
321 321 s = self.size
322 322 index = []
323 323 nodemap = {nullid: nullrev}
324 324 n = off = 0
325 if len(data) < _prereadsize:
325 if len(data) == _prereadsize:
326 326 data += fp.read() # read the rest
327 327 l = len(data)
328 328 while off + s <= l:
329 329 cur = data[off:off + s]
330 330 off += s
331 331 e = _unpack(indexformatv0, cur)
332 332 # transform to revlogv1 format
333 333 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
334 334 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
335 335 index.append(e2)
336 336 nodemap[e[6]] = n
337 337 n += 1
338 338
339 339 return index, nodemap, None
340 340
341 341 def packentry(self, entry, node, version, rev):
342 342 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
343 343 node(entry[5]), node(entry[6]), entry[7])
344 344 return _pack(indexformatv0, *e2)
345 345
346 346 # index ng:
347 347 # 6 bytes offset
348 348 # 2 bytes flags
349 349 # 4 bytes compressed length
350 350 # 4 bytes uncompressed length
351 351 # 4 bytes: base rev
352 352 # 4 bytes link rev
353 353 # 4 bytes parent 1 rev
354 354 # 4 bytes parent 2 rev
355 355 # 32 bytes: nodeid
356 356 indexformatng = ">Qiiiiii20s12x"
357 357 ngshaoffset = 32
358 358 versionformat = ">I"
359 359
360 360 class revlogio(object):
361 361 def __init__(self):
362 362 self.size = struct.calcsize(indexformatng)
363 363
364 364 def parseindex(self, fp, data, inline):
365 try:
366 size = len(data)
367 if size == _prereadsize:
368 size = util.fstat(fp).st_size
369 except AttributeError:
370 size = 0
371
372 if util.openhardlinks() and not inline and size > _prereadsize:
365 if len(data) == _prereadsize:
366 if util.openhardlinks() and not inline:
373 367 # big index, let's parse it on demand
374 368 parser = lazyparser(fp, size)
375 369 index = lazyindex(parser)
376 370 nodemap = lazymap(parser)
377 371 e = list(index[0])
378 372 type = gettype(e[0])
379 373 e[0] = offset_type(0, type)
380 374 index[0] = e
381 375 return index, nodemap, None
376 else:
377 data += fp.read()
382 378
383 379 # call the C implementation to parse the index data
384 380 index, nodemap, cache = parsers.parse_index(data, inline)
385 381 return index, nodemap, cache
386 382
387 383 def packentry(self, entry, node, version, rev):
388 384 p = _pack(indexformatng, *entry)
389 385 if rev == 0:
390 386 p = _pack(versionformat, version) + p[4:]
391 387 return p
392 388
393 389 class revlog(object):
394 390 """
395 391 the underlying revision storage object
396 392
397 393 A revlog consists of two parts, an index and the revision data.
398 394
399 395 The index is a file with a fixed record size containing
400 396 information on each revision, including its nodeid (hash), the
401 397 nodeids of its parents, the position and offset of its data within
402 398 the data file, and the revision it's based on. Finally, each entry
403 399 contains a linkrev entry that can serve as a pointer to external
404 400 data.
405 401
406 402 The revision data itself is a linear collection of data chunks.
407 403 Each chunk represents a revision and is usually represented as a
408 404 delta against the previous chunk. To bound lookup time, runs of
409 405 deltas are limited to about 2 times the length of the original
410 406 version data. This makes retrieval of a version proportional to
411 407 its size, or O(1) relative to the number of revisions.
412 408
413 409 Both pieces of the revlog are written to in an append-only
414 410 fashion, which means we never need to rewrite a file to insert or
415 411 remove data, and can use some simple techniques to avoid the need
416 412 for locking while reading.
417 413 """
418 414 def __init__(self, opener, indexfile):
419 415 """
420 416 create a revlog object
421 417
422 418 opener is a function that abstracts the file opening operation
423 419 and can be used to implement COW semantics or the like.
424 420 """
425 421 self.indexfile = indexfile
426 422 self.datafile = indexfile[:-2] + ".d"
427 423 self.opener = opener
428 424 self._cache = None
429 425 self._chunkcache = (0, '')
430 426 self.nodemap = {nullid: nullrev}
431 427 self.index = []
432 428
433 429 v = REVLOG_DEFAULT_VERSION
434 430 if hasattr(opener, "defversion"):
435 431 v = opener.defversion
436 432 if v & REVLOGNG:
437 433 v |= REVLOGNGINLINEDATA
438 434
439 435 i = ''
440 436 try:
441 437 f = self.opener(self.indexfile)
442 438 i = f.read(_prereadsize)
443 439 if len(i) > 0:
444 440 v = struct.unpack(versionformat, i[:4])[0]
445 441 except IOError, inst:
446 442 if inst.errno != errno.ENOENT:
447 443 raise
448 444
449 445 self.version = v
450 446 self._inline = v & REVLOGNGINLINEDATA
451 447 flags = v & ~0xFFFF
452 448 fmt = v & 0xFFFF
453 449 if fmt == REVLOGV0 and flags:
454 450 raise RevlogError(_("index %s unknown flags %#04x for format v0")
455 451 % (self.indexfile, flags >> 16))
456 452 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
457 453 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 454 % (self.indexfile, flags >> 16))
459 455 elif fmt > REVLOGNG:
460 456 raise RevlogError(_("index %s unknown format %d")
461 457 % (self.indexfile, fmt))
462 458
463 459 self._io = revlogio()
464 460 if self.version == REVLOGV0:
465 461 self._io = revlogoldio()
466 462 if i:
467 463 try:
468 464 d = self._io.parseindex(f, i, self._inline)
469 465 except (ValueError, IndexError), e:
470 466 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 467 self.index, self.nodemap, self._chunkcache = d
472 468 if not self._chunkcache:
473 469 self._chunkcache = (0, '')
474 470
475 471 # add the magic null revision at -1 (if it hasn't been done already)
476 472 if (self.index == [] or isinstance(self.index, lazyindex) or
477 473 self.index[-1][7] != nullid) :
478 474 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
479 475
480 476 def _loadindex(self, start, end):
481 477 """load a block of indexes all at once from the lazy parser"""
482 478 if isinstance(self.index, lazyindex):
483 479 self.index.p.loadindex(start, end)
484 480
485 481 def _loadindexmap(self):
486 482 """loads both the map and the index from the lazy parser"""
487 483 if isinstance(self.index, lazyindex):
488 484 p = self.index.p
489 485 p.loadindex()
490 486 self.nodemap = p.map
491 487
492 488 def _loadmap(self):
493 489 """loads the map from the lazy parser"""
494 490 if isinstance(self.nodemap, lazymap):
495 491 self.nodemap.p.loadmap()
496 492 self.nodemap = self.nodemap.p.map
497 493
498 494 def tip(self):
499 495 return self.node(len(self.index) - 2)
500 496 def __len__(self):
501 497 return len(self.index) - 1
502 498 def __iter__(self):
503 499 for i in xrange(len(self)):
504 500 yield i
505 501 def rev(self, node):
506 502 try:
507 503 return self.nodemap[node]
508 504 except KeyError:
509 505 raise LookupError(node, self.indexfile, _('no node'))
510 506 def node(self, rev):
511 507 return self.index[rev][7]
512 508 def linkrev(self, rev):
513 509 return self.index[rev][4]
514 510 def parents(self, node):
515 511 i = self.index
516 512 d = i[self.rev(node)]
517 513 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
518 514 def parentrevs(self, rev):
519 515 return self.index[rev][5:7]
520 516 def start(self, rev):
521 517 return int(self.index[rev][0] >> 16)
522 518 def end(self, rev):
523 519 return self.start(rev) + self.length(rev)
524 520 def length(self, rev):
525 521 return self.index[rev][1]
526 522 def base(self, rev):
527 523 return self.index[rev][3]
528 524
529 525 def size(self, rev):
530 526 """return the length of the uncompressed text for a given revision"""
531 527 l = self.index[rev][2]
532 528 if l >= 0:
533 529 return l
534 530
535 531 t = self.revision(self.node(rev))
536 532 return len(t)
537 533
538 534 # alternate implementation, The advantage to this code is it
539 535 # will be faster for a single revision. But, the results are not
540 536 # cached, so finding the size of every revision will be slower.
541 537 """
542 538 if self.cache and self.cache[1] == rev:
543 539 return len(self.cache[2])
544 540
545 541 base = self.base(rev)
546 542 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
547 543 base = self.cache[1]
548 544 text = self.cache[2]
549 545 else:
550 546 text = self.revision(self.node(base))
551 547
552 548 l = len(text)
553 549 for x in xrange(base + 1, rev + 1):
554 550 l = mdiff.patchedsize(l, self.chunk(x))
555 551 return l
556 552 """
557 553
558 554 def reachable(self, node, stop=None):
559 555 """return the set of all nodes ancestral to a given node, including
560 556 the node itself, stopping when stop is matched"""
561 557 reachable = set((node,))
562 558 visit = [node]
563 559 if stop:
564 560 stopn = self.rev(stop)
565 561 else:
566 562 stopn = 0
567 563 while visit:
568 564 n = visit.pop(0)
569 565 if n == stop:
570 566 continue
571 567 if n == nullid:
572 568 continue
573 569 for p in self.parents(n):
574 570 if self.rev(p) < stopn:
575 571 continue
576 572 if p not in reachable:
577 573 reachable.add(p)
578 574 visit.append(p)
579 575 return reachable
580 576
581 577 def ancestors(self, *revs):
582 578 'Generate the ancestors of revs using a breadth-first visit'
583 579 visit = list(revs)
584 580 seen = set([nullrev])
585 581 while visit:
586 582 for parent in self.parentrevs(visit.pop(0)):
587 583 if parent not in seen:
588 584 visit.append(parent)
589 585 seen.add(parent)
590 586 yield parent
591 587
592 588 def descendants(self, *revs):
593 589 'Generate the descendants of revs in topological order'
594 590 seen = set(revs)
595 591 for i in xrange(min(revs) + 1, len(self)):
596 592 for x in self.parentrevs(i):
597 593 if x != nullrev and x in seen:
598 594 seen.add(i)
599 595 yield i
600 596 break
601 597
602 598 def findmissing(self, common=None, heads=None):
603 599 '''
604 600 returns the topologically sorted list of nodes from the set:
605 601 missing = (ancestors(heads) \ ancestors(common))
606 602
607 603 where ancestors() is the set of ancestors from heads, heads included
608 604
609 605 if heads is None, the heads of the revlog are used
610 606 if common is None, nullid is assumed to be a common node
611 607 '''
612 608 if common is None:
613 609 common = [nullid]
614 610 if heads is None:
615 611 heads = self.heads()
616 612
617 613 common = [self.rev(n) for n in common]
618 614 heads = [self.rev(n) for n in heads]
619 615
620 616 # we want the ancestors, but inclusive
621 617 has = set(self.ancestors(*common))
622 618 has.add(nullrev)
623 619 has.update(common)
624 620
625 621 # take all ancestors from heads that aren't in has
626 622 missing = set()
627 623 visit = [r for r in heads if r not in has]
628 624 while visit:
629 625 r = visit.pop(0)
630 626 if r in missing:
631 627 continue
632 628 else:
633 629 missing.add(r)
634 630 for p in self.parentrevs(r):
635 631 if p not in has:
636 632 visit.append(p)
637 633 missing = list(missing)
638 634 missing.sort()
639 635 return [self.node(r) for r in missing]
640 636
641 637 def nodesbetween(self, roots=None, heads=None):
642 638 """Return a tuple containing three elements. Elements 1 and 2 contain
643 639 a final list bases and heads after all the unreachable ones have been
644 640 pruned. Element 0 contains a topologically sorted list of all
645 641
646 642 nodes that satisfy these constraints:
647 643 1. All nodes must be descended from a node in roots (the nodes on
648 644 roots are considered descended from themselves).
649 645 2. All nodes must also be ancestors of a node in heads (the nodes in
650 646 heads are considered to be their own ancestors).
651 647
652 648 If roots is unspecified, nullid is assumed as the only root.
653 649 If heads is unspecified, it is taken to be the output of the
654 650 heads method (i.e. a list of all nodes in the repository that
655 651 have no children)."""
656 652 nonodes = ([], [], [])
657 653 if roots is not None:
658 654 roots = list(roots)
659 655 if not roots:
660 656 return nonodes
661 657 lowestrev = min([self.rev(n) for n in roots])
662 658 else:
663 659 roots = [nullid] # Everybody's a descendent of nullid
664 660 lowestrev = nullrev
665 661 if (lowestrev == nullrev) and (heads is None):
666 662 # We want _all_ the nodes!
667 663 return ([self.node(r) for r in self], [nullid], list(self.heads()))
668 664 if heads is None:
669 665 # All nodes are ancestors, so the latest ancestor is the last
670 666 # node.
671 667 highestrev = len(self) - 1
672 668 # Set ancestors to None to signal that every node is an ancestor.
673 669 ancestors = None
674 670 # Set heads to an empty dictionary for later discovery of heads
675 671 heads = {}
676 672 else:
677 673 heads = list(heads)
678 674 if not heads:
679 675 return nonodes
680 676 ancestors = set()
681 677 # Turn heads into a dictionary so we can remove 'fake' heads.
682 678 # Also, later we will be using it to filter out the heads we can't
683 679 # find from roots.
684 680 heads = dict.fromkeys(heads, 0)
685 681 # Start at the top and keep marking parents until we're done.
686 682 nodestotag = set(heads)
687 683 # Remember where the top was so we can use it as a limit later.
688 684 highestrev = max([self.rev(n) for n in nodestotag])
689 685 while nodestotag:
690 686 # grab a node to tag
691 687 n = nodestotag.pop()
692 688 # Never tag nullid
693 689 if n == nullid:
694 690 continue
695 691 # A node's revision number represents its place in a
696 692 # topologically sorted list of nodes.
697 693 r = self.rev(n)
698 694 if r >= lowestrev:
699 695 if n not in ancestors:
700 696 # If we are possibly a descendent of one of the roots
701 697 # and we haven't already been marked as an ancestor
702 698 ancestors.add(n) # Mark as ancestor
703 699 # Add non-nullid parents to list of nodes to tag.
704 700 nodestotag.update([p for p in self.parents(n) if
705 701 p != nullid])
706 702 elif n in heads: # We've seen it before, is it a fake head?
707 703 # So it is, real heads should not be the ancestors of
708 704 # any other heads.
709 705 heads.pop(n)
710 706 if not ancestors:
711 707 return nonodes
712 708 # Now that we have our set of ancestors, we want to remove any
713 709 # roots that are not ancestors.
714 710
715 711 # If one of the roots was nullid, everything is included anyway.
716 712 if lowestrev > nullrev:
717 713 # But, since we weren't, let's recompute the lowest rev to not
718 714 # include roots that aren't ancestors.
719 715
720 716 # Filter out roots that aren't ancestors of heads
721 717 roots = [n for n in roots if n in ancestors]
722 718 # Recompute the lowest revision
723 719 if roots:
724 720 lowestrev = min([self.rev(n) for n in roots])
725 721 else:
726 722 # No more roots? Return empty list
727 723 return nonodes
728 724 else:
729 725 # We are descending from nullid, and don't need to care about
730 726 # any other roots.
731 727 lowestrev = nullrev
732 728 roots = [nullid]
733 729 # Transform our roots list into a set.
734 730 descendents = set(roots)
735 731 # Also, keep the original roots so we can filter out roots that aren't
736 732 # 'real' roots (i.e. are descended from other roots).
737 733 roots = descendents.copy()
738 734 # Our topologically sorted list of output nodes.
739 735 orderedout = []
740 736 # Don't start at nullid since we don't want nullid in our output list,
741 737 # and if nullid shows up in descedents, empty parents will look like
742 738 # they're descendents.
743 739 for r in xrange(max(lowestrev, 0), highestrev + 1):
744 740 n = self.node(r)
745 741 isdescendent = False
746 742 if lowestrev == nullrev: # Everybody is a descendent of nullid
747 743 isdescendent = True
748 744 elif n in descendents:
749 745 # n is already a descendent
750 746 isdescendent = True
751 747 # This check only needs to be done here because all the roots
752 748 # will start being marked is descendents before the loop.
753 749 if n in roots:
754 750 # If n was a root, check if it's a 'real' root.
755 751 p = tuple(self.parents(n))
756 752 # If any of its parents are descendents, it's not a root.
757 753 if (p[0] in descendents) or (p[1] in descendents):
758 754 roots.remove(n)
759 755 else:
760 756 p = tuple(self.parents(n))
761 757 # A node is a descendent if either of its parents are
762 758 # descendents. (We seeded the dependents list with the roots
763 759 # up there, remember?)
764 760 if (p[0] in descendents) or (p[1] in descendents):
765 761 descendents.add(n)
766 762 isdescendent = True
767 763 if isdescendent and ((ancestors is None) or (n in ancestors)):
768 764 # Only include nodes that are both descendents and ancestors.
769 765 orderedout.append(n)
770 766 if (ancestors is not None) and (n in heads):
771 767 # We're trying to figure out which heads are reachable
772 768 # from roots.
773 769 # Mark this head as having been reached
774 770 heads[n] = 1
775 771 elif ancestors is None:
776 772 # Otherwise, we're trying to discover the heads.
777 773 # Assume this is a head because if it isn't, the next step
778 774 # will eventually remove it.
779 775 heads[n] = 1
780 776 # But, obviously its parents aren't.
781 777 for p in self.parents(n):
782 778 heads.pop(p, None)
783 779 heads = [n for n in heads.iterkeys() if heads[n] != 0]
784 780 roots = list(roots)
785 781 assert orderedout
786 782 assert roots
787 783 assert heads
788 784 return (orderedout, roots, heads)
789 785
790 786 def heads(self, start=None, stop=None):
791 787 """return the list of all nodes that have no children
792 788
793 789 if start is specified, only heads that are descendants of
794 790 start will be returned
795 791 if stop is specified, it will consider all the revs from stop
796 792 as if they had no children
797 793 """
798 794 if start is None and stop is None:
799 795 count = len(self)
800 796 if not count:
801 797 return [nullid]
802 798 ishead = [1] * (count + 1)
803 799 index = self.index
804 800 for r in xrange(count):
805 801 e = index[r]
806 802 ishead[e[5]] = ishead[e[6]] = 0
807 803 return [self.node(r) for r in xrange(count) if ishead[r]]
808 804
809 805 if start is None:
810 806 start = nullid
811 807 if stop is None:
812 808 stop = []
813 809 stoprevs = set([self.rev(n) for n in stop])
814 810 startrev = self.rev(start)
815 811 reachable = set((startrev,))
816 812 heads = set((startrev,))
817 813
818 814 parentrevs = self.parentrevs
819 815 for r in xrange(startrev + 1, len(self)):
820 816 for p in parentrevs(r):
821 817 if p in reachable:
822 818 if r not in stoprevs:
823 819 reachable.add(r)
824 820 heads.add(r)
825 821 if p in heads and p not in stoprevs:
826 822 heads.remove(p)
827 823
828 824 return [self.node(r) for r in heads]
829 825
830 826 def children(self, node):
831 827 """find the children of a given node"""
832 828 c = []
833 829 p = self.rev(node)
834 830 for r in range(p + 1, len(self)):
835 831 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
836 832 if prevs:
837 833 for pr in prevs:
838 834 if pr == p:
839 835 c.append(self.node(r))
840 836 elif p == nullrev:
841 837 c.append(self.node(r))
842 838 return c
843 839
844 840 def _match(self, id):
845 841 if isinstance(id, (long, int)):
846 842 # rev
847 843 return self.node(id)
848 844 if len(id) == 20:
849 845 # possibly a binary node
850 846 # odds of a binary node being all hex in ASCII are 1 in 10**25
851 847 try:
852 848 node = id
853 849 self.rev(node) # quick search the index
854 850 return node
855 851 except LookupError:
856 852 pass # may be partial hex id
857 853 try:
858 854 # str(rev)
859 855 rev = int(id)
860 856 if str(rev) != id:
861 857 raise ValueError
862 858 if rev < 0:
863 859 rev = len(self) + rev
864 860 if rev < 0 or rev >= len(self):
865 861 raise ValueError
866 862 return self.node(rev)
867 863 except (ValueError, OverflowError):
868 864 pass
869 865 if len(id) == 40:
870 866 try:
871 867 # a full hex nodeid?
872 868 node = bin(id)
873 869 self.rev(node)
874 870 return node
875 871 except (TypeError, LookupError):
876 872 pass
877 873
878 874 def _partialmatch(self, id):
879 875 if len(id) < 40:
880 876 try:
881 877 # hex(node)[:...]
882 878 l = len(id) / 2 # grab an even number of digits
883 879 bin_id = bin(id[:l*2])
884 880 nl = [n for n in self.nodemap if n[:l] == bin_id]
885 881 nl = [n for n in nl if hex(n).startswith(id)]
886 882 if len(nl) > 0:
887 883 if len(nl) == 1:
888 884 return nl[0]
889 885 raise LookupError(id, self.indexfile,
890 886 _('ambiguous identifier'))
891 887 return None
892 888 except TypeError:
893 889 pass
894 890
895 891 def lookup(self, id):
896 892 """locate a node based on:
897 893 - revision number or str(revision number)
898 894 - nodeid or subset of hex nodeid
899 895 """
900 896 n = self._match(id)
901 897 if n is not None:
902 898 return n
903 899 n = self._partialmatch(id)
904 900 if n:
905 901 return n
906 902
907 903 raise LookupError(id, self.indexfile, _('no match found'))
908 904
909 905 def cmp(self, node, text):
910 906 """compare text with a given file revision"""
911 907 p1, p2 = self.parents(node)
912 908 return hash(text, p1, p2) != node
913 909
914 910 def _addchunk(self, offset, data):
915 911 o, d = self._chunkcache
916 912 # try to add to existing cache
917 913 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
918 914 self._chunkcache = o, d + data
919 915 else:
920 916 self._chunkcache = offset, data
921 917
922 918 def _loadchunk(self, offset, length, df=None):
923 919 if not df:
924 920 if self._inline:
925 921 df = self.opener(self.indexfile)
926 922 else:
927 923 df = self.opener(self.datafile)
928 924
929 925 readahead = max(65536, length)
930 926 df.seek(offset)
931 927 d = df.read(readahead)
932 928 self._addchunk(offset, d)
933 929 if readahead > length:
934 930 return d[:length]
935 931 return d
936 932
937 933 def _getchunk(self, offset, length, df=None):
938 934 o, d = self._chunkcache
939 935 l = len(d)
940 936
941 937 # is it in the cache?
942 938 cachestart = offset - o
943 939 cacheend = cachestart + length
944 940 if cachestart >= 0 and cacheend <= l:
945 941 if cachestart == 0 and cacheend == l:
946 942 return d # avoid a copy
947 943 return d[cachestart:cacheend]
948 944
949 945 return self._loadchunk(offset, length, df)
950 946
951 947 def _prime(self, startrev, endrev, df):
952 948 start = self.start(startrev)
953 949 end = self.end(endrev)
954 950 if self._inline:
955 951 start += (startrev + 1) * self._io.size
956 952 end += (startrev + 1) * self._io.size
957 953 self._loadchunk(start, end - start, df)
958 954
959 955 def chunk(self, rev, df=None):
960 956 start, length = self.start(rev), self.length(rev)
961 957 if self._inline:
962 958 start += (rev + 1) * self._io.size
963 959 return decompress(self._getchunk(start, length, df))
964 960
965 961 def revdiff(self, rev1, rev2):
966 962 """return or calculate a delta between two revisions"""
967 963 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
968 964 return self.chunk(rev2)
969 965
970 966 return mdiff.textdiff(self.revision(self.node(rev1)),
971 967 self.revision(self.node(rev2)))
972 968
973 969 def revision(self, node):
974 970 """return an uncompressed revision of a given node"""
975 971 if node == nullid:
976 972 return ""
977 973 if self._cache and self._cache[0] == node:
978 974 return str(self._cache[2])
979 975
980 976 # look up what we need to read
981 977 text = None
982 978 rev = self.rev(node)
983 979 base = self.base(rev)
984 980
985 981 # check rev flags
986 982 if self.index[rev][0] & 0xFFFF:
987 983 raise RevlogError(_('incompatible revision flag %x') %
988 984 (self.index[rev][0] & 0xFFFF))
989 985
990 986 df = None
991 987
992 988 # do we have useful data cached?
993 989 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
994 990 base = self._cache[1]
995 991 text = str(self._cache[2])
996 992 self._loadindex(base, rev + 1)
997 993 if not self._inline and rev > base + 1:
998 994 df = self.opener(self.datafile)
999 995 self._prime(base, rev, df)
1000 996 else:
1001 997 self._loadindex(base, rev + 1)
1002 998 if not self._inline and rev > base:
1003 999 df = self.opener(self.datafile)
1004 1000 self._prime(base, rev, df)
1005 1001 text = self.chunk(base, df=df)
1006 1002
1007 1003 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
1008 1004 text = mdiff.patches(text, bins)
1009 1005 p1, p2 = self.parents(node)
1010 1006 if node != hash(text, p1, p2):
1011 1007 raise RevlogError(_("integrity check failed on %s:%d")
1012 1008 % (self.datafile, rev))
1013 1009
1014 1010 self._cache = (node, rev, text)
1015 1011 return text
1016 1012
1017 1013 def checkinlinesize(self, tr, fp=None):
1018 1014 if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
1019 1015 return
1020 1016
1021 1017 trinfo = tr.find(self.indexfile)
1022 1018 if trinfo is None:
1023 1019 raise RevlogError(_("%s not found in the transaction")
1024 1020 % self.indexfile)
1025 1021
1026 1022 trindex = trinfo[2]
1027 1023 dataoff = self.start(trindex)
1028 1024
1029 1025 tr.add(self.datafile, dataoff)
1030 1026
1031 1027 if fp:
1032 1028 fp.flush()
1033 1029 fp.close()
1034 1030
1035 1031 df = self.opener(self.datafile, 'w')
1036 1032 try:
1037 1033 calc = self._io.size
1038 1034 for r in self:
1039 1035 start = self.start(r) + (r + 1) * calc
1040 1036 length = self.length(r)
1041 1037 d = self._getchunk(start, length)
1042 1038 df.write(d)
1043 1039 finally:
1044 1040 df.close()
1045 1041
1046 1042 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1047 1043 self.version &= ~(REVLOGNGINLINEDATA)
1048 1044 self._inline = False
1049 1045 for i in self:
1050 1046 e = self._io.packentry(self.index[i], self.node, self.version, i)
1051 1047 fp.write(e)
1052 1048
1053 1049 # if we don't call rename, the temp file will never replace the
1054 1050 # real index
1055 1051 fp.rename()
1056 1052
1057 1053 tr.replace(self.indexfile, trindex * calc)
1058 1054 self._chunkcache = (0, '')
1059 1055
1060 1056 def addrevision(self, text, transaction, link, p1, p2, d=None):
1061 1057 """add a revision to the log
1062 1058
1063 1059 text - the revision data to add
1064 1060 transaction - the transaction object used for rollback
1065 1061 link - the linkrev data to add
1066 1062 p1, p2 - the parent nodeids of the revision
1067 1063 d - an optional precomputed delta
1068 1064 """
1069 1065 dfh = None
1070 1066 if not self._inline:
1071 1067 dfh = self.opener(self.datafile, "a")
1072 1068 ifh = self.opener(self.indexfile, "a+")
1073 1069 try:
1074 1070 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1075 1071 finally:
1076 1072 if dfh:
1077 1073 dfh.close()
1078 1074 ifh.close()
1079 1075
1080 1076 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1081 1077 node = hash(text, p1, p2)
1082 1078 if node in self.nodemap:
1083 1079 return node
1084 1080
1085 1081 curr = len(self)
1086 1082 prev = curr - 1
1087 1083 base = self.base(prev)
1088 1084 offset = self.end(prev)
1089 1085
1090 1086 if curr:
1091 1087 if not d:
1092 1088 ptext = self.revision(self.node(prev))
1093 1089 d = mdiff.textdiff(ptext, text)
1094 1090 data = compress(d)
1095 1091 l = len(data[1]) + len(data[0])
1096 1092 dist = l + offset - self.start(base)
1097 1093
1098 1094 # full versions are inserted when the needed deltas
1099 1095 # become comparable to the uncompressed text
1100 1096 if not curr or dist > len(text) * 2:
1101 1097 data = compress(text)
1102 1098 l = len(data[1]) + len(data[0])
1103 1099 base = curr
1104 1100
1105 1101 e = (offset_type(offset, 0), l, len(text),
1106 1102 base, link, self.rev(p1), self.rev(p2), node)
1107 1103 self.index.insert(-1, e)
1108 1104 self.nodemap[node] = curr
1109 1105
1110 1106 entry = self._io.packentry(e, self.node, self.version, curr)
1111 1107 if not self._inline:
1112 1108 transaction.add(self.datafile, offset)
1113 1109 transaction.add(self.indexfile, curr * len(entry))
1114 1110 if data[0]:
1115 1111 dfh.write(data[0])
1116 1112 dfh.write(data[1])
1117 1113 dfh.flush()
1118 1114 ifh.write(entry)
1119 1115 else:
1120 1116 offset += curr * self._io.size
1121 1117 transaction.add(self.indexfile, offset, curr)
1122 1118 ifh.write(entry)
1123 1119 ifh.write(data[0])
1124 1120 ifh.write(data[1])
1125 1121 self.checkinlinesize(transaction, ifh)
1126 1122
1127 1123 self._cache = (node, curr, text)
1128 1124 return node
1129 1125
1130 1126 def ancestor(self, a, b):
1131 1127 """calculate the least common ancestor of nodes a and b"""
1132 1128
1133 1129 def parents(rev):
1134 1130 return [p for p in self.parentrevs(rev) if p != nullrev]
1135 1131
1136 1132 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1137 1133 if c is None:
1138 1134 return nullid
1139 1135
1140 1136 return self.node(c)
1141 1137
1142 1138 def group(self, nodelist, lookup, infocollect=None):
1143 1139 """calculate a delta group
1144 1140
1145 1141 Given a list of changeset revs, return a set of deltas and
1146 1142 metadata corresponding to nodes. the first delta is
1147 1143 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1148 1144 have this parent as it has all history before these
1149 1145 changesets. parent is parent[0]
1150 1146 """
1151 1147
1152 1148 # if we don't have any revisions touched by these changesets, bail
1153 1149 if not nodelist:
1154 1150 yield changegroup.closechunk()
1155 1151 return
1156 1152
1157 1153 revs = [self.rev(n) for n in nodelist]
1158 1154
1159 1155 # add the parent of the first rev
1160 1156 p = self.parentrevs(revs[0])[0]
1161 1157 revs.insert(0, p)
1162 1158
1163 1159 # build deltas
1164 1160 for d in xrange(0, len(revs) - 1):
1165 1161 a, b = revs[d], revs[d + 1]
1166 1162 nb = self.node(b)
1167 1163
1168 1164 if infocollect is not None:
1169 1165 infocollect(nb)
1170 1166
1171 1167 p = self.parents(nb)
1172 1168 meta = nb + p[0] + p[1] + lookup(nb)
1173 1169 if a == -1:
1174 1170 d = self.revision(nb)
1175 1171 meta += mdiff.trivialdiffheader(len(d))
1176 1172 else:
1177 1173 d = self.revdiff(a, b)
1178 1174 yield changegroup.chunkheader(len(meta) + len(d))
1179 1175 yield meta
1180 1176 if len(d) > 2**20:
1181 1177 pos = 0
1182 1178 while pos < len(d):
1183 1179 pos2 = pos + 2 ** 18
1184 1180 yield d[pos:pos2]
1185 1181 pos = pos2
1186 1182 else:
1187 1183 yield d
1188 1184
1189 1185 yield changegroup.closechunk()
1190 1186
1191 1187 def addgroup(self, revs, linkmapper, transaction):
1192 1188 """
1193 1189 add a delta group
1194 1190
1195 1191 given a set of deltas, add them to the revision log. the
1196 1192 first delta is against its parent, which should be in our
1197 1193 log, the rest are against the previous delta.
1198 1194 """
1199 1195
1200 1196 #track the base of the current delta log
1201 1197 r = len(self)
1202 1198 t = r - 1
1203 1199 node = None
1204 1200
1205 1201 base = prev = nullrev
1206 1202 start = end = textlen = 0
1207 1203 if r:
1208 1204 end = self.end(t)
1209 1205
1210 1206 ifh = self.opener(self.indexfile, "a+")
1211 1207 isize = r * self._io.size
1212 1208 if self._inline:
1213 1209 transaction.add(self.indexfile, end + isize, r)
1214 1210 dfh = None
1215 1211 else:
1216 1212 transaction.add(self.indexfile, isize, r)
1217 1213 transaction.add(self.datafile, end)
1218 1214 dfh = self.opener(self.datafile, "a")
1219 1215
1220 1216 try:
1221 1217 # loop through our set of deltas
1222 1218 chain = None
1223 1219 for chunk in revs:
1224 1220 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1225 1221 link = linkmapper(cs)
1226 1222 if node in self.nodemap:
1227 1223 # this can happen if two branches make the same change
1228 1224 chain = node
1229 1225 continue
1230 1226 delta = buffer(chunk, 80)
1231 1227 del chunk
1232 1228
1233 1229 for p in (p1, p2):
1234 1230 if not p in self.nodemap:
1235 1231 raise LookupError(p, self.indexfile, _('unknown parent'))
1236 1232
1237 1233 if not chain:
1238 1234 # retrieve the parent revision of the delta chain
1239 1235 chain = p1
1240 1236 if not chain in self.nodemap:
1241 1237 raise LookupError(chain, self.indexfile, _('unknown base'))
1242 1238
1243 1239 # full versions are inserted when the needed deltas become
1244 1240 # comparable to the uncompressed text or when the previous
1245 1241 # version is not the one we have a delta against. We use
1246 1242 # the size of the previous full rev as a proxy for the
1247 1243 # current size.
1248 1244
1249 1245 if chain == prev:
1250 1246 cdelta = compress(delta)
1251 1247 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1252 1248 textlen = mdiff.patchedsize(textlen, delta)
1253 1249
1254 1250 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1255 1251 # flush our writes here so we can read it in revision
1256 1252 if dfh:
1257 1253 dfh.flush()
1258 1254 ifh.flush()
1259 1255 text = self.revision(chain)
1260 1256 if len(text) == 0:
1261 1257 # skip over trivial delta header
1262 1258 text = buffer(delta, 12)
1263 1259 else:
1264 1260 text = mdiff.patches(text, [delta])
1265 1261 del delta
1266 1262 chk = self._addrevision(text, transaction, link, p1, p2, None,
1267 1263 ifh, dfh)
1268 1264 if not dfh and not self._inline:
1269 1265 # addrevision switched from inline to conventional
1270 1266 # reopen the index
1271 1267 dfh = self.opener(self.datafile, "a")
1272 1268 ifh = self.opener(self.indexfile, "a")
1273 1269 if chk != node:
1274 1270 raise RevlogError(_("consistency error adding group"))
1275 1271 textlen = len(text)
1276 1272 else:
1277 1273 e = (offset_type(end, 0), cdeltalen, textlen, base,
1278 1274 link, self.rev(p1), self.rev(p2), node)
1279 1275 self.index.insert(-1, e)
1280 1276 self.nodemap[node] = r
1281 1277 entry = self._io.packentry(e, self.node, self.version, r)
1282 1278 if self._inline:
1283 1279 ifh.write(entry)
1284 1280 ifh.write(cdelta[0])
1285 1281 ifh.write(cdelta[1])
1286 1282 self.checkinlinesize(transaction, ifh)
1287 1283 if not self._inline:
1288 1284 dfh = self.opener(self.datafile, "a")
1289 1285 ifh = self.opener(self.indexfile, "a")
1290 1286 else:
1291 1287 dfh.write(cdelta[0])
1292 1288 dfh.write(cdelta[1])
1293 1289 ifh.write(entry)
1294 1290
1295 1291 t, r, chain, prev = r, r + 1, node, node
1296 1292 base = self.base(t)
1297 1293 start = self.start(base)
1298 1294 end = self.end(t)
1299 1295 finally:
1300 1296 if dfh:
1301 1297 dfh.close()
1302 1298 ifh.close()
1303 1299
1304 1300 return node
1305 1301
1306 1302 def strip(self, minlink, transaction):
1307 1303 """truncate the revlog on the first revision with a linkrev >= minlink
1308 1304
1309 1305 This function is called when we're stripping revision minlink and
1310 1306 its descendants from the repository.
1311 1307
1312 1308 We have to remove all revisions with linkrev >= minlink, because
1313 1309 the equivalent changelog revisions will be renumbered after the
1314 1310 strip.
1315 1311
1316 1312 So we truncate the revlog on the first of these revisions, and
1317 1313 trust that the caller has saved the revisions that shouldn't be
1318 1314 removed and that it'll readd them after this truncation.
1319 1315 """
1320 1316 if len(self) == 0:
1321 1317 return
1322 1318
1323 1319 if isinstance(self.index, lazyindex):
1324 1320 self._loadindexmap()
1325 1321
1326 1322 for rev in self:
1327 1323 if self.index[rev][4] >= minlink:
1328 1324 break
1329 1325 else:
1330 1326 return
1331 1327
1332 1328 # first truncate the files on disk
1333 1329 end = self.start(rev)
1334 1330 if not self._inline:
1335 1331 transaction.add(self.datafile, end)
1336 1332 end = rev * self._io.size
1337 1333 else:
1338 1334 end += rev * self._io.size
1339 1335
1340 1336 transaction.add(self.indexfile, end)
1341 1337
1342 1338 # then reset internal state in memory to forget those revisions
1343 1339 self._cache = None
1344 1340 self._chunkcache = (0, '')
1345 1341 for x in xrange(rev, len(self)):
1346 1342 del self.nodemap[self.node(x)]
1347 1343
1348 1344 del self.index[rev:-1]
1349 1345
1350 1346 def checksize(self):
1351 1347 expected = 0
1352 1348 if len(self):
1353 1349 expected = max(0, self.end(len(self) - 1))
1354 1350
1355 1351 try:
1356 1352 f = self.opener(self.datafile)
1357 1353 f.seek(0, 2)
1358 1354 actual = f.tell()
1359 1355 dd = actual - expected
1360 1356 except IOError, inst:
1361 1357 if inst.errno != errno.ENOENT:
1362 1358 raise
1363 1359 dd = 0
1364 1360
1365 1361 try:
1366 1362 f = self.opener(self.indexfile)
1367 1363 f.seek(0, 2)
1368 1364 actual = f.tell()
1369 1365 s = self._io.size
1370 1366 i = max(0, actual / s)
1371 1367 di = actual - (i * s)
1372 1368 if self._inline:
1373 1369 databytes = 0
1374 1370 for r in self:
1375 1371 databytes += max(0, self.length(r))
1376 1372 dd = 0
1377 1373 di = actual - len(self) * s - databytes
1378 1374 except IOError, inst:
1379 1375 if inst.errno != errno.ENOENT:
1380 1376 raise
1381 1377 di = 0
1382 1378
1383 1379 return (dd, di)
1384 1380
1385 1381 def files(self):
1386 1382 res = [ self.indexfile ]
1387 1383 if not self._inline:
1388 1384 res.append(self.datafile)
1389 1385 return res
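The boundary condition the patch corrects can be reduced to the standalone sketch below. This is illustrative only, not code from the commit; the needs_rest_* helpers are hypothetical names, and _prereadsize mirrors the constant defined in revlog.py.

# Illustrative sketch only -- not part of the commit.
_prereadsize = 1048576  # 1 MiB preread limit, as in revlog.py

def needs_rest_old(preread):
    # Old guard: true only when the preread already captured the whole
    # (small) index, where reading the rest is a no-op anyway.
    return len(preread) < _prereadsize

def needs_rest_new(preread):
    # New guard: true exactly when the preread buffer was filled, i.e.
    # the index may continue past the first 1 MiB and must be read fully.
    return len(preread) == _prereadsize

small = b'x' * 1000            # index smaller than the preread limit
large = b'x' * _prereadsize    # first 1 MiB of an index of 1 MiB or more

assert not needs_rest_old(large)   # the bug: the rest was never read
assert needs_rest_new(large)       # the fix: read the rest before parsing
assert not needs_rest_new(small)   # small indices are unaffected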