revlog: hold a private reference to self._cache...
Matt Mackall
r21750:4ab287c2 stable
@@ -1,1447 +1,1448 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
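
For illustration, a minimal standalone sketch of the node-id scheme implemented above, using hashlib.sha1 in place of util.sha1. Sorting both parents is equivalent to the null-parent fast path, because nullid (all zero bytes) always sorts first:

    import hashlib

    nullid = b"\x00" * 20

    def nodeid(text, p1, p2):
        # parents are sorted, so the id is independent of parent order
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    # a root revision hashes against two null parents
    assert len(nodeid(b"hello\n", nullid, nullid)) == 20
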
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
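
As a sketch of the framing decompress expects, using only stock zlib: a leading 'x' byte is a zlib stream, 'u' marks text stored uncompressed, and empty or '\0'-led chunks pass through untouched:

    import zlib

    blob = zlib.compress(b"a" * 100)
    assert blob[0:1] == b"x"                    # zlib streams begin with 'x'
    assert zlib.decompress(blob) == b"a" * 100
    assert b"uplain text"[1:] == b"plain text"  # 'u' chunks just drop the tag
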
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 20 bytes: parent 1 nodeid
100 100 # 20 bytes: parent 2 nodeid
101 101 # 20 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
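
A sketch of one v0 record with made-up field values, showing the 76-byte layout and what v0shaoffset points at:

    import struct

    fmt = ">4l20s20s20s"
    rec = struct.pack(fmt, 0, 11, 0, 0,
                      b"\x00" * 20, b"\x00" * 20, b"\xaa" * 20)
    assert struct.calcsize(fmt) == 76
    assert rec[56:76] == b"\xaa" * 20  # v0shaoffset: the entry's own nodeid
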
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
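
Similarly, a sketch of the RevlogNG record: 64 bytes on disk (20 significant nodeid bytes plus 12 bytes of padding), with a combined 48-bit offset / 16-bit flags field matching offset_type, getoffset, and gettype above. The sample values are invented:

    import struct

    assert struct.calcsize(">Qiiiiii20s12x") == 64
    offset_flags = (4096 << 16) | 0    # offset_type(4096, 0)
    assert offset_flags >> 16 == 4096  # getoffset()
    assert offset_flags & 0xFFFF == 0  # gettype()
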
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = None
204 204 self._chunkcache = (0, '')
205 205 self._chunkcachesize = 65536
206 206 self.index = []
207 207 self._pcache = {}
208 208 self._nodecache = {nullid: nullrev}
209 209 self._nodepos = None
210 210
211 211 v = REVLOG_DEFAULT_VERSION
212 212 opts = getattr(opener, 'options', None)
213 213 if opts is not None:
214 214 if 'revlogv1' in opts:
215 215 if 'generaldelta' in opts:
216 216 v |= REVLOGGENERALDELTA
217 217 else:
218 218 v = 0
219 219 if 'chunkcachesize' in opts:
220 220 self._chunkcachesize = opts['chunkcachesize']
221 221
222 222 if self._chunkcachesize <= 0:
223 223 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 224 'than 0') % self._chunkcachesize)
225 225 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 226 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 227 'of 2') % self._chunkcachesize)
228 228
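
The second check relies on a standard bit trick: n & (n - 1) clears the lowest set bit, so it is zero exactly when n is a power of two. For example:

    for n in (1, 2, 4096, 65536):
        assert n & (n - 1) == 0   # powers of two pass
    for n in (3, 6, 65535):
        assert n & (n - 1) != 0   # everything else is rejected
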
229 229 i = ''
230 230 self._initempty = True
231 231 try:
232 232 f = self.opener(self.indexfile)
233 233 i = f.read()
234 234 f.close()
235 235 if len(i) > 0:
236 236 v = struct.unpack(versionformat, i[:4])[0]
237 237 self._initempty = False
238 238 except IOError, inst:
239 239 if inst.errno != errno.ENOENT:
240 240 raise
241 241
242 242 self.version = v
243 243 self._inline = v & REVLOGNGINLINEDATA
244 244 self._generaldelta = v & REVLOGGENERALDELTA
245 245 flags = v & ~0xFFFF
246 246 fmt = v & 0xFFFF
247 247 if fmt == REVLOGV0 and flags:
248 248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 249 % (self.indexfile, flags >> 16))
250 250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 252 % (self.indexfile, flags >> 16))
253 253 elif fmt > REVLOGNG:
254 254 raise RevlogError(_("index %s unknown format %d")
255 255 % (self.indexfile, fmt))
256 256
257 257 self._io = revlogio()
258 258 if self.version == REVLOGV0:
259 259 self._io = revlogoldio()
260 260 try:
261 261 d = self._io.parseindex(i, self._inline)
262 262 except (ValueError, IndexError):
263 263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 264 self.index, nodemap, self._chunkcache = d
265 265 if nodemap is not None:
266 266 self.nodemap = self._nodecache = nodemap
267 267 if not self._chunkcache:
268 268 self._chunkclear()
269 269
270 270 def tip(self):
271 271 return self.node(len(self.index) - 2)
272 272 def __len__(self):
273 273 return len(self.index) - 1
274 274 def __iter__(self):
275 275 return iter(xrange(len(self)))
276 276 def revs(self, start=0, stop=None):
277 277 """iterate over all rev in this revlog (from start to stop)"""
278 278 step = 1
279 279 if stop is not None:
280 280 if start > stop:
281 281 step = -1
282 282 stop += step
283 283 else:
284 284 stop = len(self)
285 285 return xrange(start, stop, step)
286 286
287 287 @util.propertycache
288 288 def nodemap(self):
289 289 self.rev(self.node(0))
290 290 return self._nodecache
291 291
292 292 def hasnode(self, node):
293 293 try:
294 294 self.rev(node)
295 295 return True
296 296 except KeyError:
297 297 return False
298 298
299 299 def clearcaches(self):
300 300 try:
301 301 self._nodecache.clearcaches()
302 302 except AttributeError:
303 303 self._nodecache = {nullid: nullrev}
304 304 self._nodepos = None
305 305
306 306 def rev(self, node):
307 307 try:
308 308 return self._nodecache[node]
309 309 except RevlogError:
310 310 # parsers.c radix tree lookup failed
311 311 raise LookupError(node, self.indexfile, _('no node'))
312 312 except KeyError:
313 313 # pure python cache lookup failed
314 314 n = self._nodecache
315 315 i = self.index
316 316 p = self._nodepos
317 317 if p is None:
318 318 p = len(i) - 2
319 319 for r in xrange(p, -1, -1):
320 320 v = i[r][7]
321 321 n[v] = r
322 322 if v == node:
323 323 self._nodepos = r - 1
324 324 return r
325 325 raise LookupError(node, self.indexfile, _('no node'))
326 326
327 327 def node(self, rev):
328 328 return self.index[rev][7]
329 329 def linkrev(self, rev):
330 330 return self.index[rev][4]
331 331 def parents(self, node):
332 332 i = self.index
333 333 d = i[self.rev(node)]
334 334 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
335 335 def parentrevs(self, rev):
336 336 return self.index[rev][5:7]
337 337 def start(self, rev):
338 338 return int(self.index[rev][0] >> 16)
339 339 def end(self, rev):
340 340 return self.start(rev) + self.length(rev)
341 341 def length(self, rev):
342 342 return self.index[rev][1]
343 343 def chainbase(self, rev):
344 344 index = self.index
345 345 base = index[rev][3]
346 346 while base != rev:
347 347 rev = base
348 348 base = index[rev][3]
349 349 return base
350 350 def flags(self, rev):
351 351 return self.index[rev][0] & 0xFFFF
352 352 def rawsize(self, rev):
353 353 """return the length of the uncompressed text for a given revision"""
354 354 l = self.index[rev][2]
355 355 if l >= 0:
356 356 return l
357 357
358 358 t = self.revision(self.node(rev))
359 359 return len(t)
360 360 size = rawsize
361 361
362 362 def ancestors(self, revs, stoprev=0, inclusive=False):
363 363 """Generate the ancestors of 'revs' in reverse topological order.
364 364 Does not generate revs lower than stoprev.
365 365
366 366 See the documentation for ancestor.lazyancestors for more details."""
367 367
368 368 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
369 369 inclusive=inclusive)
370 370
371 371 def descendants(self, revs):
372 372 """Generate the descendants of 'revs' in revision order.
373 373
374 374 Yield a sequence of revision numbers starting with a child of
375 375 some rev in revs, i.e., each revision is *not* considered a
376 376 descendant of itself. Results are ordered by revision number (a
377 377 topological sort)."""
378 378 first = min(revs)
379 379 if first == nullrev:
380 380 for i in self:
381 381 yield i
382 382 return
383 383
384 384 seen = set(revs)
385 385 for i in self.revs(start=first + 1):
386 386 for x in self.parentrevs(i):
387 387 if x != nullrev and x in seen:
388 388 seen.add(i)
389 389 yield i
390 390 break
391 391
392 392 def findcommonmissing(self, common=None, heads=None):
393 393 """Return a tuple of the ancestors of common and the ancestors of heads
394 394 that are not ancestors of common. In revset terminology, we return the
395 395 tuple:
396 396
397 397 ::common, (::heads) - (::common)
398 398
399 399 The list is sorted by revision number, meaning it is
400 400 topologically sorted.
401 401
402 402 'heads' and 'common' are both lists of node IDs. If heads is
403 403 not supplied, uses all of the revlog's heads. If common is not
404 404 supplied, uses nullid."""
405 405 if common is None:
406 406 common = [nullid]
407 407 if heads is None:
408 408 heads = self.heads()
409 409
410 410 common = [self.rev(n) for n in common]
411 411 heads = [self.rev(n) for n in heads]
412 412
413 413 # we want the ancestors, but inclusive
414 414 class lazyset(object):
415 415 def __init__(self, lazyvalues):
416 416 self.addedvalues = set()
417 417 self.lazyvalues = lazyvalues
418 418
419 419 def __contains__(self, value):
420 420 return value in self.addedvalues or value in self.lazyvalues
421 421
422 422 def __iter__(self):
423 423 added = self.addedvalues
424 424 for r in added:
425 425 yield r
426 426 for r in self.lazyvalues:
427 427 if not r in added:
428 428 yield r
429 429
430 430 def add(self, value):
431 431 self.addedvalues.add(value)
432 432
433 433 def update(self, values):
434 434 self.addedvalues.update(values)
435 435
436 436 has = lazyset(self.ancestors(common))
437 437 has.add(nullrev)
438 438 has.update(common)
439 439
440 440 # take all ancestors from heads that aren't in has
441 441 missing = set()
442 442 visit = util.deque(r for r in heads if r not in has)
443 443 while visit:
444 444 r = visit.popleft()
445 445 if r in missing:
446 446 continue
447 447 else:
448 448 missing.add(r)
449 449 for p in self.parentrevs(r):
450 450 if p not in has:
451 451 visit.append(p)
452 452 missing = list(missing)
453 453 missing.sort()
454 454 return has, [self.node(r) for r in missing]
455 455
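
The lazyset above layers a mutable set over a lazily-evaluated membership source (ancestor.lazyancestors supports 'in' without materializing the whole walk). A simplified stand-in, with a plain set playing the lazy side, shows the idea:

    class lazyish(object):
        # illustrative stand-in for the lazyset pattern above
        def __init__(self, lazyvalues):
            self.added = set()
            self.lazyvalues = lazyvalues
        def __contains__(self, value):
            return value in self.added or value in self.lazyvalues
        def add(self, value):
            self.added.add(value)

    has = lazyish(set([1, 2, 3]))  # imagine an unbounded ancestor walk here
    has.add(-1)
    assert -1 in has and 2 in has and 99 not in has
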
456 456 def findmissingrevs(self, common=None, heads=None):
457 457 """Return the revision numbers of the ancestors of heads that
458 458 are not ancestors of common.
459 459
460 460 More specifically, return a list of revision numbers corresponding to
461 461 nodes N such that every N satisfies the following constraints:
462 462
463 463 1. N is an ancestor of some node in 'heads'
464 464 2. N is not an ancestor of any node in 'common'
465 465
466 466 The list is sorted by revision number, meaning it is
467 467 topologically sorted.
468 468
469 469 'heads' and 'common' are both lists of revision numbers. If heads is
470 470 not supplied, uses all of the revlog's heads. If common is not
471 471 supplied, uses nullid."""
472 472 if common is None:
473 473 common = [nullrev]
474 474 if heads is None:
475 475 heads = self.headrevs()
476 476
477 477 return ancestor.missingancestors(heads, common, self.parentrevs)
478 478
479 479 def findmissing(self, common=None, heads=None):
480 480 """Return the ancestors of heads that are not ancestors of common.
481 481
482 482 More specifically, return a list of nodes N such that every N
483 483 satisfies the following constraints:
484 484
485 485 1. N is an ancestor of some node in 'heads'
486 486 2. N is not an ancestor of any node in 'common'
487 487
488 488 The list is sorted by revision number, meaning it is
489 489 topologically sorted.
490 490
491 491 'heads' and 'common' are both lists of node IDs. If heads is
492 492 not supplied, uses all of the revlog's heads. If common is not
493 493 supplied, uses nullid."""
494 494 if common is None:
495 495 common = [nullid]
496 496 if heads is None:
497 497 heads = self.heads()
498 498
499 499 common = [self.rev(n) for n in common]
500 500 heads = [self.rev(n) for n in heads]
501 501
502 502 return [self.node(r) for r in
503 503 ancestor.missingancestors(heads, common, self.parentrevs)]
504 504
505 505 def nodesbetween(self, roots=None, heads=None):
506 506 """Return a topological path from 'roots' to 'heads'.
507 507
508 508 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
509 509 topologically sorted list of all nodes N that satisfy both of
510 510 these constraints:
511 511
512 512 1. N is a descendant of some node in 'roots'
513 513 2. N is an ancestor of some node in 'heads'
514 514
515 515 Every node is considered to be both a descendant and an ancestor
516 516 of itself, so every reachable node in 'roots' and 'heads' will be
517 517 included in 'nodes'.
518 518
519 519 'outroots' is the list of reachable nodes in 'roots', i.e., the
520 520 subset of 'roots' that is returned in 'nodes'. Likewise,
521 521 'outheads' is the subset of 'heads' that is also in 'nodes'.
522 522
523 523 'roots' and 'heads' are both lists of node IDs. If 'roots' is
524 524 unspecified, uses nullid as the only root. If 'heads' is
525 525 unspecified, uses list of all of the revlog's heads."""
526 526 nonodes = ([], [], [])
527 527 if roots is not None:
528 528 roots = list(roots)
529 529 if not roots:
530 530 return nonodes
531 531 lowestrev = min([self.rev(n) for n in roots])
532 532 else:
533 533 roots = [nullid] # Everybody's a descendant of nullid
534 534 lowestrev = nullrev
535 535 if (lowestrev == nullrev) and (heads is None):
536 536 # We want _all_ the nodes!
537 537 return ([self.node(r) for r in self], [nullid], list(self.heads()))
538 538 if heads is None:
539 539 # All nodes are ancestors, so the latest ancestor is the last
540 540 # node.
541 541 highestrev = len(self) - 1
542 542 # Set ancestors to None to signal that every node is an ancestor.
543 543 ancestors = None
544 544 # Set heads to an empty dictionary for later discovery of heads
545 545 heads = {}
546 546 else:
547 547 heads = list(heads)
548 548 if not heads:
549 549 return nonodes
550 550 ancestors = set()
551 551 # Turn heads into a dictionary so we can remove 'fake' heads.
552 552 # Also, later we will be using it to filter out the heads we can't
553 553 # find from roots.
554 554 heads = dict.fromkeys(heads, False)
555 555 # Start at the top and keep marking parents until we're done.
556 556 nodestotag = set(heads)
557 557 # Remember where the top was so we can use it as a limit later.
558 558 highestrev = max([self.rev(n) for n in nodestotag])
559 559 while nodestotag:
560 560 # grab a node to tag
561 561 n = nodestotag.pop()
562 562 # Never tag nullid
563 563 if n == nullid:
564 564 continue
565 565 # A node's revision number represents its place in a
566 566 # topologically sorted list of nodes.
567 567 r = self.rev(n)
568 568 if r >= lowestrev:
569 569 if n not in ancestors:
570 570 # If we are possibly a descendant of one of the roots
571 571 # and we haven't already been marked as an ancestor
572 572 ancestors.add(n) # Mark as ancestor
573 573 # Add non-nullid parents to list of nodes to tag.
574 574 nodestotag.update([p for p in self.parents(n) if
575 575 p != nullid])
576 576 elif n in heads: # We've seen it before, is it a fake head?
577 577 # So it is, real heads should not be the ancestors of
578 578 # any other heads.
579 579 heads.pop(n)
580 580 if not ancestors:
581 581 return nonodes
582 582 # Now that we have our set of ancestors, we want to remove any
583 583 # roots that are not ancestors.
584 584
585 585 # If one of the roots was nullid, everything is included anyway.
586 586 if lowestrev > nullrev:
587 587 # But, since we weren't, let's recompute the lowest rev to not
588 588 # include roots that aren't ancestors.
589 589
590 590 # Filter out roots that aren't ancestors of heads
591 591 roots = [n for n in roots if n in ancestors]
592 592 # Recompute the lowest revision
593 593 if roots:
594 594 lowestrev = min([self.rev(n) for n in roots])
595 595 else:
596 596 # No more roots? Return empty list
597 597 return nonodes
598 598 else:
599 599 # We are descending from nullid, and don't need to care about
600 600 # any other roots.
601 601 lowestrev = nullrev
602 602 roots = [nullid]
603 603 # Transform our roots list into a set.
604 604 descendants = set(roots)
605 605 # Also, keep the original roots so we can filter out roots that aren't
606 606 # 'real' roots (i.e. are descended from other roots).
607 607 roots = descendants.copy()
608 608 # Our topologically sorted list of output nodes.
609 609 orderedout = []
610 610 # Don't start at nullid since we don't want nullid in our output list,
611 611 # and if nullid shows up in descendants, empty parents will look like
612 612 # they're descendants.
613 613 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
614 614 n = self.node(r)
615 615 isdescendant = False
616 616 if lowestrev == nullrev: # Everybody is a descendant of nullid
617 617 isdescendant = True
618 618 elif n in descendants:
619 619 # n is already a descendant
620 620 isdescendant = True
621 621 # This check only needs to be done here because all the roots
622 622 # will start being marked as descendants before the loop.
623 623 if n in roots:
624 624 # If n was a root, check if it's a 'real' root.
625 625 p = tuple(self.parents(n))
626 626 # If any of its parents are descendants, it's not a root.
627 627 if (p[0] in descendants) or (p[1] in descendants):
628 628 roots.remove(n)
629 629 else:
630 630 p = tuple(self.parents(n))
631 631 # A node is a descendant if either of its parents is a
632 632 # descendant. (We seeded the descendants list with the roots
633 633 # up there, remember?)
634 634 if (p[0] in descendants) or (p[1] in descendants):
635 635 descendants.add(n)
636 636 isdescendant = True
637 637 if isdescendant and ((ancestors is None) or (n in ancestors)):
638 638 # Only include nodes that are both descendants and ancestors.
639 639 orderedout.append(n)
640 640 if (ancestors is not None) and (n in heads):
641 641 # We're trying to figure out which heads are reachable
642 642 # from roots.
643 643 # Mark this head as having been reached
644 644 heads[n] = True
645 645 elif ancestors is None:
646 646 # Otherwise, we're trying to discover the heads.
647 647 # Assume this is a head because if it isn't, the next step
648 648 # will eventually remove it.
649 649 heads[n] = True
650 650 # But, obviously its parents aren't.
651 651 for p in self.parents(n):
652 652 heads.pop(p, None)
653 653 heads = [n for n, flag in heads.iteritems() if flag]
654 654 roots = list(roots)
655 655 assert orderedout
656 656 assert roots
657 657 assert heads
658 658 return (orderedout, roots, heads)
659 659
660 660 def headrevs(self):
661 661 try:
662 662 return self.index.headrevs()
663 663 except AttributeError:
664 664 return self._headrevs()
665 665
666 666 def _headrevs(self):
667 667 count = len(self)
668 668 if not count:
669 669 return [nullrev]
670 670 # we won't iterate over filtered revs, so no rev is a head at the start
671 671 ishead = [0] * (count + 1)
672 672 index = self.index
673 673 for r in self:
674 674 ishead[r] = 1 # I may be a head
675 675 e = index[r]
676 676 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
677 677 return [r for r, val in enumerate(ishead) if val]
678 678
679 679 def heads(self, start=None, stop=None):
680 680 """return the list of all nodes that have no children
681 681
682 682 if start is specified, only heads that are descendants of
683 683 start will be returned
684 684 if stop is specified, it will consider all the revs from stop
685 685 as if they had no children
686 686 """
687 687 if start is None and stop is None:
688 688 if not len(self):
689 689 return [nullid]
690 690 return [self.node(r) for r in self.headrevs()]
691 691
692 692 if start is None:
693 693 start = nullid
694 694 if stop is None:
695 695 stop = []
696 696 stoprevs = set([self.rev(n) for n in stop])
697 697 startrev = self.rev(start)
698 698 reachable = set((startrev,))
699 699 heads = set((startrev,))
700 700
701 701 parentrevs = self.parentrevs
702 702 for r in self.revs(start=startrev + 1):
703 703 for p in parentrevs(r):
704 704 if p in reachable:
705 705 if r not in stoprevs:
706 706 reachable.add(r)
707 707 heads.add(r)
708 708 if p in heads and p not in stoprevs:
709 709 heads.remove(p)
710 710
711 711 return [self.node(r) for r in heads]
712 712
713 713 def children(self, node):
714 714 """find the children of a given node"""
715 715 c = []
716 716 p = self.rev(node)
717 717 for r in self.revs(start=p + 1):
718 718 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
719 719 if prevs:
720 720 for pr in prevs:
721 721 if pr == p:
722 722 c.append(self.node(r))
723 723 elif p == nullrev:
724 724 c.append(self.node(r))
725 725 return c
726 726
727 727 def descendant(self, start, end):
728 728 if start == nullrev:
729 729 return True
730 730 for i in self.descendants([start]):
731 731 if i == end:
732 732 return True
733 733 elif i > end:
734 734 break
735 735 return False
736 736
737 737 def commonancestorsheads(self, a, b):
738 738 """calculate all the heads of the common ancestors of nodes a and b"""
739 739 a, b = self.rev(a), self.rev(b)
740 740 try:
741 741 ancs = self.index.commonancestorsheads(a, b)
742 742 except (AttributeError, OverflowError): # C implementation failed
743 743 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
744 744 return map(self.node, ancs)
745 745
746 746 def ancestor(self, a, b):
747 747 """calculate the least common ancestor of nodes a and b"""
748 748
749 749 a, b = self.rev(a), self.rev(b)
750 750 try:
751 751 ancs = self.index.ancestors(a, b)
752 752 except (AttributeError, OverflowError):
753 753 ancs = ancestor.ancestors(self.parentrevs, a, b)
754 754 if ancs:
755 755 # choose a consistent winner when there's a tie
756 756 return min(map(self.node, ancs))
757 757 return nullid
758 758
759 759 def _match(self, id):
760 760 if isinstance(id, int):
761 761 # rev
762 762 return self.node(id)
763 763 if len(id) == 20:
764 764 # possibly a binary node
765 765 # odds of a binary node being all hex in ASCII are 1 in 10**25
766 766 try:
767 767 node = id
768 768 self.rev(node) # quick search the index
769 769 return node
770 770 except LookupError:
771 771 pass # may be partial hex id
772 772 try:
773 773 # str(rev)
774 774 rev = int(id)
775 775 if str(rev) != id:
776 776 raise ValueError
777 777 if rev < 0:
778 778 rev = len(self) + rev
779 779 if rev < 0 or rev >= len(self):
780 780 raise ValueError
781 781 return self.node(rev)
782 782 except (ValueError, OverflowError):
783 783 pass
784 784 if len(id) == 40:
785 785 try:
786 786 # a full hex nodeid?
787 787 node = bin(id)
788 788 self.rev(node)
789 789 return node
790 790 except (TypeError, LookupError):
791 791 pass
792 792
793 793 def _partialmatch(self, id):
794 794 try:
795 795 n = self.index.partialmatch(id)
796 796 if n and self.hasnode(n):
797 797 return n
798 798 return None
799 799 except RevlogError:
800 800 # parsers.c radix tree lookup gave multiple matches
801 801 # fall through to slow path that filters hidden revisions
802 802 pass
803 803 except (AttributeError, ValueError):
804 804 # we are pure python, or key was too short to search radix tree
805 805 pass
806 806
807 807 if id in self._pcache:
808 808 return self._pcache[id]
809 809
810 810 if len(id) < 40:
811 811 try:
812 812 # hex(node)[:...]
813 813 l = len(id) // 2 # grab an even number of digits
814 814 prefix = bin(id[:l * 2])
815 815 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
816 816 nl = [n for n in nl if hex(n).startswith(id) and
817 817 self.hasnode(n)]
818 818 if len(nl) > 0:
819 819 if len(nl) == 1:
820 820 self._pcache[id] = nl[0]
821 821 return nl[0]
822 822 raise LookupError(id, self.indexfile,
823 823 _('ambiguous identifier'))
824 824 return None
825 825 except TypeError:
826 826 pass
827 827
828 828 def lookup(self, id):
829 829 """locate a node based on:
830 830 - revision number or str(revision number)
831 831 - nodeid or subset of hex nodeid
832 832 """
833 833 n = self._match(id)
834 834 if n is not None:
835 835 return n
836 836 n = self._partialmatch(id)
837 837 if n:
838 838 return n
839 839
840 840 raise LookupError(id, self.indexfile, _('no match found'))
841 841
842 842 def cmp(self, node, text):
843 843 """compare text with a given file revision
844 844
845 845 returns True if text is different from what is stored.
846 846 """
847 847 p1, p2 = self.parents(node)
848 848 return hash(text, p1, p2) != node
849 849
850 850 def _addchunk(self, offset, data):
851 851 o, d = self._chunkcache
852 852 # try to add to existing cache
853 853 if o + len(d) == offset and len(d) + len(data) < _chunksize:
854 854 self._chunkcache = o, d + data
855 855 else:
856 856 self._chunkcache = offset, data
857 857
858 858 def _loadchunk(self, offset, length):
859 859 if self._inline:
860 860 df = self.opener(self.indexfile)
861 861 else:
862 862 df = self.opener(self.datafile)
863 863
864 864 # Cache data both forward and backward around the requested
865 865 # data, in a fixed size window. This helps speed up operations
866 866 # involving reading the revlog backwards.
867 867 cachesize = self._chunkcachesize
868 868 realoffset = offset & ~(cachesize - 1)
869 869 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
870 870 - realoffset)
871 871 df.seek(realoffset)
872 872 d = df.read(reallength)
873 873 df.close()
874 874 self._addchunk(realoffset, d)
875 875 if offset != realoffset or reallength != length:
876 876 return util.buffer(d, offset - realoffset, length)
877 877 return d
878 878
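
Worked numbers for the window arithmetic above, with the default 64 KiB cache and an illustrative request: the read is widened to cache-aligned boundaries on both sides of the requested range.

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert realoffset == 65536  # rounded down to a window boundary
    assert reallength == 65536  # [65536, 131072) covers [70000, 70100)
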
879 879 def _getchunk(self, offset, length):
880 880 o, d = self._chunkcache
881 881 l = len(d)
882 882
883 883 # is it in the cache?
884 884 cachestart = offset - o
885 885 cacheend = cachestart + length
886 886 if cachestart >= 0 and cacheend <= l:
887 887 if cachestart == 0 and cacheend == l:
888 888 return d # avoid a copy
889 889 return util.buffer(d, cachestart, cacheend - cachestart)
890 890
891 891 return self._loadchunk(offset, length)
892 892
893 893 def _chunkraw(self, startrev, endrev):
894 894 start = self.start(startrev)
895 895 end = self.end(endrev)
896 896 if self._inline:
897 897 start += (startrev + 1) * self._io.size
898 898 end += (endrev + 1) * self._io.size
899 899 length = end - start
900 900 return self._getchunk(start, length)
901 901
902 902 def _chunk(self, rev):
903 903 return decompress(self._chunkraw(rev, rev))
904 904
905 905 def _chunks(self, revs):
906 906 '''faster version of [self._chunk(rev) for rev in revs]
907 907
908 908 Assumes that revs is in ascending order.'''
909 909 if not revs:
910 910 return []
911 911 start = self.start
912 912 length = self.length
913 913 inline = self._inline
914 914 iosize = self._io.size
915 915 buffer = util.buffer
916 916
917 917 l = []
918 918 ladd = l.append
919 919
920 920 # preload the cache
921 921 try:
922 922 while 1:
923 923 # ensure that the cache doesn't change out from under us
924 924 _cache = self._chunkcache
925 925 self._chunkraw(revs[0], revs[-1])
926 926 if _cache == self._chunkcache:
927 927 break
928 928 offset, data = _cache
929 929 except OverflowError:
930 930 # issue4215 - we can't cache a run of chunks greater than
931 931 # 2G on Windows
932 932 return [self._chunk(rev) for rev in revs]
933 933
934 934 for rev in revs:
935 935 chunkstart = start(rev)
936 936 if inline:
937 937 chunkstart += (rev + 1) * iosize
938 938 chunklength = length(rev)
939 939 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
940 940
941 941 return l
942 942
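
The preload loop above is an optimistic-concurrency read: snapshot the shared cache, do the work, and redo it if another thread swapped the cache in the meantime. The same pattern in a hypothetical generic helper:

    def read_consistent(getstate, work):
        # hypothetical helper: retry until the shared state is identical
        # before and after the work, i.e. nobody raced us
        while True:
            before = getstate()
            result = work()
            if before == getstate():
                return result
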
943 943 def _chunkclear(self):
944 944 self._chunkcache = (0, '')
945 945
946 946 def deltaparent(self, rev):
947 947 """return deltaparent of the given revision"""
948 948 base = self.index[rev][3]
949 949 if base == rev:
950 950 return nullrev
951 951 elif self._generaldelta:
952 952 return base
953 953 else:
954 954 return rev - 1
955 955
956 956 def revdiff(self, rev1, rev2):
957 957 """return or calculate a delta between two revisions"""
958 958 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
959 959 return str(self._chunk(rev2))
960 960
961 961 return mdiff.textdiff(self.revision(rev1),
962 962 self.revision(rev2))
963 963
964 964 def revision(self, nodeorrev):
965 965 """return an uncompressed revision of a given node or revision
966 966 number.
967 967 """
968 968 if isinstance(nodeorrev, int):
969 969 rev = nodeorrev
970 970 node = self.node(rev)
971 971 else:
972 972 node = nodeorrev
973 973 rev = None
974 974
975 _cache = self._cache # grab local copy of cache to avoid thread race
975 976 cachedrev = None
976 977 if node == nullid:
977 978 return ""
978 if self._cache:
979 if self._cache[0] == node:
980 return self._cache[2]
981 cachedrev = self._cache[1]
979 if _cache:
980 if _cache[0] == node:
981 return _cache[2]
982 cachedrev = _cache[1]
982 983
983 984 # look up what we need to read
984 985 text = None
985 986 if rev is None:
986 987 rev = self.rev(node)
987 988
988 989 # check rev flags
989 990 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
990 991 raise RevlogError(_('incompatible revision flag %x') %
991 992 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
992 993
993 994 # build delta chain
994 995 chain = []
995 996 index = self.index # for performance
996 997 generaldelta = self._generaldelta
997 998 iterrev = rev
998 999 e = index[iterrev]
999 1000 while iterrev != e[3] and iterrev != cachedrev:
1000 1001 chain.append(iterrev)
1001 1002 if generaldelta:
1002 1003 iterrev = e[3]
1003 1004 else:
1004 1005 iterrev -= 1
1005 1006 e = index[iterrev]
1006 1007
1007 1008 if iterrev == cachedrev:
1008 1009 # cache hit
1009 text = self._cache[2]
1010 text = _cache[2]
1010 1011 else:
1011 1012 chain.append(iterrev)
1012 1013 chain.reverse()
1013 1014
1014 1015 # drop cache to save memory
1015 1016 self._cache = None
1016 1017
1017 1018 bins = self._chunks(chain)
1018 1019 if text is None:
1019 1020 text = str(bins[0])
1020 1021 bins = bins[1:]
1021 1022
1022 1023 text = mdiff.patches(text, bins)
1023 1024
1024 1025 text = self._checkhash(text, node, rev)
1025 1026
1026 1027 self._cache = (node, rev, text)
1027 1028 return text
1028 1029
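
This hunk is the point of the commit: revision() itself assigns self._cache = None partway through ("drop cache to save memory" above), so a concurrent reader doing check-then-use on the attribute can see it vanish between the two reads. Copying the tuple reference into a local once makes the check and the use operate on the same object. A minimal sketch of the two patterns, with a hypothetical class:

    class reader(object):
        _cache = ("node", 0, "text")  # may be set to None by any thread

        def unsafe(self):
            if self._cache:            # check the attribute...
                return self._cache[2]  # ...reread it: may be None by now

        def safe(self):
            _cache = self._cache       # single read into a local
            if _cache:
                return _cache[2]       # the local tuple cannot change
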
1029 1030 def _checkhash(self, text, node, rev):
1030 1031 p1, p2 = self.parents(node)
1031 1032 self.checkhash(text, p1, p2, node, rev)
1032 1033 return text
1033 1034
1034 1035 def checkhash(self, text, p1, p2, node, rev=None):
1035 1036 if node != hash(text, p1, p2):
1036 1037 revornode = rev
1037 1038 if revornode is None:
1038 1039 revornode = templatefilters.short(hex(node))
1039 1040 raise RevlogError(_("integrity check failed on %s:%s")
1040 1041 % (self.indexfile, revornode))
1041 1042
1042 1043 def checkinlinesize(self, tr, fp=None):
1043 1044 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1044 1045 return
1045 1046
1046 1047 trinfo = tr.find(self.indexfile)
1047 1048 if trinfo is None:
1048 1049 raise RevlogError(_("%s not found in the transaction")
1049 1050 % self.indexfile)
1050 1051
1051 1052 trindex = trinfo[2]
1052 1053 dataoff = self.start(trindex)
1053 1054
1054 1055 tr.add(self.datafile, dataoff)
1055 1056
1056 1057 if fp:
1057 1058 fp.flush()
1058 1059 fp.close()
1059 1060
1060 1061 df = self.opener(self.datafile, 'w')
1061 1062 try:
1062 1063 for r in self:
1063 1064 df.write(self._chunkraw(r, r))
1064 1065 finally:
1065 1066 df.close()
1066 1067
1067 1068 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1068 1069 self.version &= ~(REVLOGNGINLINEDATA)
1069 1070 self._inline = False
1070 1071 for i in self:
1071 1072 e = self._io.packentry(self.index[i], self.node, self.version, i)
1072 1073 fp.write(e)
1073 1074
1074 1075 # if we don't call close, the temp file will never replace the
1075 1076 # real index
1076 1077 fp.close()
1077 1078
1078 1079 tr.replace(self.indexfile, trindex * self._io.size)
1079 1080 self._chunkclear()
1080 1081
1081 1082 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1082 1083 node=None):
1083 1084 """add a revision to the log
1084 1085
1085 1086 text - the revision data to add
1086 1087 transaction - the transaction object used for rollback
1087 1088 link - the linkrev data to add
1088 1089 p1, p2 - the parent nodeids of the revision
1089 1090 cachedelta - an optional precomputed delta
1090 1091 node - nodeid of revision; typically node is not specified, and it is
1091 1092 computed by default as hash(text, p1, p2), however subclasses might
1092 1093 use different hashing method (and override checkhash() in such case)
1093 1094 """
1094 1095 if link == nullrev:
1095 1096 raise RevlogError(_("attempted to add linkrev -1 to %s")
1096 1097 % self.indexfile)
1097 1098 node = node or hash(text, p1, p2)
1098 1099 if node in self.nodemap:
1099 1100 return node
1100 1101
1101 1102 dfh = None
1102 1103 if not self._inline:
1103 1104 dfh = self.opener(self.datafile, "a")
1104 1105 ifh = self.opener(self.indexfile, "a+")
1105 1106 try:
1106 1107 return self._addrevision(node, text, transaction, link, p1, p2,
1107 1108 cachedelta, ifh, dfh)
1108 1109 finally:
1109 1110 if dfh:
1110 1111 dfh.close()
1111 1112 ifh.close()
1112 1113
1113 1114 def compress(self, text):
1114 1115 """ generate a possibly-compressed representation of text """
1115 1116 if not text:
1116 1117 return ("", text)
1117 1118 l = len(text)
1118 1119 bin = None
1119 1120 if l < 44:
1120 1121 pass
1121 1122 elif l > 1000000:
1122 1123 # zlib makes an internal copy, thus doubling memory usage for
1123 1124 # large files, so let's do this in pieces
1124 1125 z = zlib.compressobj()
1125 1126 p = []
1126 1127 pos = 0
1127 1128 while pos < l:
1128 1129 pos2 = pos + 2**20
1129 1130 p.append(z.compress(text[pos:pos2]))
1130 1131 pos = pos2
1131 1132 p.append(z.flush())
1132 1133 if sum(map(len, p)) < l:
1133 1134 bin = "".join(p)
1134 1135 else:
1135 1136 bin = _compress(text)
1136 1137 if bin is None or len(bin) > l:
1137 1138 if text[0] == '\0':
1138 1139 return ("", text)
1139 1140 return ('u', text)
1140 1141 return ("", bin)
1141 1142
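
A sketch of the contract between compress() above and decompress() earlier, using stock zlib: the returned header is '' for zlib data (which itself starts with 'x'), 'u' for text stored raw, and '' for text that already begins with '\0'.

    import zlib

    big = b"a" * 1000
    body = zlib.compress(big)
    assert len(body) < len(big)       # worth storing compressed
    assert body[0:1] == b"x"          # so no extra tag byte is needed
    # short or incompressible text is stored as 'u' + text instead
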
1142 1143 def _addrevision(self, node, text, transaction, link, p1, p2,
1143 1144 cachedelta, ifh, dfh):
1144 1145 """internal function to add revisions to the log
1145 1146
1146 1147 see addrevision for argument descriptions.
1147 1148 invariants:
1148 1149 - text is optional (can be None); if not set, cachedelta must be set.
1149 1150 if both are set, they must correspond to each other.
1150 1151 """
1151 1152 btext = [text]
1152 1153 def buildtext():
1153 1154 if btext[0] is not None:
1154 1155 return btext[0]
1155 1156 # flush any pending writes here so we can read it in revision
1156 1157 if dfh:
1157 1158 dfh.flush()
1158 1159 ifh.flush()
1159 1160 basetext = self.revision(self.node(cachedelta[0]))
1160 1161 btext[0] = mdiff.patch(basetext, cachedelta[1])
1161 1162 self.checkhash(btext[0], p1, p2, node)
1162 1163 return btext[0]
1163 1164
1164 1165 def builddelta(rev):
1165 1166 # can we use the cached delta?
1166 1167 if cachedelta and cachedelta[0] == rev:
1167 1168 delta = cachedelta[1]
1168 1169 else:
1169 1170 t = buildtext()
1170 1171 ptext = self.revision(self.node(rev))
1171 1172 delta = mdiff.textdiff(ptext, t)
1172 1173 data = self.compress(delta)
1173 1174 l = len(data[1]) + len(data[0])
1174 1175 if basecache[0] == rev:
1175 1176 chainbase = basecache[1]
1176 1177 else:
1177 1178 chainbase = self.chainbase(rev)
1178 1179 dist = l + offset - self.start(chainbase)
1179 1180 if self._generaldelta:
1180 1181 base = rev
1181 1182 else:
1182 1183 base = chainbase
1183 1184 return dist, l, data, base, chainbase
1184 1185
1185 1186 curr = len(self)
1186 1187 prev = curr - 1
1187 1188 base = chainbase = curr
1188 1189 offset = self.end(prev)
1189 1190 flags = 0
1190 1191 d = None
1191 1192 if self._basecache is None:
1192 1193 self._basecache = (prev, self.chainbase(prev))
1193 1194 basecache = self._basecache
1194 1195 p1r, p2r = self.rev(p1), self.rev(p2)
1195 1196
1196 1197 # should we try to build a delta?
1197 1198 if prev != nullrev:
1198 1199 if self._generaldelta:
1199 1200 if p1r >= basecache[1]:
1200 1201 d = builddelta(p1r)
1201 1202 elif p2r >= basecache[1]:
1202 1203 d = builddelta(p2r)
1203 1204 else:
1204 1205 d = builddelta(prev)
1205 1206 else:
1206 1207 d = builddelta(prev)
1207 1208 dist, l, data, base, chainbase = d
1208 1209
1209 1210 # full versions are inserted when the needed deltas
1210 1211 # become comparable to the uncompressed text
1211 1212 if text is None:
1212 1213 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1213 1214 cachedelta[1])
1214 1215 else:
1215 1216 textlen = len(text)
1216 1217 if d is None or dist > textlen * 2:
1217 1218 text = buildtext()
1218 1219 data = self.compress(text)
1219 1220 l = len(data[1]) + len(data[0])
1220 1221 base = chainbase = curr
1221 1222
1222 1223 e = (offset_type(offset, flags), l, textlen,
1223 1224 base, link, p1r, p2r, node)
1224 1225 self.index.insert(-1, e)
1225 1226 self.nodemap[node] = curr
1226 1227
1227 1228 entry = self._io.packentry(e, self.node, self.version, curr)
1228 1229 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1229 1230
1230 1231 if type(text) == str: # only accept immutable objects
1231 1232 self._cache = (node, curr, text)
1232 1233 self._basecache = (curr, chainbase)
1233 1234 return node
1234 1235
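
The dist > textlen * 2 test above is what enforces the bound promised in the class docstring: once a delta chain's span exceeds twice the uncompressed text size, a full snapshot is stored and the chain restarts. In sketch form (illustrative, not the real API):

    def needsfulltext(dist, textlen):
        # mirrors the cutoff used in _addrevision above
        return dist > textlen * 2

    assert needsfulltext(2049, 1024)
    assert not needsfulltext(2048, 1024)
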
1235 1236 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1236 1237 curr = len(self) - 1
1237 1238 if not self._inline:
1238 1239 transaction.add(self.datafile, offset)
1239 1240 transaction.add(self.indexfile, curr * len(entry))
1240 1241 if data[0]:
1241 1242 dfh.write(data[0])
1242 1243 dfh.write(data[1])
1243 1244 dfh.flush()
1244 1245 ifh.write(entry)
1245 1246 else:
1246 1247 offset += curr * self._io.size
1247 1248 transaction.add(self.indexfile, offset, curr)
1248 1249 ifh.write(entry)
1249 1250 ifh.write(data[0])
1250 1251 ifh.write(data[1])
1251 1252 self.checkinlinesize(transaction, ifh)
1252 1253
1253 1254 def addgroup(self, bundle, linkmapper, transaction):
1254 1255 """
1255 1256 add a delta group
1256 1257
1257 1258 given a set of deltas, add them to the revision log. the
1258 1259 first delta is against its parent, which should be in our
1259 1260 log, the rest are against the previous delta.
1260 1261 """
1261 1262
1262 1263 # track the base of the current delta log
1263 1264 content = []
1264 1265 node = None
1265 1266
1266 1267 r = len(self)
1267 1268 end = 0
1268 1269 if r:
1269 1270 end = self.end(r - 1)
1270 1271 ifh = self.opener(self.indexfile, "a+")
1271 1272 isize = r * self._io.size
1272 1273 if self._inline:
1273 1274 transaction.add(self.indexfile, end + isize, r)
1274 1275 dfh = None
1275 1276 else:
1276 1277 transaction.add(self.indexfile, isize, r)
1277 1278 transaction.add(self.datafile, end)
1278 1279 dfh = self.opener(self.datafile, "a")
1279 1280
1280 1281 try:
1281 1282 # loop through our set of deltas
1282 1283 chain = None
1283 1284 while True:
1284 1285 chunkdata = bundle.deltachunk(chain)
1285 1286 if not chunkdata:
1286 1287 break
1287 1288 node = chunkdata['node']
1288 1289 p1 = chunkdata['p1']
1289 1290 p2 = chunkdata['p2']
1290 1291 cs = chunkdata['cs']
1291 1292 deltabase = chunkdata['deltabase']
1292 1293 delta = chunkdata['delta']
1293 1294
1294 1295 content.append(node)
1295 1296
1296 1297 link = linkmapper(cs)
1297 1298 if node in self.nodemap:
1298 1299 # this can happen if two branches make the same change
1299 1300 chain = node
1300 1301 continue
1301 1302
1302 1303 for p in (p1, p2):
1303 1304 if p not in self.nodemap:
1304 1305 raise LookupError(p, self.indexfile,
1305 1306 _('unknown parent'))
1306 1307
1307 1308 if deltabase not in self.nodemap:
1308 1309 raise LookupError(deltabase, self.indexfile,
1309 1310 _('unknown delta base'))
1310 1311
1311 1312 baserev = self.rev(deltabase)
1312 1313 chain = self._addrevision(node, None, transaction, link,
1313 1314 p1, p2, (baserev, delta), ifh, dfh)
1314 1315 if not dfh and not self._inline:
1315 1316 # addrevision switched from inline to conventional
1316 1317 # reopen the index
1317 1318 ifh.close()
1318 1319 dfh = self.opener(self.datafile, "a")
1319 1320 ifh = self.opener(self.indexfile, "a")
1320 1321 finally:
1321 1322 if dfh:
1322 1323 dfh.close()
1323 1324 ifh.close()
1324 1325
1325 1326 return content
1326 1327
1327 1328 def getstrippoint(self, minlink):
1328 1329 """find the minimum rev that must be stripped to strip the linkrev
1329 1330
1330 1331 Returns a tuple containing the minimum rev and a set of all revs that
1331 1332 have linkrevs that will be broken by this strip.
1332 1333 """
1333 1334 brokenrevs = set()
1334 1335 strippoint = len(self)
1335 1336
1336 1337 heads = {}
1337 1338 futurelargelinkrevs = set()
1338 1339 for head in self.headrevs():
1339 1340 headlinkrev = self.linkrev(head)
1340 1341 heads[head] = headlinkrev
1341 1342 if headlinkrev >= minlink:
1342 1343 futurelargelinkrevs.add(headlinkrev)
1343 1344
1344 1345 # This algorithm involves walking down the rev graph, starting at the
1345 1346 # heads. Since the revs are topologically sorted according to linkrev,
1346 1347 # once all head linkrevs are below the minlink, we know there are
1347 1348 # no more revs that could have a linkrev greater than minlink.
1348 1349 # So we can stop walking.
1349 1350 while futurelargelinkrevs:
1350 1351 strippoint -= 1
1351 1352 linkrev = heads.pop(strippoint)
1352 1353
1353 1354 if linkrev < minlink:
1354 1355 brokenrevs.add(strippoint)
1355 1356 else:
1356 1357 futurelargelinkrevs.remove(linkrev)
1357 1358
1358 1359 for p in self.parentrevs(strippoint):
1359 1360 if p != nullrev:
1360 1361 plinkrev = self.linkrev(p)
1361 1362 heads[p] = plinkrev
1362 1363 if plinkrev >= minlink:
1363 1364 futurelargelinkrevs.add(plinkrev)
1364 1365
1365 1366 return strippoint, brokenrevs
1366 1367
1367 1368 def strip(self, minlink, transaction):
1368 1369 """truncate the revlog on the first revision with a linkrev >= minlink
1369 1370
1370 1371 This function is called when we're stripping revision minlink and
1371 1372 its descendants from the repository.
1372 1373
1373 1374 We have to remove all revisions with linkrev >= minlink, because
1374 1375 the equivalent changelog revisions will be renumbered after the
1375 1376 strip.
1376 1377
1377 1378 So we truncate the revlog on the first of these revisions, and
1378 1379 trust that the caller has saved the revisions that shouldn't be
1379 1380 removed and that it'll re-add them after this truncation.
1380 1381 """
1381 1382 if len(self) == 0:
1382 1383 return
1383 1384
1384 1385 rev, _ = self.getstrippoint(minlink)
1385 1386 if rev == len(self):
1386 1387 return
1387 1388
1388 1389 # first truncate the files on disk
1389 1390 end = self.start(rev)
1390 1391 if not self._inline:
1391 1392 transaction.add(self.datafile, end)
1392 1393 end = rev * self._io.size
1393 1394 else:
1394 1395 end += rev * self._io.size
1395 1396
1396 1397 transaction.add(self.indexfile, end)
1397 1398
1398 1399 # then reset internal state in memory to forget those revisions
1399 1400 self._cache = None
1400 1401 self._chunkclear()
1401 1402 for x in xrange(rev, len(self)):
1402 1403 del self.nodemap[self.node(x)]
1403 1404
1404 1405 del self.index[rev:-1]
1405 1406
1406 1407 def checksize(self):
1407 1408 expected = 0
1408 1409 if len(self):
1409 1410 expected = max(0, self.end(len(self) - 1))
1410 1411
1411 1412 try:
1412 1413 f = self.opener(self.datafile)
1413 1414 f.seek(0, 2)
1414 1415 actual = f.tell()
1415 1416 f.close()
1416 1417 dd = actual - expected
1417 1418 except IOError, inst:
1418 1419 if inst.errno != errno.ENOENT:
1419 1420 raise
1420 1421 dd = 0
1421 1422
1422 1423 try:
1423 1424 f = self.opener(self.indexfile)
1424 1425 f.seek(0, 2)
1425 1426 actual = f.tell()
1426 1427 f.close()
1427 1428 s = self._io.size
1428 1429 i = max(0, actual // s)
1429 1430 di = actual - (i * s)
1430 1431 if self._inline:
1431 1432 databytes = 0
1432 1433 for r in self:
1433 1434 databytes += max(0, self.length(r))
1434 1435 dd = 0
1435 1436 di = actual - len(self) * s - databytes
1436 1437 except IOError, inst:
1437 1438 if inst.errno != errno.ENOENT:
1438 1439 raise
1439 1440 di = 0
1440 1441
1441 1442 return (dd, di)
1442 1443
1443 1444 def files(self):
1444 1445 res = [self.indexfile]
1445 1446 if not self._inline:
1446 1447 res.append(self.datafile)
1447 1448 return res