revlog: move references to revlog.hash to inside the revlog class...
Augie Fackler
r22785:abc44fcc default
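With hash() now exposed as a method on the revlog class, subclasses can substitute their own node-hashing scheme, as the new docstring notes. A minimal illustrative sketch (the subclass name and the SHA-256 scheme are hypothetical, not part of this change):

    import hashlib

    class sha256revlog(revlog):
        # hypothetical subclass: swap SHA-1 for truncated SHA-256
        def hash(self, text, p1, p2):
            # same convention as the default: parents hashed in sorted order
            a, b = sorted([p1, p2])
            s = hashlib.sha256(a)
            s.update(b)
            s.update(text)
            return s.digest()[:20]  # the index stores 20-byte nodeids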
@@ -1,1457 +1,1465 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 _nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = _nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
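For reference, the p2 == nullid fast path above is purely an optimization: nullid is 20 zero bytes, so it always sorts first, and the general branch would produce the same digest. A quick standalone check (hashlib stands in for util.sha1; nullid_ mirrors node.nullid):

    import hashlib

    nullid_ = '\0' * 20  # mirrors node.nullid

    def slowhash(text, p1, p2):
        # general branch: parents hashed in sorted order, so the
        # result is symmetric in p1 and p2
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    # fast-path equivalence: sorted([p1, nullid_]) == [nullid_, p1]
    assert slowhash('data', 'x' * 20, nullid_) == \
        hashlib.sha1(nullid_ + 'x' * 20 + 'data').digest()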
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
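The leading byte acts as a codec tag: '\0' marks a chunk stored literally, 'u' marks explicitly uncompressed text, and 'x' happens to be the first byte of a zlib stream at default settings. A usage sketch of the convention, assuming decompress from this module (the real producer is compress(), defined further down, which returns the tag and payload as a pair):

    import zlib

    def encode(text):
        # sketch only: produce bytes that decompress() will accept
        bin = zlib.compress(text)
        if len(bin) >= len(text):
            return text if text[:1] == '\0' else 'u' + text
        return bin

    assert decompress(encode('some text')) == 'some text'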
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 32 bytes: parent 1 nodeid
100 100 # 32 bytes: parent 2 nodeid
101 101 # 32 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = None
204 204 self._chunkcache = (0, '')
205 205 self._chunkcachesize = 65536
206 206 self.index = []
207 207 self._pcache = {}
208 208 self._nodecache = {nullid: nullrev}
209 209 self._nodepos = None
210 210
211 211 v = REVLOG_DEFAULT_VERSION
212 212 opts = getattr(opener, 'options', None)
213 213 if opts is not None:
214 214 if 'revlogv1' in opts:
215 215 if 'generaldelta' in opts:
216 216 v |= REVLOGGENERALDELTA
217 217 else:
218 218 v = 0
219 219 if 'chunkcachesize' in opts:
220 220 self._chunkcachesize = opts['chunkcachesize']
221 221
222 222 if self._chunkcachesize <= 0:
223 223 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 224 'than 0') % self._chunkcachesize)
225 225 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 226 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 227 'of 2') % self._chunkcachesize)
228 228
229 229 i = ''
230 230 self._initempty = True
231 231 try:
232 232 f = self.opener(self.indexfile)
233 233 i = f.read()
234 234 f.close()
235 235 if len(i) > 0:
236 236 v = struct.unpack(versionformat, i[:4])[0]
237 237 self._initempty = False
238 238 except IOError, inst:
239 239 if inst.errno != errno.ENOENT:
240 240 raise
241 241
242 242 self.version = v
243 243 self._inline = v & REVLOGNGINLINEDATA
244 244 self._generaldelta = v & REVLOGGENERALDELTA
245 245 flags = v & ~0xFFFF
246 246 fmt = v & 0xFFFF
247 247 if fmt == REVLOGV0 and flags:
248 248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 249 % (self.indexfile, flags >> 16))
250 250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 252 % (self.indexfile, flags >> 16))
253 253 elif fmt > REVLOGNG:
254 254 raise RevlogError(_("index %s unknown format %d")
255 255 % (self.indexfile, fmt))
256 256
257 257 self._io = revlogio()
258 258 if self.version == REVLOGV0:
259 259 self._io = revlogoldio()
260 260 try:
261 261 d = self._io.parseindex(i, self._inline)
262 262 except (ValueError, IndexError):
263 263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 264 self.index, nodemap, self._chunkcache = d
265 265 if nodemap is not None:
266 266 self.nodemap = self._nodecache = nodemap
267 267 if not self._chunkcache:
268 268 self._chunkclear()
269 269
270 270 def tip(self):
271 271 return self.node(len(self.index) - 2)
272 272 def __len__(self):
273 273 return len(self.index) - 1
274 274 def __iter__(self):
275 275 return iter(xrange(len(self)))
276 276 def revs(self, start=0, stop=None):
277 277 """iterate over all rev in this revlog (from start to stop)"""
278 278 step = 1
279 279 if stop is not None:
280 280 if start > stop:
281 281 step = -1
282 282 stop += step
283 283 else:
284 284 stop = len(self)
285 285 return xrange(start, stop, step)
286 286
287 287 @util.propertycache
288 288 def nodemap(self):
289 289 self.rev(self.node(0))
290 290 return self._nodecache
291 291
292 292 def hasnode(self, node):
293 293 try:
294 294 self.rev(node)
295 295 return True
296 296 except KeyError:
297 297 return False
298 298
299 299 def clearcaches(self):
300 300 try:
301 301 self._nodecache.clearcaches()
302 302 except AttributeError:
303 303 self._nodecache = {nullid: nullrev}
304 304 self._nodepos = None
305 305
306 306 def rev(self, node):
307 307 try:
308 308 return self._nodecache[node]
309 309 except TypeError:
310 310 raise
311 311 except RevlogError:
312 312 # parsers.c radix tree lookup failed
313 313 raise LookupError(node, self.indexfile, _('no node'))
314 314 except KeyError:
315 315 # pure python cache lookup failed
316 316 n = self._nodecache
317 317 i = self.index
318 318 p = self._nodepos
319 319 if p is None:
320 320 p = len(i) - 2
321 321 for r in xrange(p, -1, -1):
322 322 v = i[r][7]
323 323 n[v] = r
324 324 if v == node:
325 325 self._nodepos = r - 1
326 326 return r
327 327 raise LookupError(node, self.indexfile, _('no node'))
328 328
329 329 def node(self, rev):
330 330 return self.index[rev][7]
331 331 def linkrev(self, rev):
332 332 return self.index[rev][4]
333 333 def parents(self, node):
334 334 i = self.index
335 335 d = i[self.rev(node)]
336 336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 337 def parentrevs(self, rev):
338 338 return self.index[rev][5:7]
339 339 def start(self, rev):
340 340 return int(self.index[rev][0] >> 16)
341 341 def end(self, rev):
342 342 return self.start(rev) + self.length(rev)
343 343 def length(self, rev):
344 344 return self.index[rev][1]
345 345 def chainbase(self, rev):
346 346 index = self.index
347 347 base = index[rev][3]
348 348 while base != rev:
349 349 rev = base
350 350 base = index[rev][3]
351 351 return base
352 352 def flags(self, rev):
353 353 return self.index[rev][0] & 0xFFFF
354 354 def rawsize(self, rev):
355 355 """return the length of the uncompressed text for a given revision"""
356 356 l = self.index[rev][2]
357 357 if l >= 0:
358 358 return l
359 359
360 360 t = self.revision(self.node(rev))
361 361 return len(t)
362 362 size = rawsize
363 363
364 364 def ancestors(self, revs, stoprev=0, inclusive=False):
365 365 """Generate the ancestors of 'revs' in reverse topological order.
366 366 Does not generate revs lower than stoprev.
367 367
368 368 See the documentation for ancestor.lazyancestors for more details."""
369 369
370 370 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
371 371 inclusive=inclusive)
372 372
373 373 def descendants(self, revs):
374 374 """Generate the descendants of 'revs' in revision order.
375 375
376 376 Yield a sequence of revision numbers starting with a child of
377 377 some rev in revs, i.e., each revision is *not* considered a
378 378 descendant of itself. Results are ordered by revision number (a
379 379 topological sort)."""
380 380 first = min(revs)
381 381 if first == nullrev:
382 382 for i in self:
383 383 yield i
384 384 return
385 385
386 386 seen = set(revs)
387 387 for i in self.revs(start=first + 1):
388 388 for x in self.parentrevs(i):
389 389 if x != nullrev and x in seen:
390 390 seen.add(i)
391 391 yield i
392 392 break
393 393
394 394 def findcommonmissing(self, common=None, heads=None):
395 395 """Return a tuple of the ancestors of common and the ancestors of heads
396 396 that are not ancestors of common. In revset terminology, we return the
397 397 tuple:
398 398
399 399 ::common, (::heads) - (::common)
400 400
401 401 The list is sorted by revision number, meaning it is
402 402 topologically sorted.
403 403
404 404 'heads' and 'common' are both lists of node IDs. If heads is
405 405 not supplied, uses all of the revlog's heads. If common is not
406 406 supplied, uses nullid."""
407 407 if common is None:
408 408 common = [nullid]
409 409 if heads is None:
410 410 heads = self.heads()
411 411
412 412 common = [self.rev(n) for n in common]
413 413 heads = [self.rev(n) for n in heads]
414 414
415 415 # we want the ancestors, but inclusive
416 416 class lazyset(object):
417 417 def __init__(self, lazyvalues):
418 418 self.addedvalues = set()
419 419 self.lazyvalues = lazyvalues
420 420
421 421 def __contains__(self, value):
422 422 return value in self.addedvalues or value in self.lazyvalues
423 423
424 424 def __iter__(self):
425 425 added = self.addedvalues
426 426 for r in added:
427 427 yield r
428 428 for r in self.lazyvalues:
429 429 if r not in added:
430 430 yield r
431 431
432 432 def add(self, value):
433 433 self.addedvalues.add(value)
434 434
435 435 def update(self, values):
436 436 self.addedvalues.update(values)
437 437
438 438 has = lazyset(self.ancestors(common))
439 439 has.add(nullrev)
440 440 has.update(common)
441 441
442 442 # take all ancestors from heads that aren't in has
443 443 missing = set()
444 444 visit = util.deque(r for r in heads if r not in has)
445 445 while visit:
446 446 r = visit.popleft()
447 447 if r in missing:
448 448 continue
449 449 else:
450 450 missing.add(r)
451 451 for p in self.parentrevs(r):
452 452 if p not in has:
453 453 visit.append(p)
454 454 missing = list(missing)
455 455 missing.sort()
456 456 return has, [self.node(r) for r in missing]
457 457
458 458 def findmissingrevs(self, common=None, heads=None):
459 459 """Return the revision numbers of the ancestors of heads that
460 460 are not ancestors of common.
461 461
462 462 More specifically, return a list of revision numbers corresponding to
463 463 nodes N such that every N satisfies the following constraints:
464 464
465 465 1. N is an ancestor of some node in 'heads'
466 466 2. N is not an ancestor of any node in 'common'
467 467
468 468 The list is sorted by revision number, meaning it is
469 469 topologically sorted.
470 470
471 471 'heads' and 'common' are both lists of revision numbers. If heads is
472 472 not supplied, uses all of the revlog's heads. If common is not
473 473 supplied, uses nullid."""
474 474 if common is None:
475 475 common = [nullrev]
476 476 if heads is None:
477 477 heads = self.headrevs()
478 478
479 479 return ancestor.missingancestors(heads, common, self.parentrevs)
480 480
481 481 def findmissing(self, common=None, heads=None):
482 482 """Return the ancestors of heads that are not ancestors of common.
483 483
484 484 More specifically, return a list of nodes N such that every N
485 485 satisfies the following constraints:
486 486
487 487 1. N is an ancestor of some node in 'heads'
488 488 2. N is not an ancestor of any node in 'common'
489 489
490 490 The list is sorted by revision number, meaning it is
491 491 topologically sorted.
492 492
493 493 'heads' and 'common' are both lists of node IDs. If heads is
494 494 not supplied, uses all of the revlog's heads. If common is not
495 495 supplied, uses nullid."""
496 496 if common is None:
497 497 common = [nullid]
498 498 if heads is None:
499 499 heads = self.heads()
500 500
501 501 common = [self.rev(n) for n in common]
502 502 heads = [self.rev(n) for n in heads]
503 503
504 504 return [self.node(r) for r in
505 505 ancestor.missingancestors(heads, common, self.parentrevs)]
506 506
507 507 def nodesbetween(self, roots=None, heads=None):
508 508 """Return a topological path from 'roots' to 'heads'.
509 509
510 510 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
511 511 topologically sorted list of all nodes N that satisfy both of
512 512 these constraints:
513 513
514 514 1. N is a descendant of some node in 'roots'
515 515 2. N is an ancestor of some node in 'heads'
516 516
517 517 Every node is considered to be both a descendant and an ancestor
518 518 of itself, so every reachable node in 'roots' and 'heads' will be
519 519 included in 'nodes'.
520 520
521 521 'outroots' is the list of reachable nodes in 'roots', i.e., the
522 522 subset of 'roots' that is returned in 'nodes'. Likewise,
523 523 'outheads' is the subset of 'heads' that is also in 'nodes'.
524 524
525 525 'roots' and 'heads' are both lists of node IDs. If 'roots' is
526 526 unspecified, uses nullid as the only root. If 'heads' is
527 527 unspecified, uses list of all of the revlog's heads."""
528 528 nonodes = ([], [], [])
529 529 if roots is not None:
530 530 roots = list(roots)
531 531 if not roots:
532 532 return nonodes
533 533 lowestrev = min([self.rev(n) for n in roots])
534 534 else:
535 535 roots = [nullid] # Everybody's a descendant of nullid
536 536 lowestrev = nullrev
537 537 if (lowestrev == nullrev) and (heads is None):
538 538 # We want _all_ the nodes!
539 539 return ([self.node(r) for r in self], [nullid], list(self.heads()))
540 540 if heads is None:
541 541 # All nodes are ancestors, so the latest ancestor is the last
542 542 # node.
543 543 highestrev = len(self) - 1
544 544 # Set ancestors to None to signal that every node is an ancestor.
545 545 ancestors = None
546 546 # Set heads to an empty dictionary for later discovery of heads
547 547 heads = {}
548 548 else:
549 549 heads = list(heads)
550 550 if not heads:
551 551 return nonodes
552 552 ancestors = set()
553 553 # Turn heads into a dictionary so we can remove 'fake' heads.
554 554 # Also, later we will be using it to filter out the heads we can't
555 555 # find from roots.
556 556 heads = dict.fromkeys(heads, False)
557 557 # Start at the top and keep marking parents until we're done.
558 558 nodestotag = set(heads)
559 559 # Remember where the top was so we can use it as a limit later.
560 560 highestrev = max([self.rev(n) for n in nodestotag])
561 561 while nodestotag:
562 562 # grab a node to tag
563 563 n = nodestotag.pop()
564 564 # Never tag nullid
565 565 if n == nullid:
566 566 continue
567 567 # A node's revision number represents its place in a
568 568 # topologically sorted list of nodes.
569 569 r = self.rev(n)
570 570 if r >= lowestrev:
571 571 if n not in ancestors:
572 572 # If we are possibly a descendant of one of the roots
573 573 # and we haven't already been marked as an ancestor
574 574 ancestors.add(n) # Mark as ancestor
575 575 # Add non-nullid parents to list of nodes to tag.
576 576 nodestotag.update([p for p in self.parents(n) if
577 577 p != nullid])
578 578 elif n in heads: # We've seen it before, is it a fake head?
579 579 # So it is; real heads should not be ancestors of
580 580 # any other heads.
581 581 heads.pop(n)
582 582 if not ancestors:
583 583 return nonodes
584 584 # Now that we have our set of ancestors, we want to remove any
585 585 # roots that are not ancestors.
586 586
587 587 # If one of the roots was nullid, everything is included anyway.
588 588 if lowestrev > nullrev:
589 589 # But, since we weren't, let's recompute the lowest rev to not
590 590 # include roots that aren't ancestors.
591 591
592 592 # Filter out roots that aren't ancestors of heads
593 593 roots = [n for n in roots if n in ancestors]
594 594 # Recompute the lowest revision
595 595 if roots:
596 596 lowestrev = min([self.rev(n) for n in roots])
597 597 else:
598 598 # No more roots? Return empty list
599 599 return nonodes
600 600 else:
601 601 # We are descending from nullid, and don't need to care about
602 602 # any other roots.
603 603 lowestrev = nullrev
604 604 roots = [nullid]
605 605 # Transform our roots list into a set.
606 606 descendants = set(roots)
607 607 # Also, keep the original roots so we can filter out roots that aren't
608 608 # 'real' roots (i.e. are descended from other roots).
609 609 roots = descendants.copy()
610 610 # Our topologically sorted list of output nodes.
611 611 orderedout = []
612 612 # Don't start at nullid since we don't want nullid in our output list,
613 613 # and if nullid shows up in descendants, empty parents will look like
614 614 # they're descendants.
615 615 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
616 616 n = self.node(r)
617 617 isdescendant = False
618 618 if lowestrev == nullrev: # Everybody is a descendant of nullid
619 619 isdescendant = True
620 620 elif n in descendants:
621 621 # n is already a descendant
622 622 isdescendant = True
623 623 # This check only needs to be done here because all the roots
624 624 # will start being marked as descendants before the loop.
625 625 if n in roots:
626 626 # If n was a root, check if it's a 'real' root.
627 627 p = tuple(self.parents(n))
628 628 # If any of its parents are descendants, it's not a root.
629 629 if (p[0] in descendants) or (p[1] in descendants):
630 630 roots.remove(n)
631 631 else:
632 632 p = tuple(self.parents(n))
633 633 # A node is a descendant if either of its parents are
634 634 # descendants. (We seeded the descendants set with the roots
635 635 # up there, remember?)
636 636 if (p[0] in descendants) or (p[1] in descendants):
637 637 descendants.add(n)
638 638 isdescendant = True
639 639 if isdescendant and ((ancestors is None) or (n in ancestors)):
640 640 # Only include nodes that are both descendants and ancestors.
641 641 orderedout.append(n)
642 642 if (ancestors is not None) and (n in heads):
643 643 # We're trying to figure out which heads are reachable
644 644 # from roots.
645 645 # Mark this head as having been reached
646 646 heads[n] = True
647 647 elif ancestors is None:
648 648 # Otherwise, we're trying to discover the heads.
649 649 # Assume this is a head because if it isn't, the next step
650 650 # will eventually remove it.
651 651 heads[n] = True
652 652 # But, obviously its parents aren't.
653 653 for p in self.parents(n):
654 654 heads.pop(p, None)
655 655 heads = [n for n, flag in heads.iteritems() if flag]
656 656 roots = list(roots)
657 657 assert orderedout
658 658 assert roots
659 659 assert heads
660 660 return (orderedout, roots, heads)
661 661
662 662 def headrevs(self):
663 663 try:
664 664 return self.index.headrevs()
665 665 except AttributeError:
666 666 return self._headrevs()
667 667
668 668 def _headrevs(self):
669 669 count = len(self)
670 670 if not count:
671 671 return [nullrev]
672 672 # we won't iterate over filtered revs, so nobody is a head at the start
673 673 ishead = [0] * (count + 1)
674 674 index = self.index
675 675 for r in self:
676 676 ishead[r] = 1 # I may be a head
677 677 e = index[r]
678 678 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
679 679 return [r for r, val in enumerate(ishead) if val]
680 680
681 681 def heads(self, start=None, stop=None):
682 682 """return the list of all nodes that have no children
683 683
684 684 if start is specified, only heads that are descendants of
685 685 start will be returned
686 686 if stop is specified, it will consider all the revs from stop
687 687 as if they had no children
688 688 """
689 689 if start is None and stop is None:
690 690 if not len(self):
691 691 return [nullid]
692 692 return [self.node(r) for r in self.headrevs()]
693 693
694 694 if start is None:
695 695 start = nullid
696 696 if stop is None:
697 697 stop = []
698 698 stoprevs = set([self.rev(n) for n in stop])
699 699 startrev = self.rev(start)
700 700 reachable = set((startrev,))
701 701 heads = set((startrev,))
702 702
703 703 parentrevs = self.parentrevs
704 704 for r in self.revs(start=startrev + 1):
705 705 for p in parentrevs(r):
706 706 if p in reachable:
707 707 if r not in stoprevs:
708 708 reachable.add(r)
709 709 heads.add(r)
710 710 if p in heads and p not in stoprevs:
711 711 heads.remove(p)
712 712
713 713 return [self.node(r) for r in heads]
714 714
715 715 def children(self, node):
716 716 """find the children of a given node"""
717 717 c = []
718 718 p = self.rev(node)
719 719 for r in self.revs(start=p + 1):
720 720 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
721 721 if prevs:
722 722 for pr in prevs:
723 723 if pr == p:
724 724 c.append(self.node(r))
725 725 elif p == nullrev:
726 726 c.append(self.node(r))
727 727 return c
728 728
729 729 def descendant(self, start, end):
730 730 if start == nullrev:
731 731 return True
732 732 for i in self.descendants([start]):
733 733 if i == end:
734 734 return True
735 735 elif i > end:
736 736 break
737 737 return False
738 738
739 739 def commonancestorsheads(self, a, b):
740 740 """calculate all the heads of the common ancestors of nodes a and b"""
741 741 a, b = self.rev(a), self.rev(b)
742 742 try:
743 743 ancs = self.index.commonancestorsheads(a, b)
744 744 except (AttributeError, OverflowError): # C implementation failed
745 745 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
746 746 return map(self.node, ancs)
747 747
748 748 def isancestor(self, a, b):
749 749 """return True if node a is an ancestor of node b
750 750
751 751 The implementation of this is trivial but the use of
752 752 commonancestorsheads is not."""
753 753 return a in self.commonancestorsheads(a, b)
754 754
755 755 def ancestor(self, a, b):
756 756 """calculate the "best" common ancestor of nodes a and b"""
757 757
758 758 a, b = self.rev(a), self.rev(b)
759 759 try:
760 760 ancs = self.index.ancestors(a, b)
761 761 except (AttributeError, OverflowError):
762 762 ancs = ancestor.ancestors(self.parentrevs, a, b)
763 763 if ancs:
764 764 # choose a consistent winner when there's a tie
765 765 return min(map(self.node, ancs))
766 766 return nullid
767 767
768 768 def _match(self, id):
769 769 if isinstance(id, int):
770 770 # rev
771 771 return self.node(id)
772 772 if len(id) == 20:
773 773 # possibly a binary node
774 774 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 775 try:
776 776 node = id
777 777 self.rev(node) # quick search the index
778 778 return node
779 779 except LookupError:
780 780 pass # may be partial hex id
781 781 try:
782 782 # str(rev)
783 783 rev = int(id)
784 784 if str(rev) != id:
785 785 raise ValueError
786 786 if rev < 0:
787 787 rev = len(self) + rev
788 788 if rev < 0 or rev >= len(self):
789 789 raise ValueError
790 790 return self.node(rev)
791 791 except (ValueError, OverflowError):
792 792 pass
793 793 if len(id) == 40:
794 794 try:
795 795 # a full hex nodeid?
796 796 node = bin(id)
797 797 self.rev(node)
798 798 return node
799 799 except (TypeError, LookupError):
800 800 pass
801 801
802 802 def _partialmatch(self, id):
803 803 try:
804 804 n = self.index.partialmatch(id)
805 805 if n and self.hasnode(n):
806 806 return n
807 807 return None
808 808 except RevlogError:
809 809 # parsers.c radix tree lookup gave multiple matches
810 810 # fall through to slow path that filters hidden revisions
811 811 pass
812 812 except (AttributeError, ValueError):
813 813 # we are pure python, or key was too short to search radix tree
814 814 pass
815 815
816 816 if id in self._pcache:
817 817 return self._pcache[id]
818 818
819 819 if len(id) < 40:
820 820 try:
821 821 # hex(node)[:...]
822 822 l = len(id) // 2 # grab an even number of digits
823 823 prefix = bin(id[:l * 2])
824 824 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
825 825 nl = [n for n in nl if hex(n).startswith(id) and
826 826 self.hasnode(n)]
827 827 if len(nl) > 0:
828 828 if len(nl) == 1:
829 829 self._pcache[id] = nl[0]
830 830 return nl[0]
831 831 raise LookupError(id, self.indexfile,
832 832 _('ambiguous identifier'))
833 833 return None
834 834 except TypeError:
835 835 pass
836 836
837 837 def lookup(self, id):
838 838 """locate a node based on:
839 839 - revision number or str(revision number)
840 840 - nodeid or subset of hex nodeid
841 841 """
842 842 n = self._match(id)
843 843 if n is not None:
844 844 return n
845 845 n = self._partialmatch(id)
846 846 if n:
847 847 return n
848 848
849 849 raise LookupError(id, self.indexfile, _('no match found'))
850 850
851 851 def cmp(self, node, text):
852 852 """compare text with a given file revision
853 853
854 854 returns True if text is different from what is stored.
855 855 """
856 856 p1, p2 = self.parents(node)
857 857 return hash(text, p1, p2) != node
858 858
859 859 def _addchunk(self, offset, data):
860 860 o, d = self._chunkcache
861 861 # try to add to existing cache
862 862 if o + len(d) == offset and len(d) + len(data) < _chunksize:
863 863 self._chunkcache = o, d + data
864 864 else:
865 865 self._chunkcache = offset, data
866 866
867 867 def _loadchunk(self, offset, length):
868 868 if self._inline:
869 869 df = self.opener(self.indexfile)
870 870 else:
871 871 df = self.opener(self.datafile)
872 872
873 873 # Cache data both forward and backward around the requested
874 874 # data, in a fixed size window. This helps speed up operations
875 875 # involving reading the revlog backwards.
876 876 cachesize = self._chunkcachesize
877 877 realoffset = offset & ~(cachesize - 1)
878 878 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
879 879 - realoffset)
880 880 df.seek(realoffset)
881 881 d = df.read(reallength)
882 882 df.close()
883 883 self._addchunk(realoffset, d)
884 884 if offset != realoffset or reallength != length:
885 885 return util.buffer(d, offset - realoffset, length)
886 886 return d
887 887
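Since __init__ enforces that _chunkcachesize is a power of two, the mask arithmetic above rounds the requested range out to cache-size boundaries. Worked numbers with the default 65536-byte cache (illustrative values only):

    cachesize = 65536
    offset, length = 70000, 1000
    realoffset = offset & ~(cachesize - 1)
    # 70000 & ~65535 == 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)
    # (136536 & ~65535) - 65536 == 131072 - 65536 == 65536
    # so bytes [65536, 131072) are read and cached, covering [70000, 71000)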
888 888 def _getchunk(self, offset, length):
889 889 o, d = self._chunkcache
890 890 l = len(d)
891 891
892 892 # is it in the cache?
893 893 cachestart = offset - o
894 894 cacheend = cachestart + length
895 895 if cachestart >= 0 and cacheend <= l:
896 896 if cachestart == 0 and cacheend == l:
897 897 return d # avoid a copy
898 898 return util.buffer(d, cachestart, cacheend - cachestart)
899 899
900 900 return self._loadchunk(offset, length)
901 901
902 902 def _chunkraw(self, startrev, endrev):
903 903 start = self.start(startrev)
904 904 end = self.end(endrev)
905 905 if self._inline:
906 906 start += (startrev + 1) * self._io.size
907 907 end += (endrev + 1) * self._io.size
908 908 length = end - start
909 909 return self._getchunk(start, length)
910 910
911 911 def _chunk(self, rev):
912 912 return decompress(self._chunkraw(rev, rev))
913 913
914 914 def _chunks(self, revs):
915 915 '''faster version of [self._chunk(rev) for rev in revs]
916 916
917 917 Assumes that revs is in ascending order.'''
918 918 if not revs:
919 919 return []
920 920 start = self.start
921 921 length = self.length
922 922 inline = self._inline
923 923 iosize = self._io.size
924 924 buffer = util.buffer
925 925
926 926 l = []
927 927 ladd = l.append
928 928
929 929 # preload the cache
930 930 try:
931 931 while True:
932 932 # ensure that the cache doesn't change out from under us
933 933 _cache = self._chunkcache
934 934 self._chunkraw(revs[0], revs[-1])
935 935 if _cache == self._chunkcache:
936 936 break
937 937 offset, data = _cache
938 938 except OverflowError:
939 939 # issue4215 - we can't cache a run of chunks greater than
940 940 # 2G on Windows
941 941 return [self._chunk(rev) for rev in revs]
942 942
943 943 for rev in revs:
944 944 chunkstart = start(rev)
945 945 if inline:
946 946 chunkstart += (rev + 1) * iosize
947 947 chunklength = length(rev)
948 948 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
949 949
950 950 return l
951 951
952 952 def _chunkclear(self):
953 953 self._chunkcache = (0, '')
954 954
955 955 def deltaparent(self, rev):
956 956 """return deltaparent of the given revision"""
957 957 base = self.index[rev][3]
958 958 if base == rev:
959 959 return nullrev
960 960 elif self._generaldelta:
961 961 return base
962 962 else:
963 963 return rev - 1
964 964
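Under generaldelta each entry records its own delta base; otherwise the base is implicitly rev - 1. A hypothetical helper (not in this module; nullrev as imported above) showing how deltaparent() walks back to a full snapshot, mirroring the chain-building loop in revision() below:

    def deltachain(rl, rev):
        # collect revs from 'rev' back to its base snapshot
        chain = []
        while rev != nullrev:
            chain.append(rev)
            rev = rl.deltaparent(rev)
        chain.reverse()
        return chain  # chain[0] is a full text, the rest are deltas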
965 965 def revdiff(self, rev1, rev2):
966 966 """return or calculate a delta between two revisions"""
967 967 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
968 968 return str(self._chunk(rev2))
969 969
970 970 return mdiff.textdiff(self.revision(rev1),
971 971 self.revision(rev2))
972 972
973 973 def revision(self, nodeorrev):
974 974 """return an uncompressed revision of a given node or revision
975 975 number.
976 976 """
977 977 if isinstance(nodeorrev, int):
978 978 rev = nodeorrev
979 979 node = self.node(rev)
980 980 else:
981 981 node = nodeorrev
982 982 rev = None
983 983
984 984 _cache = self._cache # grab local copy of cache to avoid thread race
985 985 cachedrev = None
986 986 if node == nullid:
987 987 return ""
988 988 if _cache:
989 989 if _cache[0] == node:
990 990 return _cache[2]
991 991 cachedrev = _cache[1]
992 992
993 993 # look up what we need to read
994 994 text = None
995 995 if rev is None:
996 996 rev = self.rev(node)
997 997
998 998 # check rev flags
999 999 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1000 1000 raise RevlogError(_('incompatible revision flag %x') %
1001 1001 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1002 1002
1003 1003 # build delta chain
1004 1004 chain = []
1005 1005 index = self.index # for performance
1006 1006 generaldelta = self._generaldelta
1007 1007 iterrev = rev
1008 1008 e = index[iterrev]
1009 1009 while iterrev != e[3] and iterrev != cachedrev:
1010 1010 chain.append(iterrev)
1011 1011 if generaldelta:
1012 1012 iterrev = e[3]
1013 1013 else:
1014 1014 iterrev -= 1
1015 1015 e = index[iterrev]
1016 1016
1017 1017 if iterrev == cachedrev:
1018 1018 # cache hit
1019 1019 text = _cache[2]
1020 1020 else:
1021 1021 chain.append(iterrev)
1022 1022 chain.reverse()
1023 1023
1024 1024 # drop cache to save memory
1025 1025 self._cache = None
1026 1026
1027 1027 bins = self._chunks(chain)
1028 1028 if text is None:
1029 1029 text = str(bins[0])
1030 1030 bins = bins[1:]
1031 1031
1032 1032 text = mdiff.patches(text, bins)
1033 1033
1034 1034 text = self._checkhash(text, node, rev)
1035 1035
1036 1036 self._cache = (node, rev, text)
1037 1037 return text
1038 1038
1039 def hash(self, text, p1, p2):
1040 """Compute a node hash.
1041
1042 Available as a function so that subclasses can replace the hash
1043 as needed.
1044 """
1045 return hash(text, p1, p2)
1046
1039 1047 def _checkhash(self, text, node, rev):
1040 1048 p1, p2 = self.parents(node)
1041 1049 self.checkhash(text, p1, p2, node, rev)
1042 1050 return text
1043 1051
1044 1052 def checkhash(self, text, p1, p2, node, rev=None):
1045 if node != hash(text, p1, p2):
1053 if node != self.hash(text, p1, p2):
1046 1054 revornode = rev
1047 1055 if revornode is None:
1048 1056 revornode = templatefilters.short(hex(node))
1049 1057 raise RevlogError(_("integrity check failed on %s:%s")
1050 1058 % (self.indexfile, revornode))
1051 1059
1052 1060 def checkinlinesize(self, tr, fp=None):
1053 1061 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1054 1062 return
1055 1063
1056 1064 trinfo = tr.find(self.indexfile)
1057 1065 if trinfo is None:
1058 1066 raise RevlogError(_("%s not found in the transaction")
1059 1067 % self.indexfile)
1060 1068
1061 1069 trindex = trinfo[2]
1062 1070 dataoff = self.start(trindex)
1063 1071
1064 1072 tr.add(self.datafile, dataoff)
1065 1073
1066 1074 if fp:
1067 1075 fp.flush()
1068 1076 fp.close()
1069 1077
1070 1078 df = self.opener(self.datafile, 'w')
1071 1079 try:
1072 1080 for r in self:
1073 1081 df.write(self._chunkraw(r, r))
1074 1082 finally:
1075 1083 df.close()
1076 1084
1077 1085 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1078 1086 self.version &= ~(REVLOGNGINLINEDATA)
1079 1087 self._inline = False
1080 1088 for i in self:
1081 1089 e = self._io.packentry(self.index[i], self.node, self.version, i)
1082 1090 fp.write(e)
1083 1091
1084 1092 # if we don't call close, the temp file will never replace the
1085 1093 # real index
1086 1094 fp.close()
1087 1095
1088 1096 tr.replace(self.indexfile, trindex * self._io.size)
1089 1097 self._chunkclear()
1090 1098
1091 1099 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1092 1100 node=None):
1093 1101 """add a revision to the log
1094 1102
1095 1103 text - the revision data to add
1096 1104 transaction - the transaction object used for rollback
1097 1105 link - the linkrev data to add
1098 1106 p1, p2 - the parent nodeids of the revision
1099 1107 cachedelta - an optional precomputed delta
1100 1108 node - nodeid of revision; typically node is not specified, and it is
1101 1109 computed by default as hash(text, p1, p2), however subclasses might
1102 1110 use a different hashing method (and override checkhash() in that case)
1103 1111 """
1104 1112 if link == nullrev:
1105 1113 raise RevlogError(_("attempted to add linkrev -1 to %s")
1106 1114 % self.indexfile)
1107 node = node or hash(text, p1, p2)
1115 node = node or self.hash(text, p1, p2)
1108 1116 if node in self.nodemap:
1109 1117 return node
1110 1118
1111 1119 dfh = None
1112 1120 if not self._inline:
1113 1121 dfh = self.opener(self.datafile, "a")
1114 1122 ifh = self.opener(self.indexfile, "a+")
1115 1123 try:
1116 1124 return self._addrevision(node, text, transaction, link, p1, p2,
1117 1125 cachedelta, ifh, dfh)
1118 1126 finally:
1119 1127 if dfh:
1120 1128 dfh.close()
1121 1129 ifh.close()
1122 1130
1123 1131 def compress(self, text):
1124 1132 """ generate a possibly-compressed representation of text """
1125 1133 if not text:
1126 1134 return ("", text)
1127 1135 l = len(text)
1128 1136 bin = None
1129 1137 if l < 44:
1130 1138 pass
1131 1139 elif l > 1000000:
1132 1140 # zlib makes an internal copy, thus doubling memory usage for
1133 1141 # large files, so let's do this in pieces
1134 1142 z = zlib.compressobj()
1135 1143 p = []
1136 1144 pos = 0
1137 1145 while pos < l:
1138 1146 pos2 = pos + 2**20
1139 1147 p.append(z.compress(text[pos:pos2]))
1140 1148 pos = pos2
1141 1149 p.append(z.flush())
1142 1150 if sum(map(len, p)) < l:
1143 1151 bin = "".join(p)
1144 1152 else:
1145 1153 bin = _compress(text)
1146 1154 if bin is None or len(bin) > l:
1147 1155 if text[0] == '\0':
1148 1156 return ("", text)
1149 1157 return ('u', text)
1150 1158 return ("", bin)
1151 1159
1152 1160 def _addrevision(self, node, text, transaction, link, p1, p2,
1153 1161 cachedelta, ifh, dfh):
1154 1162 """internal function to add revisions to the log
1155 1163
1156 1164 see addrevision for argument descriptions.
1157 1165 invariants:
1158 1166 - text is optional (can be None); if not set, cachedelta must be set.
1159 1167 if both are set, they must correspond to each other.
1160 1168 """
1161 1169 btext = [text]
1162 1170 def buildtext():
1163 1171 if btext[0] is not None:
1164 1172 return btext[0]
1165 1173 # flush any pending writes here so we can read it in revision
1166 1174 if dfh:
1167 1175 dfh.flush()
1168 1176 ifh.flush()
1169 1177 basetext = self.revision(self.node(cachedelta[0]))
1170 1178 btext[0] = mdiff.patch(basetext, cachedelta[1])
1171 1179 self.checkhash(btext[0], p1, p2, node)
1172 1180 return btext[0]
1173 1181
1174 1182 def builddelta(rev):
1175 1183 # can we use the cached delta?
1176 1184 if cachedelta and cachedelta[0] == rev:
1177 1185 delta = cachedelta[1]
1178 1186 else:
1179 1187 t = buildtext()
1180 1188 ptext = self.revision(self.node(rev))
1181 1189 delta = mdiff.textdiff(ptext, t)
1182 1190 data = self.compress(delta)
1183 1191 l = len(data[1]) + len(data[0])
1184 1192 if basecache[0] == rev:
1185 1193 chainbase = basecache[1]
1186 1194 else:
1187 1195 chainbase = self.chainbase(rev)
1188 1196 dist = l + offset - self.start(chainbase)
1189 1197 if self._generaldelta:
1190 1198 base = rev
1191 1199 else:
1192 1200 base = chainbase
1193 1201 return dist, l, data, base, chainbase
1194 1202
1195 1203 curr = len(self)
1196 1204 prev = curr - 1
1197 1205 base = chainbase = curr
1198 1206 offset = self.end(prev)
1199 1207 flags = 0
1200 1208 d = None
1201 1209 if self._basecache is None:
1202 1210 self._basecache = (prev, self.chainbase(prev))
1203 1211 basecache = self._basecache
1204 1212 p1r, p2r = self.rev(p1), self.rev(p2)
1205 1213
1206 1214 # should we try to build a delta?
1207 1215 if prev != nullrev:
1208 1216 if self._generaldelta:
1209 1217 if p1r >= basecache[1]:
1210 1218 d = builddelta(p1r)
1211 1219 elif p2r >= basecache[1]:
1212 1220 d = builddelta(p2r)
1213 1221 else:
1214 1222 d = builddelta(prev)
1215 1223 else:
1216 1224 d = builddelta(prev)
1217 1225 dist, l, data, base, chainbase = d
1218 1226
1219 1227 # full versions are inserted when the needed deltas
1220 1228 # become comparable to the uncompressed text
1221 1229 if text is None:
1222 1230 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1223 1231 cachedelta[1])
1224 1232 else:
1225 1233 textlen = len(text)
1226 1234 if d is None or dist > textlen * 2:
1227 1235 text = buildtext()
1228 1236 data = self.compress(text)
1229 1237 l = len(data[1]) + len(data[0])
1230 1238 base = chainbase = curr
1231 1239
1232 1240 e = (offset_type(offset, flags), l, textlen,
1233 1241 base, link, p1r, p2r, node)
1234 1242 self.index.insert(-1, e)
1235 1243 self.nodemap[node] = curr
1236 1244
1237 1245 entry = self._io.packentry(e, self.node, self.version, curr)
1238 1246 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1239 1247
1240 1248 if type(text) == str: # only accept immutable objects
1241 1249 self._cache = (node, curr, text)
1242 1250 self._basecache = (curr, chainbase)
1243 1251 return node
1244 1252
1245 1253 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1246 1254 curr = len(self) - 1
1247 1255 if not self._inline:
1248 1256 transaction.add(self.datafile, offset)
1249 1257 transaction.add(self.indexfile, curr * len(entry))
1250 1258 if data[0]:
1251 1259 dfh.write(data[0])
1252 1260 dfh.write(data[1])
1253 1261 dfh.flush()
1254 1262 ifh.write(entry)
1255 1263 else:
1256 1264 offset += curr * self._io.size
1257 1265 transaction.add(self.indexfile, offset, curr)
1258 1266 ifh.write(entry)
1259 1267 ifh.write(data[0])
1260 1268 ifh.write(data[1])
1261 1269 self.checkinlinesize(transaction, ifh)
1262 1270
1263 1271 def addgroup(self, bundle, linkmapper, transaction):
1264 1272 """
1265 1273 add a delta group
1266 1274
1267 1275 given a set of deltas, add them to the revision log. the
1268 1276 first delta is against its parent, which should be in our
1269 1277 log, the rest are against the previous delta.
1270 1278 """
1271 1279
1272 1280 # track the base of the current delta log
1273 1281 content = []
1274 1282 node = None
1275 1283
1276 1284 r = len(self)
1277 1285 end = 0
1278 1286 if r:
1279 1287 end = self.end(r - 1)
1280 1288 ifh = self.opener(self.indexfile, "a+")
1281 1289 isize = r * self._io.size
1282 1290 if self._inline:
1283 1291 transaction.add(self.indexfile, end + isize, r)
1284 1292 dfh = None
1285 1293 else:
1286 1294 transaction.add(self.indexfile, isize, r)
1287 1295 transaction.add(self.datafile, end)
1288 1296 dfh = self.opener(self.datafile, "a")
1289 1297
1290 1298 try:
1291 1299 # loop through our set of deltas
1292 1300 chain = None
1293 1301 while True:
1294 1302 chunkdata = bundle.deltachunk(chain)
1295 1303 if not chunkdata:
1296 1304 break
1297 1305 node = chunkdata['node']
1298 1306 p1 = chunkdata['p1']
1299 1307 p2 = chunkdata['p2']
1300 1308 cs = chunkdata['cs']
1301 1309 deltabase = chunkdata['deltabase']
1302 1310 delta = chunkdata['delta']
1303 1311
1304 1312 content.append(node)
1305 1313
1306 1314 link = linkmapper(cs)
1307 1315 if node in self.nodemap:
1308 1316 # this can happen if two branches make the same change
1309 1317 chain = node
1310 1318 continue
1311 1319
1312 1320 for p in (p1, p2):
1313 1321 if p not in self.nodemap:
1314 1322 raise LookupError(p, self.indexfile,
1315 1323 _('unknown parent'))
1316 1324
1317 1325 if deltabase not in self.nodemap:
1318 1326 raise LookupError(deltabase, self.indexfile,
1319 1327 _('unknown delta base'))
1320 1328
1321 1329 baserev = self.rev(deltabase)
1322 1330 chain = self._addrevision(node, None, transaction, link,
1323 1331 p1, p2, (baserev, delta), ifh, dfh)
1324 1332 if not dfh and not self._inline:
1325 1333 # addrevision switched from inline to conventional
1326 1334 # reopen the index
1327 1335 ifh.close()
1328 1336 dfh = self.opener(self.datafile, "a")
1329 1337 ifh = self.opener(self.indexfile, "a")
1330 1338 finally:
1331 1339 if dfh:
1332 1340 dfh.close()
1333 1341 ifh.close()
1334 1342
1335 1343 return content
1336 1344
1337 1345 def getstrippoint(self, minlink):
1338 1346 """find the minimum rev that must be stripped to strip the linkrev
1339 1347
1340 1348 Returns a tuple containing the minimum rev and a set of all revs that
1341 1349 have linkrevs that will be broken by this strip.
1342 1350 """
1343 1351 brokenrevs = set()
1344 1352 strippoint = len(self)
1345 1353
1346 1354 heads = {}
1347 1355 futurelargelinkrevs = set()
1348 1356 for head in self.headrevs():
1349 1357 headlinkrev = self.linkrev(head)
1350 1358 heads[head] = headlinkrev
1351 1359 if headlinkrev >= minlink:
1352 1360 futurelargelinkrevs.add(headlinkrev)
1353 1361
1354 1362 # This algorithm involves walking down the rev graph, starting at the
1355 1363 # heads. Since the revs are topologically sorted according to linkrev,
1356 1364 # once all head linkrevs are below the minlink, we know there are
1357 1365 # no more revs that could have a linkrev greater than minlink.
1358 1366 # So we can stop walking.
1359 1367 while futurelargelinkrevs:
1360 1368 strippoint -= 1
1361 1369 linkrev = heads.pop(strippoint)
1362 1370
1363 1371 if linkrev < minlink:
1364 1372 brokenrevs.add(strippoint)
1365 1373 else:
1366 1374 futurelargelinkrevs.remove(linkrev)
1367 1375
1368 1376 for p in self.parentrevs(strippoint):
1369 1377 if p != nullrev:
1370 1378 plinkrev = self.linkrev(p)
1371 1379 heads[p] = plinkrev
1372 1380 if plinkrev >= minlink:
1373 1381 futurelargelinkrevs.add(plinkrev)
1374 1382
1375 1383 return strippoint, brokenrevs
1376 1384
1377 1385 def strip(self, minlink, transaction):
1378 1386 """truncate the revlog on the first revision with a linkrev >= minlink
1379 1387
1380 1388 This function is called when we're stripping revision minlink and
1381 1389 its descendants from the repository.
1382 1390
1383 1391 We have to remove all revisions with linkrev >= minlink, because
1384 1392 the equivalent changelog revisions will be renumbered after the
1385 1393 strip.
1386 1394
1387 1395 So we truncate the revlog on the first of these revisions, and
1388 1396 trust that the caller has saved the revisions that shouldn't be
1389 1397 removed and that it'll re-add them after this truncation.
1390 1398 """
1391 1399 if len(self) == 0:
1392 1400 return
1393 1401
1394 1402 rev, _ = self.getstrippoint(minlink)
1395 1403 if rev == len(self):
1396 1404 return
1397 1405
1398 1406 # first truncate the files on disk
1399 1407 end = self.start(rev)
1400 1408 if not self._inline:
1401 1409 transaction.add(self.datafile, end)
1402 1410 end = rev * self._io.size
1403 1411 else:
1404 1412 end += rev * self._io.size
1405 1413
1406 1414 transaction.add(self.indexfile, end)
1407 1415
1408 1416 # then reset internal state in memory to forget those revisions
1409 1417 self._cache = None
1410 1418 self._chunkclear()
1411 1419 for x in xrange(rev, len(self)):
1412 1420 del self.nodemap[self.node(x)]
1413 1421
1414 1422 del self.index[rev:-1]
1415 1423
1416 1424 def checksize(self):
1417 1425 expected = 0
1418 1426 if len(self):
1419 1427 expected = max(0, self.end(len(self) - 1))
1420 1428
1421 1429 try:
1422 1430 f = self.opener(self.datafile)
1423 1431 f.seek(0, 2)
1424 1432 actual = f.tell()
1425 1433 f.close()
1426 1434 dd = actual - expected
1427 1435 except IOError, inst:
1428 1436 if inst.errno != errno.ENOENT:
1429 1437 raise
1430 1438 dd = 0
1431 1439
1432 1440 try:
1433 1441 f = self.opener(self.indexfile)
1434 1442 f.seek(0, 2)
1435 1443 actual = f.tell()
1436 1444 f.close()
1437 1445 s = self._io.size
1438 1446 i = max(0, actual // s)
1439 1447 di = actual - (i * s)
1440 1448 if self._inline:
1441 1449 databytes = 0
1442 1450 for r in self:
1443 1451 databytes += max(0, self.length(r))
1444 1452 dd = 0
1445 1453 di = actual - len(self) * s - databytes
1446 1454 except IOError, inst:
1447 1455 if inst.errno != errno.ENOENT:
1448 1456 raise
1449 1457 di = 0
1450 1458
1451 1459 return (dd, di)
1452 1460
1453 1461 def files(self):
1454 1462 res = [self.indexfile]
1455 1463 if not self._inline:
1456 1464 res.append(self.datafile)
1457 1465 return res