revlog: support importing censored file revision tombstones...
Mike Edgar
r22934:8a096d4d default
@@ -1,1465 +1,1469 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 CensoredNodeError = error.CensoredNodeError
45 46
46 47 def getoffset(q):
47 48 return int(q >> 16)
48 49
49 50 def gettype(q):
50 51 return int(q & 0xFFFF)
51 52
52 53 def offset_type(offset, type):
53 54 return long(long(offset) << 16 | type)
54 55
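# Hedged sketch of the packing above: the data-file offset lives in the
# high bits of a single index field and the 16-bit flag word in the low
# bits, so both round-trip through one integer.
q = offset_type(1024, 1)         # (1024 << 16) | 1
assert getoffset(q) == 1024      # high bits: offset into the data file
assert gettype(q) == 1           # low 16 bits: per-revision flags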
55 56 _nullhash = _sha(nullid)
56 57
57 58 def hash(text, p1, p2):
58 59 """generate a hash from the given text and its parent hashes
59 60
60 61 This hash combines both the current file contents and its history
61 62 in a manner that makes it easy to distinguish nodes with the same
62 63 content in the revision graph.
63 64 """
64 65 # As of now, if one of the parent nodes is null, p2 is null
65 66 if p2 == nullid:
66 67 # deep copy of a hash is faster than creating one
67 68 s = _nullhash.copy()
68 69 s.update(p1)
69 70 else:
70 71 # none of the parent nodes are nullid
71 72 l = [p1, p2]
72 73 l.sort()
73 74 s = _sha(l[0])
74 75 s.update(l[1])
75 76 s.update(text)
76 77 return s.digest()
77 78
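# Hedged sketch: for a root revision (both parents nullid) the helper
# above reduces to sha1(nullid + nullid + text) via the copied
# _nullhash fast path; this assumes util.sha1 is hashlib's sha1.
import hashlib
assert (hash('data', nullid, nullid) ==
        hashlib.sha1(nullid + nullid + 'data').digest())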
78 79 def decompress(bin):
79 80 """ decompress the given input """
80 81 if not bin:
81 82 return bin
82 83 t = bin[0]
83 84 if t == '\0':
84 85 return bin
85 86 if t == 'x':
86 87 try:
87 88 return _decompress(bin)
88 89 except zlib.error, e:
89 90 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 91 if t == 'u':
91 92 return bin[1:]
92 93 raise RevlogError(_("unknown compression type %r") % t)
93 94
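# Hedged sketch of the three encodings dispatched on above: zlib output
# keeps its native 'x' first byte, incompressible text is stored behind
# a 'u' marker, and empty or NUL-led chunks pass through untouched.
assert decompress(zlib.compress('some delta text')) == 'some delta text'
assert decompress('u' + 'stored text') == 'stored text'
assert decompress('') == ''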
94 95 # index v0:
95 96 # 4 bytes: offset
96 97 # 4 bytes: compressed length
97 98 # 4 bytes: base rev
98 99 # 4 bytes: link rev
99 100 # 32 bytes: parent 1 nodeid
100 101 # 32 bytes: parent 2 nodeid
101 102 # 32 bytes: nodeid
102 103 indexformatv0 = ">4l20s20s20s"
103 104 v0shaoffset = 56
104 105
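# Hedged sketch: a v0 record is a fixed 76-byte struct, and the entry's
# own node id starts right after the four longs and the two 20-byte
# parent hashes, which is exactly what v0shaoffset records.
assert struct.calcsize(indexformatv0) == 76
assert v0shaoffset == 4 * 4 + 20 + 20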
105 106 class revlogoldio(object):
106 107 def __init__(self):
107 108 self.size = struct.calcsize(indexformatv0)
108 109
109 110 def parseindex(self, data, inline):
110 111 s = self.size
111 112 index = []
112 113 nodemap = {nullid: nullrev}
113 114 n = off = 0
114 115 l = len(data)
115 116 while off + s <= l:
116 117 cur = data[off:off + s]
117 118 off += s
118 119 e = _unpack(indexformatv0, cur)
119 120 # transform to revlogv1 format
120 121 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 122 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 123 index.append(e2)
123 124 nodemap[e[6]] = n
124 125 n += 1
125 126
126 127 # add the magic null revision at -1
127 128 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 129
129 130 return index, nodemap, None
130 131
131 132 def packentry(self, entry, node, version, rev):
132 133 if gettype(entry[0]):
133 134 raise RevlogError(_("index entry flags need RevlogNG"))
134 135 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 136 node(entry[5]), node(entry[6]), entry[7])
136 137 return _pack(indexformatv0, *e2)
137 138
138 139 # index ng:
139 140 # 6 bytes: offset
140 141 # 2 bytes: flags
141 142 # 4 bytes: compressed length
142 143 # 4 bytes: uncompressed length
143 144 # 4 bytes: base rev
144 145 # 4 bytes: link rev
145 146 # 4 bytes: parent 1 rev
146 147 # 4 bytes: parent 2 rev
147 148 # 32 bytes: nodeid
148 149 indexformatng = ">Qiiiiii20s12x"
149 150 ngshaoffset = 32
150 151 versionformat = ">I"
151 152
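# Hedged sketch: a RevlogNG record is a fixed 64-byte struct, so entry
# n of a non-inline index sits at byte offset 64 * n, and the node id
# begins 32 bytes in (after one 64-bit and six 32-bit fields).
assert struct.calcsize(indexformatng) == 64
assert ngshaoffset == 8 + 6 * 4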
152 153 class revlogio(object):
153 154 def __init__(self):
154 155 self.size = struct.calcsize(indexformatng)
155 156
156 157 def parseindex(self, data, inline):
157 158 # call the C implementation to parse the index data
158 159 index, cache = parsers.parse_index2(data, inline)
159 160 return index, getattr(index, 'nodemap', None), cache
160 161
161 162 def packentry(self, entry, node, version, rev):
162 163 p = _pack(indexformatng, *entry)
163 164 if rev == 0:
164 165 p = _pack(versionformat, version) + p[4:]
165 166 return p
166 167
167 168 class revlog(object):
168 169 """
169 170 the underlying revision storage object
170 171
171 172 A revlog consists of two parts, an index and the revision data.
172 173
173 174 The index is a file with a fixed record size containing
174 175 information on each revision, including its nodeid (hash), the
175 176 nodeids of its parents, the position and offset of its data within
176 177 the data file, and the revision it's based on. Finally, each entry
177 178 contains a linkrev entry that can serve as a pointer to external
178 179 data.
179 180
180 181 The revision data itself is a linear collection of data chunks.
181 182 Each chunk represents a revision and is usually represented as a
182 183 delta against the previous chunk. To bound lookup time, runs of
183 184 deltas are limited to about 2 times the length of the original
184 185 version data. This makes retrieval of a version proportional to
185 186 its size, or O(1) relative to the number of revisions.
186 187
187 188 Both pieces of the revlog are written to in an append-only
188 189 fashion, which means we never need to rewrite a file to insert or
189 190 remove data, and can use some simple techniques to avoid the need
190 191 for locking while reading.
191 192 """
192 193 def __init__(self, opener, indexfile):
193 194 """
194 195 create a revlog object
195 196
196 197 opener is a function that abstracts the file opening operation
197 198 and can be used to implement COW semantics or the like.
198 199 """
199 200 self.indexfile = indexfile
200 201 self.datafile = indexfile[:-2] + ".d"
201 202 self.opener = opener
202 203 self._cache = None
203 204 self._basecache = None
204 205 self._chunkcache = (0, '')
205 206 self._chunkcachesize = 65536
206 207 self.index = []
207 208 self._pcache = {}
208 209 self._nodecache = {nullid: nullrev}
209 210 self._nodepos = None
210 211
211 212 v = REVLOG_DEFAULT_VERSION
212 213 opts = getattr(opener, 'options', None)
213 214 if opts is not None:
214 215 if 'revlogv1' in opts:
215 216 if 'generaldelta' in opts:
216 217 v |= REVLOGGENERALDELTA
217 218 else:
218 219 v = 0
219 220 if 'chunkcachesize' in opts:
220 221 self._chunkcachesize = opts['chunkcachesize']
221 222
222 223 if self._chunkcachesize <= 0:
223 224 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 225 'than 0') % self._chunkcachesize)
225 226 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 227 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 228 'of 2') % self._chunkcachesize)
228 229
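# Hedged aside: x & (x - 1) clears the lowest set bit, so the check
# above is nonzero exactly when the cache size is not a power of two,
# e.g. 65536 & 65535 == 0 while 65535 & 65534 != 0.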
229 230 i = ''
230 231 self._initempty = True
231 232 try:
232 233 f = self.opener(self.indexfile)
233 234 i = f.read()
234 235 f.close()
235 236 if len(i) > 0:
236 237 v = struct.unpack(versionformat, i[:4])[0]
237 238 self._initempty = False
238 239 except IOError, inst:
239 240 if inst.errno != errno.ENOENT:
240 241 raise
241 242
242 243 self.version = v
243 244 self._inline = v & REVLOGNGINLINEDATA
244 245 self._generaldelta = v & REVLOGGENERALDELTA
245 246 flags = v & ~0xFFFF
246 247 fmt = v & 0xFFFF
247 248 if fmt == REVLOGV0 and flags:
248 249 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 250 % (self.indexfile, flags >> 16))
250 251 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 252 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 253 % (self.indexfile, flags >> 16))
253 254 elif fmt > REVLOGNG:
254 255 raise RevlogError(_("index %s unknown format %d")
255 256 % (self.indexfile, fmt))
256 257
257 258 self._io = revlogio()
258 259 if self.version == REVLOGV0:
259 260 self._io = revlogoldio()
260 261 try:
261 262 d = self._io.parseindex(i, self._inline)
262 263 except (ValueError, IndexError):
263 264 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 265 self.index, nodemap, self._chunkcache = d
265 266 if nodemap is not None:
266 267 self.nodemap = self._nodecache = nodemap
267 268 if not self._chunkcache:
268 269 self._chunkclear()
269 270
270 271 def tip(self):
271 272 return self.node(len(self.index) - 2)
272 273 def __len__(self):
273 274 return len(self.index) - 1
274 275 def __iter__(self):
275 276 return iter(xrange(len(self)))
276 277 def revs(self, start=0, stop=None):
277 278 """iterate over all rev in this revlog (from start to stop)"""
278 279 step = 1
279 280 if stop is not None:
280 281 if start > stop:
281 282 step = -1
282 283 stop += step
283 284 else:
284 285 stop = len(self)
285 286 return xrange(start, stop, step)
286 287
287 288 @util.propertycache
288 289 def nodemap(self):
289 290 self.rev(self.node(0))
290 291 return self._nodecache
291 292
292 293 def hasnode(self, node):
293 294 try:
294 295 self.rev(node)
295 296 return True
296 297 except KeyError:
297 298 return False
298 299
299 300 def clearcaches(self):
300 301 try:
301 302 self._nodecache.clearcaches()
302 303 except AttributeError:
303 304 self._nodecache = {nullid: nullrev}
304 305 self._nodepos = None
305 306
306 307 def rev(self, node):
307 308 try:
308 309 return self._nodecache[node]
309 310 except TypeError:
310 311 raise
311 312 except RevlogError:
312 313 # parsers.c radix tree lookup failed
313 314 raise LookupError(node, self.indexfile, _('no node'))
314 315 except KeyError:
315 316 # pure python cache lookup failed
316 317 n = self._nodecache
317 318 i = self.index
318 319 p = self._nodepos
319 320 if p is None:
320 321 p = len(i) - 2
321 322 for r in xrange(p, -1, -1):
322 323 v = i[r][7]
323 324 n[v] = r
324 325 if v == node:
325 326 self._nodepos = r - 1
326 327 return r
327 328 raise LookupError(node, self.indexfile, _('no node'))
328 329
329 330 def node(self, rev):
330 331 return self.index[rev][7]
331 332 def linkrev(self, rev):
332 333 return self.index[rev][4]
333 334 def parents(self, node):
334 335 i = self.index
335 336 d = i[self.rev(node)]
336 337 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 338 def parentrevs(self, rev):
338 339 return self.index[rev][5:7]
339 340 def start(self, rev):
340 341 return int(self.index[rev][0] >> 16)
341 342 def end(self, rev):
342 343 return self.start(rev) + self.length(rev)
343 344 def length(self, rev):
344 345 return self.index[rev][1]
345 346 def chainbase(self, rev):
346 347 index = self.index
347 348 base = index[rev][3]
348 349 while base != rev:
349 350 rev = base
350 351 base = index[rev][3]
351 352 return base
352 353 def flags(self, rev):
353 354 return self.index[rev][0] & 0xFFFF
354 355 def rawsize(self, rev):
355 356 """return the length of the uncompressed text for a given revision"""
356 357 l = self.index[rev][2]
357 358 if l >= 0:
358 359 return l
359 360
360 361 t = self.revision(self.node(rev))
361 362 return len(t)
362 363 size = rawsize
363 364
364 365 def ancestors(self, revs, stoprev=0, inclusive=False):
365 366 """Generate the ancestors of 'revs' in reverse topological order.
366 367 Does not generate revs lower than stoprev.
367 368
368 369 See the documentation for ancestor.lazyancestors for more details."""
369 370
370 371 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
371 372 inclusive=inclusive)
372 373
373 374 def descendants(self, revs):
374 375 """Generate the descendants of 'revs' in revision order.
375 376
376 377 Yield a sequence of revision numbers starting with a child of
377 378 some rev in revs, i.e., each revision is *not* considered a
378 379 descendant of itself. Results are ordered by revision number (a
379 380 topological sort)."""
380 381 first = min(revs)
381 382 if first == nullrev:
382 383 for i in self:
383 384 yield i
384 385 return
385 386
386 387 seen = set(revs)
387 388 for i in self.revs(start=first + 1):
388 389 for x in self.parentrevs(i):
389 390 if x != nullrev and x in seen:
390 391 seen.add(i)
391 392 yield i
392 393 break
393 394
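# Hedged aside on the single pass above: parents always carry smaller
# revision numbers than their children, so by the time rev i is
# examined every ancestor of i has already had its chance to enter
# 'seen'; one forward sweep therefore finds all descendants.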
394 395 def findcommonmissing(self, common=None, heads=None):
395 396 """Return a tuple of the ancestors of common and the ancestors of heads
396 397 that are not ancestors of common. In revset terminology, we return the
397 398 tuple:
398 399
399 400 ::common, (::heads) - (::common)
400 401
401 402 The list is sorted by revision number, meaning it is
402 403 topologically sorted.
403 404
404 405 'heads' and 'common' are both lists of node IDs. If heads is
405 406 not supplied, uses all of the revlog's heads. If common is not
406 407 supplied, uses nullid."""
407 408 if common is None:
408 409 common = [nullid]
409 410 if heads is None:
410 411 heads = self.heads()
411 412
412 413 common = [self.rev(n) for n in common]
413 414 heads = [self.rev(n) for n in heads]
414 415
415 416 # we want the ancestors, but inclusive
416 417 class lazyset(object):
417 418 def __init__(self, lazyvalues):
418 419 self.addedvalues = set()
419 420 self.lazyvalues = lazyvalues
420 421
421 422 def __contains__(self, value):
422 423 return value in self.addedvalues or value in self.lazyvalues
423 424
424 425 def __iter__(self):
425 426 added = self.addedvalues
426 427 for r in added:
427 428 yield r
428 429 for r in self.lazyvalues:
429 430 if r not in added:
430 431 yield r
431 432
432 433 def add(self, value):
433 434 self.addedvalues.add(value)
434 435
435 436 def update(self, values):
436 437 self.addedvalues.update(values)
437 438
438 439 has = lazyset(self.ancestors(common))
439 440 has.add(nullrev)
440 441 has.update(common)
441 442
442 443 # take all ancestors from heads that aren't in has
443 444 missing = set()
444 445 visit = util.deque(r for r in heads if r not in has)
445 446 while visit:
446 447 r = visit.popleft()
447 448 if r in missing:
448 449 continue
449 450 else:
450 451 missing.add(r)
451 452 for p in self.parentrevs(r):
452 453 if p not in has:
453 454 visit.append(p)
454 455 missing = list(missing)
455 456 missing.sort()
456 457 return has, [self.node(r) for r in missing]
457 458
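# Hedged aside: the lazyset above answers membership from either the
# explicitly added values (nullrev and 'common' itself) or the lazy
# ancestor iterator, so deciding whether a head is already "had" never
# forces the full ancestor set of 'common' to be materialized up front.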
458 459 def findmissingrevs(self, common=None, heads=None):
459 460 """Return the revision numbers of the ancestors of heads that
460 461 are not ancestors of common.
461 462
462 463 More specifically, return a list of revision numbers corresponding to
463 464 nodes N such that every N satisfies the following constraints:
464 465
465 466 1. N is an ancestor of some node in 'heads'
466 467 2. N is not an ancestor of any node in 'common'
467 468
468 469 The list is sorted by revision number, meaning it is
469 470 topologically sorted.
470 471
471 472 'heads' and 'common' are both lists of revision numbers. If heads is
472 473 not supplied, uses all of the revlog's heads. If common is not
473 474 supplied, uses nullid."""
474 475 if common is None:
475 476 common = [nullrev]
476 477 if heads is None:
477 478 heads = self.headrevs()
478 479
479 480 return ancestor.missingancestors(heads, common, self.parentrevs)
480 481
481 482 def findmissing(self, common=None, heads=None):
482 483 """Return the ancestors of heads that are not ancestors of common.
483 484
484 485 More specifically, return a list of nodes N such that every N
485 486 satisfies the following constraints:
486 487
487 488 1. N is an ancestor of some node in 'heads'
488 489 2. N is not an ancestor of any node in 'common'
489 490
490 491 The list is sorted by revision number, meaning it is
491 492 topologically sorted.
492 493
493 494 'heads' and 'common' are both lists of node IDs. If heads is
494 495 not supplied, uses all of the revlog's heads. If common is not
495 496 supplied, uses nullid."""
496 497 if common is None:
497 498 common = [nullid]
498 499 if heads is None:
499 500 heads = self.heads()
500 501
501 502 common = [self.rev(n) for n in common]
502 503 heads = [self.rev(n) for n in heads]
503 504
504 505 return [self.node(r) for r in
505 506 ancestor.missingancestors(heads, common, self.parentrevs)]
506 507
507 508 def nodesbetween(self, roots=None, heads=None):
508 509 """Return a topological path from 'roots' to 'heads'.
509 510
510 511 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
511 512 topologically sorted list of all nodes N that satisfy both of
512 513 these constraints:
513 514
514 515 1. N is a descendant of some node in 'roots'
515 516 2. N is an ancestor of some node in 'heads'
516 517
517 518 Every node is considered to be both a descendant and an ancestor
518 519 of itself, so every reachable node in 'roots' and 'heads' will be
519 520 included in 'nodes'.
520 521
521 522 'outroots' is the list of reachable nodes in 'roots', i.e., the
522 523 subset of 'roots' that is returned in 'nodes'. Likewise,
523 524 'outheads' is the subset of 'heads' that is also in 'nodes'.
524 525
525 526 'roots' and 'heads' are both lists of node IDs. If 'roots' is
526 527 unspecified, uses nullid as the only root. If 'heads' is
527 528 unspecified, uses list of all of the revlog's heads."""
528 529 nonodes = ([], [], [])
529 530 if roots is not None:
530 531 roots = list(roots)
531 532 if not roots:
532 533 return nonodes
533 534 lowestrev = min([self.rev(n) for n in roots])
534 535 else:
535 536 roots = [nullid] # Everybody's a descendant of nullid
536 537 lowestrev = nullrev
537 538 if (lowestrev == nullrev) and (heads is None):
538 539 # We want _all_ the nodes!
539 540 return ([self.node(r) for r in self], [nullid], list(self.heads()))
540 541 if heads is None:
541 542 # All nodes are ancestors, so the latest ancestor is the last
542 543 # node.
543 544 highestrev = len(self) - 1
544 545 # Set ancestors to None to signal that every node is an ancestor.
545 546 ancestors = None
546 547 # Set heads to an empty dictionary for later discovery of heads
547 548 heads = {}
548 549 else:
549 550 heads = list(heads)
550 551 if not heads:
551 552 return nonodes
552 553 ancestors = set()
553 554 # Turn heads into a dictionary so we can remove 'fake' heads.
554 555 # Also, later we will be using it to filter out the heads we can't
555 556 # find from roots.
556 557 heads = dict.fromkeys(heads, False)
557 558 # Start at the top and keep marking parents until we're done.
558 559 nodestotag = set(heads)
559 560 # Remember where the top was so we can use it as a limit later.
560 561 highestrev = max([self.rev(n) for n in nodestotag])
561 562 while nodestotag:
562 563 # grab a node to tag
563 564 n = nodestotag.pop()
564 565 # Never tag nullid
565 566 if n == nullid:
566 567 continue
567 568 # A node's revision number represents its place in a
568 569 # topologically sorted list of nodes.
569 570 r = self.rev(n)
570 571 if r >= lowestrev:
571 572 if n not in ancestors:
572 573 # If we are possibly a descendant of one of the roots
573 574 # and we haven't already been marked as an ancestor
574 575 ancestors.add(n) # Mark as ancestor
575 576 # Add non-nullid parents to list of nodes to tag.
576 577 nodestotag.update([p for p in self.parents(n) if
577 578 p != nullid])
578 579 elif n in heads: # We've seen it before, is it a fake head?
579 580 # So it is; real heads should not be ancestors of
580 581 # any other heads.
581 582 heads.pop(n)
582 583 if not ancestors:
583 584 return nonodes
584 585 # Now that we have our set of ancestors, we want to remove any
585 586 # roots that are not ancestors.
586 587
587 588 # If one of the roots was nullid, everything is included anyway.
588 589 if lowestrev > nullrev:
589 590 # But, since we weren't, let's recompute the lowest rev to not
590 591 # include roots that aren't ancestors.
591 592
592 593 # Filter out roots that aren't ancestors of heads
593 594 roots = [n for n in roots if n in ancestors]
594 595 # Recompute the lowest revision
595 596 if roots:
596 597 lowestrev = min([self.rev(n) for n in roots])
597 598 else:
598 599 # No more roots? Return empty list
599 600 return nonodes
600 601 else:
601 602 # We are descending from nullid, and don't need to care about
602 603 # any other roots.
603 604 lowestrev = nullrev
604 605 roots = [nullid]
605 606 # Transform our roots list into a set.
606 607 descendants = set(roots)
607 608 # Also, keep the original roots so we can filter out roots that aren't
608 609 # 'real' roots (i.e. are descended from other roots).
609 610 roots = descendants.copy()
610 611 # Our topologically sorted list of output nodes.
611 612 orderedout = []
612 613 # Don't start at nullid since we don't want nullid in our output list,
613 614 # and if nullid shows up in descendants, empty parents will look like
614 615 # they're descendants.
615 616 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
616 617 n = self.node(r)
617 618 isdescendant = False
618 619 if lowestrev == nullrev: # Everybody is a descendant of nullid
619 620 isdescendant = True
620 621 elif n in descendants:
621 622 # n is already a descendant
622 623 isdescendant = True
623 624 # This check only needs to be done here because all the roots
624 625 # will start being marked as descendants before the loop.
625 626 if n in roots:
626 627 # If n was a root, check if it's a 'real' root.
627 628 p = tuple(self.parents(n))
628 629 # If any of its parents are descendants, it's not a root.
629 630 if (p[0] in descendants) or (p[1] in descendants):
630 631 roots.remove(n)
631 632 else:
632 633 p = tuple(self.parents(n))
633 634 # A node is a descendant if either of its parents are
634 635 # descendants. (We seeded the descendants set with the roots
635 636 # up there, remember?)
636 637 if (p[0] in descendants) or (p[1] in descendants):
637 638 descendants.add(n)
638 639 isdescendant = True
639 640 if isdescendant and ((ancestors is None) or (n in ancestors)):
640 641 # Only include nodes that are both descendants and ancestors.
641 642 orderedout.append(n)
642 643 if (ancestors is not None) and (n in heads):
643 644 # We're trying to figure out which heads are reachable
644 645 # from roots.
645 646 # Mark this head as having been reached
646 647 heads[n] = True
647 648 elif ancestors is None:
648 649 # Otherwise, we're trying to discover the heads.
649 650 # Assume this is a head because if it isn't, the next step
650 651 # will eventually remove it.
651 652 heads[n] = True
652 653 # But, obviously its parents aren't.
653 654 for p in self.parents(n):
654 655 heads.pop(p, None)
655 656 heads = [n for n, flag in heads.iteritems() if flag]
656 657 roots = list(roots)
657 658 assert orderedout
658 659 assert roots
659 660 assert heads
660 661 return (orderedout, roots, heads)
661 662
662 663 def headrevs(self):
663 664 try:
664 665 return self.index.headrevs()
665 666 except AttributeError:
666 667 return self._headrevs()
667 668
668 669 def _headrevs(self):
669 670 count = len(self)
670 671 if not count:
671 672 return [nullrev]
672 673 # we won't iterate over filtered revs, so nobody is a head at the start
673 674 ishead = [0] * (count + 1)
674 675 index = self.index
675 676 for r in self:
676 677 ishead[r] = 1 # I may be a head
677 678 e = index[r]
678 679 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
679 680 return [r for r, val in enumerate(ishead) if val]
680 681
681 682 def heads(self, start=None, stop=None):
682 683 """return the list of all nodes that have no children
683 684
684 685 if start is specified, only heads that are descendants of
685 686 start will be returned
686 687 if stop is specified, it will consider all the revs from stop
687 688 as if they had no children
688 689 """
689 690 if start is None and stop is None:
690 691 if not len(self):
691 692 return [nullid]
692 693 return [self.node(r) for r in self.headrevs()]
693 694
694 695 if start is None:
695 696 start = nullid
696 697 if stop is None:
697 698 stop = []
698 699 stoprevs = set([self.rev(n) for n in stop])
699 700 startrev = self.rev(start)
700 701 reachable = set((startrev,))
701 702 heads = set((startrev,))
702 703
703 704 parentrevs = self.parentrevs
704 705 for r in self.revs(start=startrev + 1):
705 706 for p in parentrevs(r):
706 707 if p in reachable:
707 708 if r not in stoprevs:
708 709 reachable.add(r)
709 710 heads.add(r)
710 711 if p in heads and p not in stoprevs:
711 712 heads.remove(p)
712 713
713 714 return [self.node(r) for r in heads]
714 715
715 716 def children(self, node):
716 717 """find the children of a given node"""
717 718 c = []
718 719 p = self.rev(node)
719 720 for r in self.revs(start=p + 1):
720 721 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
721 722 if prevs:
722 723 for pr in prevs:
723 724 if pr == p:
724 725 c.append(self.node(r))
725 726 elif p == nullrev:
726 727 c.append(self.node(r))
727 728 return c
728 729
729 730 def descendant(self, start, end):
730 731 if start == nullrev:
731 732 return True
732 733 for i in self.descendants([start]):
733 734 if i == end:
734 735 return True
735 736 elif i > end:
736 737 break
737 738 return False
738 739
739 740 def commonancestorsheads(self, a, b):
740 741 """calculate all the heads of the common ancestors of nodes a and b"""
741 742 a, b = self.rev(a), self.rev(b)
742 743 try:
743 744 ancs = self.index.commonancestorsheads(a, b)
744 745 except (AttributeError, OverflowError): # C implementation failed
745 746 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
746 747 return map(self.node, ancs)
747 748
748 749 def isancestor(self, a, b):
749 750 """return True if node a is an ancestor of node b
750 751
751 752 The implementation of this is trivial but the use of
752 753 commonancestorsheads is not."""
753 754 return a in self.commonancestorsheads(a, b)
754 755
755 756 def ancestor(self, a, b):
756 757 """calculate the "best" common ancestor of nodes a and b"""
757 758
758 759 a, b = self.rev(a), self.rev(b)
759 760 try:
760 761 ancs = self.index.ancestors(a, b)
761 762 except (AttributeError, OverflowError):
762 763 ancs = ancestor.ancestors(self.parentrevs, a, b)
763 764 if ancs:
764 765 # choose a consistent winner when there's a tie
765 766 return min(map(self.node, ancs))
766 767 return nullid
767 768
768 769 def _match(self, id):
769 770 if isinstance(id, int):
770 771 # rev
771 772 return self.node(id)
772 773 if len(id) == 20:
773 774 # possibly a binary node
774 775 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 776 try:
776 777 node = id
777 778 self.rev(node) # quick search the index
778 779 return node
779 780 except LookupError:
780 781 pass # may be partial hex id
781 782 try:
782 783 # str(rev)
783 784 rev = int(id)
784 785 if str(rev) != id:
785 786 raise ValueError
786 787 if rev < 0:
787 788 rev = len(self) + rev
788 789 if rev < 0 or rev >= len(self):
789 790 raise ValueError
790 791 return self.node(rev)
791 792 except (ValueError, OverflowError):
792 793 pass
793 794 if len(id) == 40:
794 795 try:
795 796 # a full hex nodeid?
796 797 node = bin(id)
797 798 self.rev(node)
798 799 return node
799 800 except (TypeError, LookupError):
800 801 pass
801 802
802 803 def _partialmatch(self, id):
803 804 try:
804 805 n = self.index.partialmatch(id)
805 806 if n and self.hasnode(n):
806 807 return n
807 808 return None
808 809 except RevlogError:
809 810 # parsers.c radix tree lookup gave multiple matches
810 811 # fall through to slow path that filters hidden revisions
811 812 pass
812 813 except (AttributeError, ValueError):
813 814 # we are pure python, or key was too short to search radix tree
814 815 pass
815 816
816 817 if id in self._pcache:
817 818 return self._pcache[id]
818 819
819 820 if len(id) < 40:
820 821 try:
821 822 # hex(node)[:...]
822 823 l = len(id) // 2 # grab an even number of digits
823 824 prefix = bin(id[:l * 2])
824 825 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
825 826 nl = [n for n in nl if hex(n).startswith(id) and
826 827 self.hasnode(n)]
827 828 if len(nl) > 0:
828 829 if len(nl) == 1:
829 830 self._pcache[id] = nl[0]
830 831 return nl[0]
831 832 raise LookupError(id, self.indexfile,
832 833 _('ambiguous identifier'))
833 834 return None
834 835 except TypeError:
835 836 pass
836 837
837 838 def lookup(self, id):
838 839 """locate a node based on:
839 840 - revision number or str(revision number)
840 841 - nodeid or subset of hex nodeid
841 842 """
842 843 n = self._match(id)
843 844 if n is not None:
844 845 return n
845 846 n = self._partialmatch(id)
846 847 if n:
847 848 return n
848 849
849 850 raise LookupError(id, self.indexfile, _('no match found'))
850 851
851 852 def cmp(self, node, text):
852 853 """compare text with a given file revision
853 854
854 855 returns True if text is different from what is stored.
855 856 """
856 857 p1, p2 = self.parents(node)
857 858 return hash(text, p1, p2) != node
858 859
859 860 def _addchunk(self, offset, data):
860 861 o, d = self._chunkcache
861 862 # try to add to existing cache
862 863 if o + len(d) == offset and len(d) + len(data) < _chunksize:
863 864 self._chunkcache = o, d + data
864 865 else:
865 866 self._chunkcache = offset, data
866 867
867 868 def _loadchunk(self, offset, length):
868 869 if self._inline:
869 870 df = self.opener(self.indexfile)
870 871 else:
871 872 df = self.opener(self.datafile)
872 873
873 874 # Cache data both forward and backward around the requested
874 875 # data, in a fixed size window. This helps speed up operations
875 876 # involving reading the revlog backwards.
876 877 cachesize = self._chunkcachesize
877 878 realoffset = offset & ~(cachesize - 1)
878 879 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
879 880 - realoffset)
880 881 df.seek(realoffset)
881 882 d = df.read(reallength)
882 883 df.close()
883 884 self._addchunk(realoffset, d)
884 885 if offset != realoffset or reallength != length:
885 886 return util.buffer(d, offset - realoffset, length)
886 887 return d
887 888
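# Hedged trace of the window math above with the default 64 KiB cache:
# a read of 100 bytes at offset 70000 is widened to
# realoffset = 70000 & ~65535 = 65536 and reallength = 65536, so the
# whole aligned window [65536, 131072) lands in the chunk cache.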
888 889 def _getchunk(self, offset, length):
889 890 o, d = self._chunkcache
890 891 l = len(d)
891 892
892 893 # is it in the cache?
893 894 cachestart = offset - o
894 895 cacheend = cachestart + length
895 896 if cachestart >= 0 and cacheend <= l:
896 897 if cachestart == 0 and cacheend == l:
897 898 return d # avoid a copy
898 899 return util.buffer(d, cachestart, cacheend - cachestart)
899 900
900 901 return self._loadchunk(offset, length)
901 902
902 903 def _chunkraw(self, startrev, endrev):
903 904 start = self.start(startrev)
904 905 end = self.end(endrev)
905 906 if self._inline:
906 907 start += (startrev + 1) * self._io.size
907 908 end += (endrev + 1) * self._io.size
908 909 length = end - start
909 910 return self._getchunk(start, length)
910 911
911 912 def _chunk(self, rev):
912 913 return decompress(self._chunkraw(rev, rev))
913 914
914 915 def _chunks(self, revs):
915 916 '''faster version of [self._chunk(rev) for rev in revs]
916 917
917 918 Assumes that revs is in ascending order.'''
918 919 if not revs:
919 920 return []
920 921 start = self.start
921 922 length = self.length
922 923 inline = self._inline
923 924 iosize = self._io.size
924 925 buffer = util.buffer
925 926
926 927 l = []
927 928 ladd = l.append
928 929
929 930 # preload the cache
930 931 try:
931 932 while True:
932 933 # ensure that the cache doesn't change out from under us
933 934 _cache = self._chunkcache
934 935 self._chunkraw(revs[0], revs[-1])
935 936 if _cache == self._chunkcache:
936 937 break
937 938 offset, data = _cache
938 939 except OverflowError:
939 940 # issue4215 - we can't cache a run of chunks greater than
940 941 # 2G on Windows
941 942 return [self._chunk(rev) for rev in revs]
942 943
943 944 for rev in revs:
944 945 chunkstart = start(rev)
945 946 if inline:
946 947 chunkstart += (rev + 1) * iosize
947 948 chunklength = length(rev)
948 949 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
949 950
950 951 return l
951 952
952 953 def _chunkclear(self):
953 954 self._chunkcache = (0, '')
954 955
955 956 def deltaparent(self, rev):
956 957 """return deltaparent of the given revision"""
957 958 base = self.index[rev][3]
958 959 if base == rev:
959 960 return nullrev
960 961 elif self._generaldelta:
961 962 return base
962 963 else:
963 964 return rev - 1
964 965
965 966 def revdiff(self, rev1, rev2):
966 967 """return or calculate a delta between two revisions"""
967 968 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
968 969 return str(self._chunk(rev2))
969 970
970 971 return mdiff.textdiff(self.revision(rev1),
971 972 self.revision(rev2))
972 973
973 974 def revision(self, nodeorrev):
974 975 """return an uncompressed revision of a given node or revision
975 976 number.
976 977 """
977 978 if isinstance(nodeorrev, int):
978 979 rev = nodeorrev
979 980 node = self.node(rev)
980 981 else:
981 982 node = nodeorrev
982 983 rev = None
983 984
984 985 _cache = self._cache # grab local copy of cache to avoid thread race
985 986 cachedrev = None
986 987 if node == nullid:
987 988 return ""
988 989 if _cache:
989 990 if _cache[0] == node:
990 991 return _cache[2]
991 992 cachedrev = _cache[1]
992 993
993 994 # look up what we need to read
994 995 text = None
995 996 if rev is None:
996 997 rev = self.rev(node)
997 998
998 999 # check rev flags
999 1000 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1000 1001 raise RevlogError(_('incompatible revision flag %x') %
1001 1002 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1002 1003
1003 1004 # build delta chain
1004 1005 chain = []
1005 1006 index = self.index # for performance
1006 1007 generaldelta = self._generaldelta
1007 1008 iterrev = rev
1008 1009 e = index[iterrev]
1009 1010 while iterrev != e[3] and iterrev != cachedrev:
1010 1011 chain.append(iterrev)
1011 1012 if generaldelta:
1012 1013 iterrev = e[3]
1013 1014 else:
1014 1015 iterrev -= 1
1015 1016 e = index[iterrev]
1016 1017
1017 1018 if iterrev == cachedrev:
1018 1019 # cache hit
1019 1020 text = _cache[2]
1020 1021 else:
1021 1022 chain.append(iterrev)
1022 1023 chain.reverse()
1023 1024
1024 1025 # drop cache to save memory
1025 1026 self._cache = None
1026 1027
1027 1028 bins = self._chunks(chain)
1028 1029 if text is None:
1029 1030 text = str(bins[0])
1030 1031 bins = bins[1:]
1031 1032
1032 1033 text = mdiff.patches(text, bins)
1033 1034
1034 1035 text = self._checkhash(text, node, rev)
1035 1036
1036 1037 self._cache = (node, rev, text)
1037 1038 return text
1038 1039
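# Hedged aside: with generaldelta each hop above follows the stored
# base revision e[3]; without it the chain is simply rev, rev - 1, ...
# down to a snapshot, which is why non-generaldelta revlogs must delta
# against the immediately preceding revision.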
1039 1040 def hash(self, text, p1, p2):
1040 1041 """Compute a node hash.
1041 1042
1042 1043 Available as a function so that subclasses can replace the hash
1043 1044 as needed.
1044 1045 """
1045 1046 return hash(text, p1, p2)
1046 1047
1047 1048 def _checkhash(self, text, node, rev):
1048 1049 p1, p2 = self.parents(node)
1049 1050 self.checkhash(text, p1, p2, node, rev)
1050 1051 return text
1051 1052
1052 1053 def checkhash(self, text, p1, p2, node, rev=None):
1053 1054 if node != self.hash(text, p1, p2):
1054 1055 revornode = rev
1055 1056 if revornode is None:
1056 1057 revornode = templatefilters.short(hex(node))
1057 1058 raise RevlogError(_("integrity check failed on %s:%s")
1058 1059 % (self.indexfile, revornode))
1059 1060
1060 1061 def checkinlinesize(self, tr, fp=None):
1061 1062 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1062 1063 return
1063 1064
1064 1065 trinfo = tr.find(self.indexfile)
1065 1066 if trinfo is None:
1066 1067 raise RevlogError(_("%s not found in the transaction")
1067 1068 % self.indexfile)
1068 1069
1069 1070 trindex = trinfo[2]
1070 1071 dataoff = self.start(trindex)
1071 1072
1072 1073 tr.add(self.datafile, dataoff)
1073 1074
1074 1075 if fp:
1075 1076 fp.flush()
1076 1077 fp.close()
1077 1078
1078 1079 df = self.opener(self.datafile, 'w')
1079 1080 try:
1080 1081 for r in self:
1081 1082 df.write(self._chunkraw(r, r))
1082 1083 finally:
1083 1084 df.close()
1084 1085
1085 1086 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1086 1087 self.version &= ~(REVLOGNGINLINEDATA)
1087 1088 self._inline = False
1088 1089 for i in self:
1089 1090 e = self._io.packentry(self.index[i], self.node, self.version, i)
1090 1091 fp.write(e)
1091 1092
1092 1093 # if we don't call close, the temp file will never replace the
1093 1094 # real index
1094 1095 fp.close()
1095 1096
1096 1097 tr.replace(self.indexfile, trindex * self._io.size)
1097 1098 self._chunkclear()
1098 1099
1099 1100 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1100 1101 node=None):
1101 1102 """add a revision to the log
1102 1103
1103 1104 text - the revision data to add
1104 1105 transaction - the transaction object used for rollback
1105 1106 link - the linkrev data to add
1106 1107 p1, p2 - the parent nodeids of the revision
1107 1108 cachedelta - an optional precomputed delta
1108 1109 node - nodeid of revision; typically node is not specified, and it is
1109 1110 computed by default as hash(text, p1, p2); however, subclasses might
1110 1111 use a different hashing method (and override checkhash() in that case)
1111 1112 """
1112 1113 if link == nullrev:
1113 1114 raise RevlogError(_("attempted to add linkrev -1 to %s")
1114 1115 % self.indexfile)
1115 1116 node = node or self.hash(text, p1, p2)
1116 1117 if node in self.nodemap:
1117 1118 return node
1118 1119
1119 1120 dfh = None
1120 1121 if not self._inline:
1121 1122 dfh = self.opener(self.datafile, "a")
1122 1123 ifh = self.opener(self.indexfile, "a+")
1123 1124 try:
1124 1125 return self._addrevision(node, text, transaction, link, p1, p2,
1125 1126 cachedelta, ifh, dfh)
1126 1127 finally:
1127 1128 if dfh:
1128 1129 dfh.close()
1129 1130 ifh.close()
1130 1131
1131 1132 def compress(self, text):
1132 1133 """ generate a possibly-compressed representation of text """
1133 1134 if not text:
1134 1135 return ("", text)
1135 1136 l = len(text)
1136 1137 bin = None
1137 1138 if l < 44:
1138 1139 pass
1139 1140 elif l > 1000000:
1140 1141 # zlib makes an internal copy, thus doubling memory usage for
1141 1142 # large files, so let's do this in pieces
1142 1143 z = zlib.compressobj()
1143 1144 p = []
1144 1145 pos = 0
1145 1146 while pos < l:
1146 1147 pos2 = pos + 2**20
1147 1148 p.append(z.compress(text[pos:pos2]))
1148 1149 pos = pos2
1149 1150 p.append(z.flush())
1150 1151 if sum(map(len, p)) < l:
1151 1152 bin = "".join(p)
1152 1153 else:
1153 1154 bin = _compress(text)
1154 1155 if bin is None or len(bin) > l:
1155 1156 if text[0] == '\0':
1156 1157 return ("", text)
1157 1158 return ('u', text)
1158 1159 return ("", bin)
1159 1160
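# Hedged sketch of the header convention produced above, paired with
# module-level decompress(): compress('tiny') returns ('u', 'tiny')
# because four bytes are not worth deflating, decompress('u' + 'tiny')
# strips the marker again, and in general
# decompress(''.join(self.compress(t))) == t for any text t.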
1160 1161 def _addrevision(self, node, text, transaction, link, p1, p2,
1161 1162 cachedelta, ifh, dfh):
1162 1163 """internal function to add revisions to the log
1163 1164
1164 1165 see addrevision for argument descriptions.
1165 1166 invariants:
1166 1167 - text is optional (can be None); if not set, cachedelta must be set.
1167 1168 if both are set, they must correspond to each other.
1168 1169 """
1169 1170 btext = [text]
1170 1171 def buildtext():
1171 1172 if btext[0] is not None:
1172 1173 return btext[0]
1173 1174 # flush any pending writes here so we can read it in revision
1174 1175 if dfh:
1175 1176 dfh.flush()
1176 1177 ifh.flush()
1177 1178 basetext = self.revision(self.node(cachedelta[0]))
1178 1179 btext[0] = mdiff.patch(basetext, cachedelta[1])
1180 try:
1179 1181 self.checkhash(btext[0], p1, p2, node)
1182 except CensoredNodeError:
1183 pass # always import a censor tombstone.
1180 1184 return btext[0]
1181 1185
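# Hedged sketch (not part of this change) of how a subclass might
# trigger the tolerance added above: a censoring filelog could override
# checkhash() to recognize a tombstone payload and raise
# CensoredNodeError instead of the generic integrity error, letting
# buildtext() hand back the tombstone so it can be imported even though
# its hash cannot match (node, p1, p2).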
1182 1186 def builddelta(rev):
1183 1187 # can we use the cached delta?
1184 1188 if cachedelta and cachedelta[0] == rev:
1185 1189 delta = cachedelta[1]
1186 1190 else:
1187 1191 t = buildtext()
1188 1192 ptext = self.revision(self.node(rev))
1189 1193 delta = mdiff.textdiff(ptext, t)
1190 1194 data = self.compress(delta)
1191 1195 l = len(data[1]) + len(data[0])
1192 1196 if basecache[0] == rev:
1193 1197 chainbase = basecache[1]
1194 1198 else:
1195 1199 chainbase = self.chainbase(rev)
1196 1200 dist = l + offset - self.start(chainbase)
1197 1201 if self._generaldelta:
1198 1202 base = rev
1199 1203 else:
1200 1204 base = chainbase
1201 1205 return dist, l, data, base, chainbase
1202 1206
1203 1207 curr = len(self)
1204 1208 prev = curr - 1
1205 1209 base = chainbase = curr
1206 1210 offset = self.end(prev)
1207 1211 flags = 0
1208 1212 d = None
1209 1213 if self._basecache is None:
1210 1214 self._basecache = (prev, self.chainbase(prev))
1211 1215 basecache = self._basecache
1212 1216 p1r, p2r = self.rev(p1), self.rev(p2)
1213 1217
1214 1218 # should we try to build a delta?
1215 1219 if prev != nullrev:
1216 1220 if self._generaldelta:
1217 1221 if p1r >= basecache[1]:
1218 1222 d = builddelta(p1r)
1219 1223 elif p2r >= basecache[1]:
1220 1224 d = builddelta(p2r)
1221 1225 else:
1222 1226 d = builddelta(prev)
1223 1227 else:
1224 1228 d = builddelta(prev)
1225 1229 dist, l, data, base, chainbase = d
1226 1230
1227 1231 # full versions are inserted when the needed deltas
1228 1232 # become comparable to the uncompressed text
1229 1233 if text is None:
1230 1234 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1231 1235 cachedelta[1])
1232 1236 else:
1233 1237 textlen = len(text)
1234 1238 if d is None or dist > textlen * 2:
1235 1239 text = buildtext()
1236 1240 data = self.compress(text)
1237 1241 l = len(data[1]) + len(data[0])
1238 1242 base = chainbase = curr
1239 1243
1240 1244 e = (offset_type(offset, flags), l, textlen,
1241 1245 base, link, p1r, p2r, node)
1242 1246 self.index.insert(-1, e)
1243 1247 self.nodemap[node] = curr
1244 1248
1245 1249 entry = self._io.packentry(e, self.node, self.version, curr)
1246 1250 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1247 1251
1248 1252 if type(text) == str: # only accept immutable objects
1249 1253 self._cache = (node, curr, text)
1250 1254 self._basecache = (curr, chainbase)
1251 1255 return node
1252 1256
1253 1257 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1254 1258 curr = len(self) - 1
1255 1259 if not self._inline:
1256 1260 transaction.add(self.datafile, offset)
1257 1261 transaction.add(self.indexfile, curr * len(entry))
1258 1262 if data[0]:
1259 1263 dfh.write(data[0])
1260 1264 dfh.write(data[1])
1261 1265 dfh.flush()
1262 1266 ifh.write(entry)
1263 1267 else:
1264 1268 offset += curr * self._io.size
1265 1269 transaction.add(self.indexfile, offset, curr)
1266 1270 ifh.write(entry)
1267 1271 ifh.write(data[0])
1268 1272 ifh.write(data[1])
1269 1273 self.checkinlinesize(transaction, ifh)
1270 1274
1271 1275 def addgroup(self, bundle, linkmapper, transaction):
1272 1276 """
1273 1277 add a delta group
1274 1278
1275 1279 given a set of deltas, add them to the revision log. the
1276 1280 first delta is against its parent, which should be in our
1277 1281 log, the rest are against the previous delta.
1278 1282 """
1279 1283
1280 1284 # track the base of the current delta log
1281 1285 content = []
1282 1286 node = None
1283 1287
1284 1288 r = len(self)
1285 1289 end = 0
1286 1290 if r:
1287 1291 end = self.end(r - 1)
1288 1292 ifh = self.opener(self.indexfile, "a+")
1289 1293 isize = r * self._io.size
1290 1294 if self._inline:
1291 1295 transaction.add(self.indexfile, end + isize, r)
1292 1296 dfh = None
1293 1297 else:
1294 1298 transaction.add(self.indexfile, isize, r)
1295 1299 transaction.add(self.datafile, end)
1296 1300 dfh = self.opener(self.datafile, "a")
1297 1301
1298 1302 try:
1299 1303 # loop through our set of deltas
1300 1304 chain = None
1301 1305 while True:
1302 1306 chunkdata = bundle.deltachunk(chain)
1303 1307 if not chunkdata:
1304 1308 break
1305 1309 node = chunkdata['node']
1306 1310 p1 = chunkdata['p1']
1307 1311 p2 = chunkdata['p2']
1308 1312 cs = chunkdata['cs']
1309 1313 deltabase = chunkdata['deltabase']
1310 1314 delta = chunkdata['delta']
1311 1315
1312 1316 content.append(node)
1313 1317
1314 1318 link = linkmapper(cs)
1315 1319 if node in self.nodemap:
1316 1320 # this can happen if two branches make the same change
1317 1321 chain = node
1318 1322 continue
1319 1323
1320 1324 for p in (p1, p2):
1321 1325 if p not in self.nodemap:
1322 1326 raise LookupError(p, self.indexfile,
1323 1327 _('unknown parent'))
1324 1328
1325 1329 if deltabase not in self.nodemap:
1326 1330 raise LookupError(deltabase, self.indexfile,
1327 1331 _('unknown delta base'))
1328 1332
1329 1333 baserev = self.rev(deltabase)
1330 1334 chain = self._addrevision(node, None, transaction, link,
1331 1335 p1, p2, (baserev, delta), ifh, dfh)
1332 1336 if not dfh and not self._inline:
1333 1337 # addrevision switched from inline to conventional
1334 1338 # reopen the index
1335 1339 ifh.close()
1336 1340 dfh = self.opener(self.datafile, "a")
1337 1341 ifh = self.opener(self.indexfile, "a")
1338 1342 finally:
1339 1343 if dfh:
1340 1344 dfh.close()
1341 1345 ifh.close()
1342 1346
1343 1347 return content
1344 1348
1345 1349 def getstrippoint(self, minlink):
1346 1350 """find the minimum rev that must be stripped to strip the linkrev
1347 1351
1348 1352 Returns a tuple containing the minimum rev and a set of all revs that
1349 1353 have linkrevs that will be broken by this strip.
1350 1354 """
1351 1355 brokenrevs = set()
1352 1356 strippoint = len(self)
1353 1357
1354 1358 heads = {}
1355 1359 futurelargelinkrevs = set()
1356 1360 for head in self.headrevs():
1357 1361 headlinkrev = self.linkrev(head)
1358 1362 heads[head] = headlinkrev
1359 1363 if headlinkrev >= minlink:
1360 1364 futurelargelinkrevs.add(headlinkrev)
1361 1365
1362 1366 # This algorithm involves walking down the rev graph, starting at the
1363 1367 # heads. Since the revs are topologically sorted according to linkrev,
1364 1368 # once all head linkrevs are below the minlink, we know there are
1365 1369 # no more revs that could have a linkrev greater than minlink.
1366 1370 # So we can stop walking.
1367 1371 while futurelargelinkrevs:
1368 1372 strippoint -= 1
1369 1373 linkrev = heads.pop(strippoint)
1370 1374
1371 1375 if linkrev < minlink:
1372 1376 brokenrevs.add(strippoint)
1373 1377 else:
1374 1378 futurelargelinkrevs.remove(linkrev)
1375 1379
1376 1380 for p in self.parentrevs(strippoint):
1377 1381 if p != nullrev:
1378 1382 plinkrev = self.linkrev(p)
1379 1383 heads[p] = plinkrev
1380 1384 if plinkrev >= minlink:
1381 1385 futurelargelinkrevs.add(plinkrev)
1382 1386
1383 1387 return strippoint, brokenrevs
1384 1388
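# Hedged trace of the walk above: on a two-revision linear log with
# linkrevs [0, 3] and minlink=2, the head (rev 1, linkrev 3) seeds
# futurelargelinkrevs; the first pass pops it, its parent's linkrev 0
# stays below minlink, the set empties, and (1, set()) is returned:
# truncate at rev 1 with no broken linkrevs.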
1385 1389 def strip(self, minlink, transaction):
1386 1390 """truncate the revlog on the first revision with a linkrev >= minlink
1387 1391
1388 1392 This function is called when we're stripping revision minlink and
1389 1393 its descendants from the repository.
1390 1394
1391 1395 We have to remove all revisions with linkrev >= minlink, because
1392 1396 the equivalent changelog revisions will be renumbered after the
1393 1397 strip.
1394 1398
1395 1399 So we truncate the revlog on the first of these revisions, and
1396 1400 trust that the caller has saved the revisions that shouldn't be
1397 1401 removed and that it'll re-add them after this truncation.
1398 1402 """
1399 1403 if len(self) == 0:
1400 1404 return
1401 1405
1402 1406 rev, _ = self.getstrippoint(minlink)
1403 1407 if rev == len(self):
1404 1408 return
1405 1409
1406 1410 # first truncate the files on disk
1407 1411 end = self.start(rev)
1408 1412 if not self._inline:
1409 1413 transaction.add(self.datafile, end)
1410 1414 end = rev * self._io.size
1411 1415 else:
1412 1416 end += rev * self._io.size
1413 1417
1414 1418 transaction.add(self.indexfile, end)
1415 1419
1416 1420 # then reset internal state in memory to forget those revisions
1417 1421 self._cache = None
1418 1422 self._chunkclear()
1419 1423 for x in xrange(rev, len(self)):
1420 1424 del self.nodemap[self.node(x)]
1421 1425
1422 1426 del self.index[rev:-1]
1423 1427
1424 1428 def checksize(self):
1425 1429 expected = 0
1426 1430 if len(self):
1427 1431 expected = max(0, self.end(len(self) - 1))
1428 1432
1429 1433 try:
1430 1434 f = self.opener(self.datafile)
1431 1435 f.seek(0, 2)
1432 1436 actual = f.tell()
1433 1437 f.close()
1434 1438 dd = actual - expected
1435 1439 except IOError, inst:
1436 1440 if inst.errno != errno.ENOENT:
1437 1441 raise
1438 1442 dd = 0
1439 1443
1440 1444 try:
1441 1445 f = self.opener(self.indexfile)
1442 1446 f.seek(0, 2)
1443 1447 actual = f.tell()
1444 1448 f.close()
1445 1449 s = self._io.size
1446 1450 i = max(0, actual // s)
1447 1451 di = actual - (i * s)
1448 1452 if self._inline:
1449 1453 databytes = 0
1450 1454 for r in self:
1451 1455 databytes += max(0, self.length(r))
1452 1456 dd = 0
1453 1457 di = actual - len(self) * s - databytes
1454 1458 except IOError, inst:
1455 1459 if inst.errno != errno.ENOENT:
1456 1460 raise
1457 1461 di = 0
1458 1462
1459 1463 return (dd, di)
1460 1464
1461 1465 def files(self):
1462 1466 res = [self.indexfile]
1463 1467 if not self._inline:
1464 1468 res.append(self.datafile)
1465 1469 return res