revlog: compute length of compressed deltas along with chain length...
Siddharth Agarwal
r23286:40e00678 default
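This changeset splits the chain-length computation out of chainlen() into a new helper, _chaininfo(), which walks a revision's delta chain once and returns both the number of deltas in the chain and the total compressed size that must be decompressed to reconstruct the revision (including the base text, since decompressing that also takes work). As a standalone restatement of the new helper's walk, assuming only the index layout shown in the diff (entry[1] is the compressed length, entry[3] the base revision):

    def chaininfo(index, rev, generaldelta):
        # Walk the delta chain from rev back to its base revision.
        clen = 0                # number of deltas in the chain
        compresseddeltalen = 0  # compressed bytes that must be decompressed
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3]:  # a rev that is its own base is a full snapshot
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]  # the delta parent is stored explicitly
            else:
                iterrev -= 1    # deltas are always against the previous rev
            e = index[iterrev]
        # count the base text too, since decompressing it also takes work
        compresseddeltalen += e[1]
        return clen, compresseddeltalen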
@@ -1,1489 +1,1495 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45 CensoredNodeError = error.CensoredNodeError
46 46
47 47 def getoffset(q):
48 48 return int(q >> 16)
49 49
50 50 def gettype(q):
51 51 return int(q & 0xFFFF)
52 52
53 53 def offset_type(offset, type):
54 54 return long(long(offset) << 16 | type)
55 55
56 56 _nullhash = _sha(nullid)
57 57
58 58 def hash(text, p1, p2):
59 59 """generate a hash from the given text and its parent hashes
60 60
61 61 This hash combines both the current file contents and its history
62 62 in a manner that makes it easy to distinguish nodes with the same
63 63 content in the revision graph.
64 64 """
65 65 # As of now, if one of the parent nodes is null, p2 is null
66 66 if p2 == nullid:
67 67 # deep copy of a hash is faster than creating one
68 68 s = _nullhash.copy()
69 69 s.update(p1)
70 70 else:
71 71 # none of the parent nodes are nullid
72 72 l = [p1, p2]
73 73 l.sort()
74 74 s = _sha(l[0])
75 75 s.update(l[1])
76 76 s.update(text)
77 77 return s.digest()
78 78
79 79 def decompress(bin):
80 80 """ decompress the given input """
81 81 if not bin:
82 82 return bin
83 83 t = bin[0]
84 84 if t == '\0':
85 85 return bin
86 86 if t == 'x':
87 87 try:
88 88 return _decompress(bin)
89 89 except zlib.error, e:
90 90 raise RevlogError(_("revlog decompress error: %s") % str(e))
91 91 if t == 'u':
92 92 return bin[1:]
93 93 raise RevlogError(_("unknown compression type %r") % t)
94 94
95 95 # index v0:
96 96 # 4 bytes: offset
97 97 # 4 bytes: compressed length
98 98 # 4 bytes: base rev
99 99 # 4 bytes: link rev
100 100 # 32 bytes: parent 1 nodeid
101 101 # 32 bytes: parent 2 nodeid
102 102 # 32 bytes: nodeid
103 103 indexformatv0 = ">4l20s20s20s"
104 104 v0shaoffset = 56
105 105
106 106 class revlogoldio(object):
107 107 def __init__(self):
108 108 self.size = struct.calcsize(indexformatv0)
109 109
110 110 def parseindex(self, data, inline):
111 111 s = self.size
112 112 index = []
113 113 nodemap = {nullid: nullrev}
114 114 n = off = 0
115 115 l = len(data)
116 116 while off + s <= l:
117 117 cur = data[off:off + s]
118 118 off += s
119 119 e = _unpack(indexformatv0, cur)
120 120 # transform to revlogv1 format
121 121 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
122 122 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
123 123 index.append(e2)
124 124 nodemap[e[6]] = n
125 125 n += 1
126 126
127 127 # add the magic null revision at -1
128 128 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
129 129
130 130 return index, nodemap, None
131 131
132 132 def packentry(self, entry, node, version, rev):
133 133 if gettype(entry[0]):
134 134 raise RevlogError(_("index entry flags need RevlogNG"))
135 135 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
136 136 node(entry[5]), node(entry[6]), entry[7])
137 137 return _pack(indexformatv0, *e2)
138 138
139 139 # index ng:
140 140 # 6 bytes: offset
141 141 # 2 bytes: flags
142 142 # 4 bytes: compressed length
143 143 # 4 bytes: uncompressed length
144 144 # 4 bytes: base rev
145 145 # 4 bytes: link rev
146 146 # 4 bytes: parent 1 rev
147 147 # 4 bytes: parent 2 rev
148 148 # 32 bytes: nodeid
149 149 indexformatng = ">Qiiiiii20s12x"
150 150 ngshaoffset = 32
151 151 versionformat = ">I"
152 152
153 153 class revlogio(object):
154 154 def __init__(self):
155 155 self.size = struct.calcsize(indexformatng)
156 156
157 157 def parseindex(self, data, inline):
158 158 # call the C implementation to parse the index data
159 159 index, cache = parsers.parse_index2(data, inline)
160 160 return index, getattr(index, 'nodemap', None), cache
161 161
162 162 def packentry(self, entry, node, version, rev):
163 163 p = _pack(indexformatng, *entry)
164 164 if rev == 0:
165 165 p = _pack(versionformat, version) + p[4:]
166 166 return p
167 167
168 168 class revlog(object):
169 169 """
170 170 the underlying revision storage object
171 171
172 172 A revlog consists of two parts, an index and the revision data.
173 173
174 174 The index is a file with a fixed record size containing
175 175 information on each revision, including its nodeid (hash), the
176 176 nodeids of its parents, the position and offset of its data within
177 177 the data file, and the revision it's based on. Finally, each entry
178 178 contains a linkrev entry that can serve as a pointer to external
179 179 data.
180 180
181 181 The revision data itself is a linear collection of data chunks.
182 182 Each chunk represents a revision and is usually represented as a
183 183 delta against the previous chunk. To bound lookup time, runs of
184 184 deltas are limited to about 2 times the length of the original
185 185 version data. This makes retrieval of a version proportional to
186 186 its size, or O(1) relative to the number of revisions.
187 187
188 188 Both pieces of the revlog are written to in an append-only
189 189 fashion, which means we never need to rewrite a file to insert or
190 190 remove data, and can use some simple techniques to avoid the need
191 191 for locking while reading.
192 192 """
193 193 def __init__(self, opener, indexfile):
194 194 """
195 195 create a revlog object
196 196
197 197 opener is a function that abstracts the file opening operation
198 198 and can be used to implement COW semantics or the like.
199 199 """
200 200 self.indexfile = indexfile
201 201 self.datafile = indexfile[:-2] + ".d"
202 202 self.opener = opener
203 203 self._cache = None
204 204 self._basecache = None
205 205 self._chunkcache = (0, '')
206 206 self._chunkcachesize = 65536
207 207 self._maxchainlen = None
208 208 self.index = []
209 209 self._pcache = {}
210 210 self._nodecache = {nullid: nullrev}
211 211 self._nodepos = None
212 212
213 213 v = REVLOG_DEFAULT_VERSION
214 214 opts = getattr(opener, 'options', None)
215 215 if opts is not None:
216 216 if 'revlogv1' in opts:
217 217 if 'generaldelta' in opts:
218 218 v |= REVLOGGENERALDELTA
219 219 else:
220 220 v = 0
221 221 if 'chunkcachesize' in opts:
222 222 self._chunkcachesize = opts['chunkcachesize']
223 223 if 'maxchainlen' in opts:
224 224 self._maxchainlen = opts['maxchainlen']
225 225
226 226 if self._chunkcachesize <= 0:
227 227 raise RevlogError(_('revlog chunk cache size %r is not greater '
228 228 'than 0') % self._chunkcachesize)
229 229 elif self._chunkcachesize & (self._chunkcachesize - 1):
230 230 raise RevlogError(_('revlog chunk cache size %r is not a power '
231 231 'of 2') % self._chunkcachesize)
232 232
233 233 i = ''
234 234 self._initempty = True
235 235 try:
236 236 f = self.opener(self.indexfile)
237 237 i = f.read()
238 238 f.close()
239 239 if len(i) > 0:
240 240 v = struct.unpack(versionformat, i[:4])[0]
241 241 self._initempty = False
242 242 except IOError, inst:
243 243 if inst.errno != errno.ENOENT:
244 244 raise
245 245
246 246 self.version = v
247 247 self._inline = v & REVLOGNGINLINEDATA
248 248 self._generaldelta = v & REVLOGGENERALDELTA
249 249 flags = v & ~0xFFFF
250 250 fmt = v & 0xFFFF
251 251 if fmt == REVLOGV0 and flags:
252 252 raise RevlogError(_("index %s unknown flags %#04x for format v0")
253 253 % (self.indexfile, flags >> 16))
254 254 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
255 255 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
256 256 % (self.indexfile, flags >> 16))
257 257 elif fmt > REVLOGNG:
258 258 raise RevlogError(_("index %s unknown format %d")
259 259 % (self.indexfile, fmt))
260 260
261 261 self._io = revlogio()
262 262 if self.version == REVLOGV0:
263 263 self._io = revlogoldio()
264 264 try:
265 265 d = self._io.parseindex(i, self._inline)
266 266 except (ValueError, IndexError):
267 267 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
268 268 self.index, nodemap, self._chunkcache = d
269 269 if nodemap is not None:
270 270 self.nodemap = self._nodecache = nodemap
271 271 if not self._chunkcache:
272 272 self._chunkclear()
273 273
274 274 def tip(self):
275 275 return self.node(len(self.index) - 2)
276 276 def __len__(self):
277 277 return len(self.index) - 1
278 278 def __iter__(self):
279 279 return iter(xrange(len(self)))
280 280 def revs(self, start=0, stop=None):
281 281 """iterate over all rev in this revlog (from start to stop)"""
282 282 step = 1
283 283 if stop is not None:
284 284 if start > stop:
285 285 step = -1
286 286 stop += step
287 287 else:
288 288 stop = len(self)
289 289 return xrange(start, stop, step)
290 290
291 291 @util.propertycache
292 292 def nodemap(self):
293 293 self.rev(self.node(0))
294 294 return self._nodecache
295 295
296 296 def hasnode(self, node):
297 297 try:
298 298 self.rev(node)
299 299 return True
300 300 except KeyError:
301 301 return False
302 302
303 303 def clearcaches(self):
304 304 try:
305 305 self._nodecache.clearcaches()
306 306 except AttributeError:
307 307 self._nodecache = {nullid: nullrev}
308 308 self._nodepos = None
309 309
310 310 def rev(self, node):
311 311 try:
312 312 return self._nodecache[node]
313 313 except TypeError:
314 314 raise
315 315 except RevlogError:
316 316 # parsers.c radix tree lookup failed
317 317 raise LookupError(node, self.indexfile, _('no node'))
318 318 except KeyError:
319 319 # pure python cache lookup failed
320 320 n = self._nodecache
321 321 i = self.index
322 322 p = self._nodepos
323 323 if p is None:
324 324 p = len(i) - 2
325 325 for r in xrange(p, -1, -1):
326 326 v = i[r][7]
327 327 n[v] = r
328 328 if v == node:
329 329 self._nodepos = r - 1
330 330 return r
331 331 raise LookupError(node, self.indexfile, _('no node'))
332 332
333 333 def node(self, rev):
334 334 return self.index[rev][7]
335 335 def linkrev(self, rev):
336 336 return self.index[rev][4]
337 337 def parents(self, node):
338 338 i = self.index
339 339 d = i[self.rev(node)]
340 340 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
341 341 def parentrevs(self, rev):
342 342 return self.index[rev][5:7]
343 343 def start(self, rev):
344 344 return int(self.index[rev][0] >> 16)
345 345 def end(self, rev):
346 346 return self.start(rev) + self.length(rev)
347 347 def length(self, rev):
348 348 return self.index[rev][1]
349 349 def chainbase(self, rev):
350 350 index = self.index
351 351 base = index[rev][3]
352 352 while base != rev:
353 353 rev = base
354 354 base = index[rev][3]
355 355 return base
356 356 def chainlen(self, rev):
357 return self._chaininfo(rev)[0]
358 def _chaininfo(self, rev):
357 359 index = self.index
358 360 generaldelta = self._generaldelta
359 361 iterrev = rev
360 362 e = index[iterrev]
361 363 clen = 0
364 compresseddeltalen = 0
362 365 while iterrev != e[3]:
363 366 clen += 1
367 compresseddeltalen += e[1]
364 368 if generaldelta:
365 369 iterrev = e[3]
366 370 else:
367 371 iterrev -= 1
368 372 e = index[iterrev]
369 return clen
373 # add text length of base since decompressing that also takes work
374 compresseddeltalen += e[1]
375 return clen, compresseddeltalen
370 376 def flags(self, rev):
371 377 return self.index[rev][0] & 0xFFFF
372 378 def rawsize(self, rev):
373 379 """return the length of the uncompressed text for a given revision"""
374 380 l = self.index[rev][2]
375 381 if l >= 0:
376 382 return l
377 383
378 384 t = self.revision(self.node(rev))
379 385 return len(t)
380 386 size = rawsize
381 387
382 388 def ancestors(self, revs, stoprev=0, inclusive=False):
383 389 """Generate the ancestors of 'revs' in reverse topological order.
384 390 Does not generate revs lower than stoprev.
385 391
386 392 See the documentation for ancestor.lazyancestors for more details."""
387 393
388 394 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
389 395 inclusive=inclusive)
390 396
391 397 def descendants(self, revs):
392 398 """Generate the descendants of 'revs' in revision order.
393 399
394 400 Yield a sequence of revision numbers starting with a child of
395 401 some rev in revs, i.e., each revision is *not* considered a
396 402 descendant of itself. Results are ordered by revision number (a
397 403 topological sort)."""
398 404 first = min(revs)
399 405 if first == nullrev:
400 406 for i in self:
401 407 yield i
402 408 return
403 409
404 410 seen = set(revs)
405 411 for i in self.revs(start=first + 1):
406 412 for x in self.parentrevs(i):
407 413 if x != nullrev and x in seen:
408 414 seen.add(i)
409 415 yield i
410 416 break
411 417
412 418 def findcommonmissing(self, common=None, heads=None):
413 419 """Return a tuple of the ancestors of common and the ancestors of heads
414 420 that are not ancestors of common. In revset terminology, we return the
415 421 tuple:
416 422
417 423 ::common, (::heads) - (::common)
418 424
419 425 The list is sorted by revision number, meaning it is
420 426 topologically sorted.
421 427
422 428 'heads' and 'common' are both lists of node IDs. If heads is
423 429 not supplied, uses all of the revlog's heads. If common is not
424 430 supplied, uses nullid."""
425 431 if common is None:
426 432 common = [nullid]
427 433 if heads is None:
428 434 heads = self.heads()
429 435
430 436 common = [self.rev(n) for n in common]
431 437 heads = [self.rev(n) for n in heads]
432 438
433 439 # we want the ancestors, but inclusive
434 440 class lazyset(object):
435 441 def __init__(self, lazyvalues):
436 442 self.addedvalues = set()
437 443 self.lazyvalues = lazyvalues
438 444
439 445 def __contains__(self, value):
440 446 return value in self.addedvalues or value in self.lazyvalues
441 447
442 448 def __iter__(self):
443 449 added = self.addedvalues
444 450 for r in added:
445 451 yield r
446 452 for r in self.lazyvalues:
447 453 if not r in added:
448 454 yield r
449 455
450 456 def add(self, value):
451 457 self.addedvalues.add(value)
452 458
453 459 def update(self, values):
454 460 self.addedvalues.update(values)
455 461
456 462 has = lazyset(self.ancestors(common))
457 463 has.add(nullrev)
458 464 has.update(common)
459 465
460 466 # take all ancestors from heads that aren't in has
461 467 missing = set()
462 468 visit = util.deque(r for r in heads if r not in has)
463 469 while visit:
464 470 r = visit.popleft()
465 471 if r in missing:
466 472 continue
467 473 else:
468 474 missing.add(r)
469 475 for p in self.parentrevs(r):
470 476 if p not in has:
471 477 visit.append(p)
472 478 missing = list(missing)
473 479 missing.sort()
474 480 return has, [self.node(r) for r in missing]
475 481
476 482 def findmissingrevs(self, common=None, heads=None):
477 483 """Return the revision numbers of the ancestors of heads that
478 484 are not ancestors of common.
479 485
480 486 More specifically, return a list of revision numbers corresponding to
481 487 nodes N such that every N satisfies the following constraints:
482 488
483 489 1. N is an ancestor of some node in 'heads'
484 490 2. N is not an ancestor of any node in 'common'
485 491
486 492 The list is sorted by revision number, meaning it is
487 493 topologically sorted.
488 494
489 495 'heads' and 'common' are both lists of revision numbers. If heads is
490 496 not supplied, uses all of the revlog's heads. If common is not
491 497 supplied, uses nullid."""
492 498 if common is None:
493 499 common = [nullrev]
494 500 if heads is None:
495 501 heads = self.headrevs()
496 502
497 503 return ancestor.missingancestors(heads, common, self.parentrevs)
498 504
499 505 def findmissing(self, common=None, heads=None):
500 506 """Return the ancestors of heads that are not ancestors of common.
501 507
502 508 More specifically, return a list of nodes N such that every N
503 509 satisfies the following constraints:
504 510
505 511 1. N is an ancestor of some node in 'heads'
506 512 2. N is not an ancestor of any node in 'common'
507 513
508 514 The list is sorted by revision number, meaning it is
509 515 topologically sorted.
510 516
511 517 'heads' and 'common' are both lists of node IDs. If heads is
512 518 not supplied, uses all of the revlog's heads. If common is not
513 519 supplied, uses nullid."""
514 520 if common is None:
515 521 common = [nullid]
516 522 if heads is None:
517 523 heads = self.heads()
518 524
519 525 common = [self.rev(n) for n in common]
520 526 heads = [self.rev(n) for n in heads]
521 527
522 528 return [self.node(r) for r in
523 529 ancestor.missingancestors(heads, common, self.parentrevs)]
524 530
525 531 def nodesbetween(self, roots=None, heads=None):
526 532 """Return a topological path from 'roots' to 'heads'.
527 533
528 534 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
529 535 topologically sorted list of all nodes N that satisfy both of
530 536 these constraints:
531 537
532 538 1. N is a descendant of some node in 'roots'
533 539 2. N is an ancestor of some node in 'heads'
534 540
535 541 Every node is considered to be both a descendant and an ancestor
536 542 of itself, so every reachable node in 'roots' and 'heads' will be
537 543 included in 'nodes'.
538 544
539 545 'outroots' is the list of reachable nodes in 'roots', i.e., the
540 546 subset of 'roots' that is returned in 'nodes'. Likewise,
541 547 'outheads' is the subset of 'heads' that is also in 'nodes'.
542 548
543 549 'roots' and 'heads' are both lists of node IDs. If 'roots' is
544 550 unspecified, uses nullid as the only root. If 'heads' is
545 551 unspecified, uses list of all of the revlog's heads."""
546 552 nonodes = ([], [], [])
547 553 if roots is not None:
548 554 roots = list(roots)
549 555 if not roots:
550 556 return nonodes
551 557 lowestrev = min([self.rev(n) for n in roots])
552 558 else:
553 559 roots = [nullid] # Everybody's a descendant of nullid
554 560 lowestrev = nullrev
555 561 if (lowestrev == nullrev) and (heads is None):
556 562 # We want _all_ the nodes!
557 563 return ([self.node(r) for r in self], [nullid], list(self.heads()))
558 564 if heads is None:
559 565 # All nodes are ancestors, so the latest ancestor is the last
560 566 # node.
561 567 highestrev = len(self) - 1
562 568 # Set ancestors to None to signal that every node is an ancestor.
563 569 ancestors = None
564 570 # Set heads to an empty dictionary for later discovery of heads
565 571 heads = {}
566 572 else:
567 573 heads = list(heads)
568 574 if not heads:
569 575 return nonodes
570 576 ancestors = set()
571 577 # Turn heads into a dictionary so we can remove 'fake' heads.
572 578 # Also, later we will be using it to filter out the heads we can't
573 579 # find from roots.
574 580 heads = dict.fromkeys(heads, False)
575 581 # Start at the top and keep marking parents until we're done.
576 582 nodestotag = set(heads)
577 583 # Remember where the top was so we can use it as a limit later.
578 584 highestrev = max([self.rev(n) for n in nodestotag])
579 585 while nodestotag:
580 586 # grab a node to tag
581 587 n = nodestotag.pop()
582 588 # Never tag nullid
583 589 if n == nullid:
584 590 continue
585 591 # A node's revision number represents its place in a
586 592 # topologically sorted list of nodes.
587 593 r = self.rev(n)
588 594 if r >= lowestrev:
589 595 if n not in ancestors:
590 596 # If we are possibly a descendant of one of the roots
591 597 # and we haven't already been marked as an ancestor
592 598 ancestors.add(n) # Mark as ancestor
593 599 # Add non-nullid parents to list of nodes to tag.
594 600 nodestotag.update([p for p in self.parents(n) if
595 601 p != nullid])
596 602 elif n in heads: # We've seen it before, is it a fake head?
597 603 # So it is; real heads should not be the ancestors of
598 604 # any other heads.
599 605 heads.pop(n)
600 606 if not ancestors:
601 607 return nonodes
602 608 # Now that we have our set of ancestors, we want to remove any
603 609 # roots that are not ancestors.
604 610
605 611 # If one of the roots was nullid, everything is included anyway.
606 612 if lowestrev > nullrev:
607 613 # But, since we weren't, let's recompute the lowest rev to not
608 614 # include roots that aren't ancestors.
609 615
610 616 # Filter out roots that aren't ancestors of heads
611 617 roots = [n for n in roots if n in ancestors]
612 618 # Recompute the lowest revision
613 619 if roots:
614 620 lowestrev = min([self.rev(n) for n in roots])
615 621 else:
616 622 # No more roots? Return empty list
617 623 return nonodes
618 624 else:
619 625 # We are descending from nullid, and don't need to care about
620 626 # any other roots.
621 627 lowestrev = nullrev
622 628 roots = [nullid]
623 629 # Transform our roots list into a set.
624 630 descendants = set(roots)
625 631 # Also, keep the original roots so we can filter out roots that aren't
626 632 # 'real' roots (i.e. are descended from other roots).
627 633 roots = descendants.copy()
628 634 # Our topologically sorted list of output nodes.
629 635 orderedout = []
630 636 # Don't start at nullid since we don't want nullid in our output list,
631 637 # and if nullid shows up in descendants, empty parents will look like
632 638 # they're descendants.
633 639 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
634 640 n = self.node(r)
635 641 isdescendant = False
636 642 if lowestrev == nullrev: # Everybody is a descendant of nullid
637 643 isdescendant = True
638 644 elif n in descendants:
639 645 # n is already a descendant
640 646 isdescendant = True
641 647 # This check only needs to be done here because all the roots
642 648 # will start being marked as descendants before the loop.
643 649 if n in roots:
644 650 # If n was a root, check if it's a 'real' root.
645 651 p = tuple(self.parents(n))
646 652 # If any of its parents are descendants, it's not a root.
647 653 if (p[0] in descendants) or (p[1] in descendants):
648 654 roots.remove(n)
649 655 else:
650 656 p = tuple(self.parents(n))
651 657 # A node is a descendant if either of its parents are
652 658 # descendants. (We seeded the descendants set with the roots
653 659 # up there, remember?)
654 660 if (p[0] in descendants) or (p[1] in descendants):
655 661 descendants.add(n)
656 662 isdescendant = True
657 663 if isdescendant and ((ancestors is None) or (n in ancestors)):
658 664 # Only include nodes that are both descendants and ancestors.
659 665 orderedout.append(n)
660 666 if (ancestors is not None) and (n in heads):
661 667 # We're trying to figure out which heads are reachable
662 668 # from roots.
663 669 # Mark this head as having been reached
664 670 heads[n] = True
665 671 elif ancestors is None:
666 672 # Otherwise, we're trying to discover the heads.
667 673 # Assume this is a head because if it isn't, the next step
668 674 # will eventually remove it.
669 675 heads[n] = True
670 676 # But, obviously its parents aren't.
671 677 for p in self.parents(n):
672 678 heads.pop(p, None)
673 679 heads = [n for n, flag in heads.iteritems() if flag]
674 680 roots = list(roots)
675 681 assert orderedout
676 682 assert roots
677 683 assert heads
678 684 return (orderedout, roots, heads)
679 685
680 686 def headrevs(self):
681 687 try:
682 688 return self.index.headrevs()
683 689 except AttributeError:
684 690 return self._headrevs()
685 691
686 692 def _headrevs(self):
687 693 count = len(self)
688 694 if not count:
689 695 return [nullrev]
690 696 # we won't iterate over filtered revs, so nobody is a head at the start
691 697 ishead = [0] * (count + 1)
692 698 index = self.index
693 699 for r in self:
694 700 ishead[r] = 1 # I may be a head
695 701 e = index[r]
696 702 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
697 703 return [r for r, val in enumerate(ishead) if val]
698 704
699 705 def heads(self, start=None, stop=None):
700 706 """return the list of all nodes that have no children
701 707
702 708 if start is specified, only heads that are descendants of
703 709 start will be returned
704 710 if stop is specified, it will consider all the revs from stop
705 711 as if they had no children
706 712 """
707 713 if start is None and stop is None:
708 714 if not len(self):
709 715 return [nullid]
710 716 return [self.node(r) for r in self.headrevs()]
711 717
712 718 if start is None:
713 719 start = nullid
714 720 if stop is None:
715 721 stop = []
716 722 stoprevs = set([self.rev(n) for n in stop])
717 723 startrev = self.rev(start)
718 724 reachable = set((startrev,))
719 725 heads = set((startrev,))
720 726
721 727 parentrevs = self.parentrevs
722 728 for r in self.revs(start=startrev + 1):
723 729 for p in parentrevs(r):
724 730 if p in reachable:
725 731 if r not in stoprevs:
726 732 reachable.add(r)
727 733 heads.add(r)
728 734 if p in heads and p not in stoprevs:
729 735 heads.remove(p)
730 736
731 737 return [self.node(r) for r in heads]
732 738
733 739 def children(self, node):
734 740 """find the children of a given node"""
735 741 c = []
736 742 p = self.rev(node)
737 743 for r in self.revs(start=p + 1):
738 744 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
739 745 if prevs:
740 746 for pr in prevs:
741 747 if pr == p:
742 748 c.append(self.node(r))
743 749 elif p == nullrev:
744 750 c.append(self.node(r))
745 751 return c
746 752
747 753 def descendant(self, start, end):
748 754 if start == nullrev:
749 755 return True
750 756 for i in self.descendants([start]):
751 757 if i == end:
752 758 return True
753 759 elif i > end:
754 760 break
755 761 return False
756 762
757 763 def commonancestorsheads(self, a, b):
758 764 """calculate all the heads of the common ancestors of nodes a and b"""
759 765 a, b = self.rev(a), self.rev(b)
760 766 try:
761 767 ancs = self.index.commonancestorsheads(a, b)
762 768 except (AttributeError, OverflowError): # C implementation failed
763 769 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
764 770 return map(self.node, ancs)
765 771
766 772 def isancestor(self, a, b):
767 773 """return True if node a is an ancestor of node b
768 774
769 775 The implementation of this is trivial but the use of
770 776 commonancestorsheads is not."""
771 777 return a in self.commonancestorsheads(a, b)
772 778
773 779 def ancestor(self, a, b):
774 780 """calculate the "best" common ancestor of nodes a and b"""
775 781
776 782 a, b = self.rev(a), self.rev(b)
777 783 try:
778 784 ancs = self.index.ancestors(a, b)
779 785 except (AttributeError, OverflowError):
780 786 ancs = ancestor.ancestors(self.parentrevs, a, b)
781 787 if ancs:
782 788 # choose a consistent winner when there's a tie
783 789 return min(map(self.node, ancs))
784 790 return nullid
785 791
786 792 def _match(self, id):
787 793 if isinstance(id, int):
788 794 # rev
789 795 return self.node(id)
790 796 if len(id) == 20:
791 797 # possibly a binary node
792 798 # odds of a binary node being all hex in ASCII are 1 in 10**25
793 799 try:
794 800 node = id
795 801 self.rev(node) # quick search the index
796 802 return node
797 803 except LookupError:
798 804 pass # may be partial hex id
799 805 try:
800 806 # str(rev)
801 807 rev = int(id)
802 808 if str(rev) != id:
803 809 raise ValueError
804 810 if rev < 0:
805 811 rev = len(self) + rev
806 812 if rev < 0 or rev >= len(self):
807 813 raise ValueError
808 814 return self.node(rev)
809 815 except (ValueError, OverflowError):
810 816 pass
811 817 if len(id) == 40:
812 818 try:
813 819 # a full hex nodeid?
814 820 node = bin(id)
815 821 self.rev(node)
816 822 return node
817 823 except (TypeError, LookupError):
818 824 pass
819 825
820 826 def _partialmatch(self, id):
821 827 try:
822 828 n = self.index.partialmatch(id)
823 829 if n and self.hasnode(n):
824 830 return n
825 831 return None
826 832 except RevlogError:
827 833 # parsers.c radix tree lookup gave multiple matches
828 834 # fall through to slow path that filters hidden revisions
829 835 pass
830 836 except (AttributeError, ValueError):
831 837 # we are pure python, or key was too short to search radix tree
832 838 pass
833 839
834 840 if id in self._pcache:
835 841 return self._pcache[id]
836 842
837 843 if len(id) < 40:
838 844 try:
839 845 # hex(node)[:...]
840 846 l = len(id) // 2 # grab an even number of digits
841 847 prefix = bin(id[:l * 2])
842 848 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
843 849 nl = [n for n in nl if hex(n).startswith(id) and
844 850 self.hasnode(n)]
845 851 if len(nl) > 0:
846 852 if len(nl) == 1:
847 853 self._pcache[id] = nl[0]
848 854 return nl[0]
849 855 raise LookupError(id, self.indexfile,
850 856 _('ambiguous identifier'))
851 857 return None
852 858 except TypeError:
853 859 pass
854 860
855 861 def lookup(self, id):
856 862 """locate a node based on:
857 863 - revision number or str(revision number)
858 864 - nodeid or subset of hex nodeid
859 865 """
860 866 n = self._match(id)
861 867 if n is not None:
862 868 return n
863 869 n = self._partialmatch(id)
864 870 if n:
865 871 return n
866 872
867 873 raise LookupError(id, self.indexfile, _('no match found'))
868 874
869 875 def cmp(self, node, text):
870 876 """compare text with a given file revision
871 877
872 878 returns True if text is different from what is stored.
873 879 """
874 880 p1, p2 = self.parents(node)
875 881 return hash(text, p1, p2) != node
876 882
877 883 def _addchunk(self, offset, data):
878 884 o, d = self._chunkcache
879 885 # try to add to existing cache
880 886 if o + len(d) == offset and len(d) + len(data) < _chunksize:
881 887 self._chunkcache = o, d + data
882 888 else:
883 889 self._chunkcache = offset, data
884 890
885 891 def _loadchunk(self, offset, length):
886 892 if self._inline:
887 893 df = self.opener(self.indexfile)
888 894 else:
889 895 df = self.opener(self.datafile)
890 896
891 897 # Cache data both forward and backward around the requested
892 898 # data, in a fixed size window. This helps speed up operations
893 899 # involving reading the revlog backwards.
894 900 cachesize = self._chunkcachesize
895 901 realoffset = offset & ~(cachesize - 1)
896 902 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
897 903 - realoffset)
898 904 df.seek(realoffset)
899 905 d = df.read(reallength)
900 906 df.close()
901 907 self._addchunk(realoffset, d)
902 908 if offset != realoffset or reallength != length:
903 909 return util.buffer(d, offset - realoffset, length)
904 910 return d
905 911
906 912 def _getchunk(self, offset, length):
907 913 o, d = self._chunkcache
908 914 l = len(d)
909 915
910 916 # is it in the cache?
911 917 cachestart = offset - o
912 918 cacheend = cachestart + length
913 919 if cachestart >= 0 and cacheend <= l:
914 920 if cachestart == 0 and cacheend == l:
915 921 return d # avoid a copy
916 922 return util.buffer(d, cachestart, cacheend - cachestart)
917 923
918 924 return self._loadchunk(offset, length)
919 925
920 926 def _chunkraw(self, startrev, endrev):
921 927 start = self.start(startrev)
922 928 end = self.end(endrev)
923 929 if self._inline:
924 930 start += (startrev + 1) * self._io.size
925 931 end += (endrev + 1) * self._io.size
926 932 length = end - start
927 933 return self._getchunk(start, length)
928 934
929 935 def _chunk(self, rev):
930 936 return decompress(self._chunkraw(rev, rev))
931 937
932 938 def _chunks(self, revs):
933 939 '''faster version of [self._chunk(rev) for rev in revs]
934 940
935 941 Assumes that revs is in ascending order.'''
936 942 if not revs:
937 943 return []
938 944 start = self.start
939 945 length = self.length
940 946 inline = self._inline
941 947 iosize = self._io.size
942 948 buffer = util.buffer
943 949
944 950 l = []
945 951 ladd = l.append
946 952
947 953 # preload the cache
948 954 try:
949 955 while True:
950 956 # ensure that the cache doesn't change out from under us
951 957 _cache = self._chunkcache
952 958 self._chunkraw(revs[0], revs[-1])
953 959 if _cache == self._chunkcache:
954 960 break
955 961 offset, data = _cache
956 962 except OverflowError:
957 963 # issue4215 - we can't cache a run of chunks greater than
958 964 # 2G on Windows
959 965 return [self._chunk(rev) for rev in revs]
960 966
961 967 for rev in revs:
962 968 chunkstart = start(rev)
963 969 if inline:
964 970 chunkstart += (rev + 1) * iosize
965 971 chunklength = length(rev)
966 972 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
967 973
968 974 return l
969 975
970 976 def _chunkclear(self):
971 977 self._chunkcache = (0, '')
972 978
973 979 def deltaparent(self, rev):
974 980 """return deltaparent of the given revision"""
975 981 base = self.index[rev][3]
976 982 if base == rev:
977 983 return nullrev
978 984 elif self._generaldelta:
979 985 return base
980 986 else:
981 987 return rev - 1
982 988
983 989 def revdiff(self, rev1, rev2):
984 990 """return or calculate a delta between two revisions"""
985 991 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
986 992 return str(self._chunk(rev2))
987 993
988 994 return mdiff.textdiff(self.revision(rev1),
989 995 self.revision(rev2))
990 996
991 997 def revision(self, nodeorrev):
992 998 """return an uncompressed revision of a given node or revision
993 999 number.
994 1000 """
995 1001 if isinstance(nodeorrev, int):
996 1002 rev = nodeorrev
997 1003 node = self.node(rev)
998 1004 else:
999 1005 node = nodeorrev
1000 1006 rev = None
1001 1007
1002 1008 _cache = self._cache # grab local copy of cache to avoid thread race
1003 1009 cachedrev = None
1004 1010 if node == nullid:
1005 1011 return ""
1006 1012 if _cache:
1007 1013 if _cache[0] == node:
1008 1014 return _cache[2]
1009 1015 cachedrev = _cache[1]
1010 1016
1011 1017 # look up what we need to read
1012 1018 text = None
1013 1019 if rev is None:
1014 1020 rev = self.rev(node)
1015 1021
1016 1022 # check rev flags
1017 1023 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1018 1024 raise RevlogError(_('incompatible revision flag %x') %
1019 1025 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1020 1026
1021 1027 # build delta chain
1022 1028 chain = []
1023 1029 index = self.index # for performance
1024 1030 generaldelta = self._generaldelta
1025 1031 iterrev = rev
1026 1032 e = index[iterrev]
1027 1033 while iterrev != e[3] and iterrev != cachedrev:
1028 1034 chain.append(iterrev)
1029 1035 if generaldelta:
1030 1036 iterrev = e[3]
1031 1037 else:
1032 1038 iterrev -= 1
1033 1039 e = index[iterrev]
1034 1040
1035 1041 if iterrev == cachedrev:
1036 1042 # cache hit
1037 1043 text = _cache[2]
1038 1044 else:
1039 1045 chain.append(iterrev)
1040 1046 chain.reverse()
1041 1047
1042 1048 # drop cache to save memory
1043 1049 self._cache = None
1044 1050
1045 1051 bins = self._chunks(chain)
1046 1052 if text is None:
1047 1053 text = str(bins[0])
1048 1054 bins = bins[1:]
1049 1055
1050 1056 text = mdiff.patches(text, bins)
1051 1057
1052 1058 text = self._checkhash(text, node, rev)
1053 1059
1054 1060 self._cache = (node, rev, text)
1055 1061 return text
1056 1062
1057 1063 def hash(self, text, p1, p2):
1058 1064 """Compute a node hash.
1059 1065
1060 1066 Available as a function so that subclasses can replace the hash
1061 1067 as needed.
1062 1068 """
1063 1069 return hash(text, p1, p2)
1064 1070
1065 1071 def _checkhash(self, text, node, rev):
1066 1072 p1, p2 = self.parents(node)
1067 1073 self.checkhash(text, p1, p2, node, rev)
1068 1074 return text
1069 1075
1070 1076 def checkhash(self, text, p1, p2, node, rev=None):
1071 1077 if node != self.hash(text, p1, p2):
1072 1078 revornode = rev
1073 1079 if revornode is None:
1074 1080 revornode = templatefilters.short(hex(node))
1075 1081 raise RevlogError(_("integrity check failed on %s:%s")
1076 1082 % (self.indexfile, revornode))
1077 1083
1078 1084 def checkinlinesize(self, tr, fp=None):
1079 1085 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1080 1086 return
1081 1087
1082 1088 trinfo = tr.find(self.indexfile)
1083 1089 if trinfo is None:
1084 1090 raise RevlogError(_("%s not found in the transaction")
1085 1091 % self.indexfile)
1086 1092
1087 1093 trindex = trinfo[2]
1088 1094 dataoff = self.start(trindex)
1089 1095
1090 1096 tr.add(self.datafile, dataoff)
1091 1097
1092 1098 if fp:
1093 1099 fp.flush()
1094 1100 fp.close()
1095 1101
1096 1102 df = self.opener(self.datafile, 'w')
1097 1103 try:
1098 1104 for r in self:
1099 1105 df.write(self._chunkraw(r, r))
1100 1106 finally:
1101 1107 df.close()
1102 1108
1103 1109 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1104 1110 self.version &= ~(REVLOGNGINLINEDATA)
1105 1111 self._inline = False
1106 1112 for i in self:
1107 1113 e = self._io.packentry(self.index[i], self.node, self.version, i)
1108 1114 fp.write(e)
1109 1115
1110 1116 # if we don't call close, the temp file will never replace the
1111 1117 # real index
1112 1118 fp.close()
1113 1119
1114 1120 tr.replace(self.indexfile, trindex * self._io.size)
1115 1121 self._chunkclear()
1116 1122
1117 1123 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1118 1124 node=None):
1119 1125 """add a revision to the log
1120 1126
1121 1127 text - the revision data to add
1122 1128 transaction - the transaction object used for rollback
1123 1129 link - the linkrev data to add
1124 1130 p1, p2 - the parent nodeids of the revision
1125 1131 cachedelta - an optional precomputed delta
1126 1132 node - nodeid of revision; typically node is not specified, and it is
1127 1133 computed by default as hash(text, p1, p2); however, subclasses might
1128 1134 use a different hashing method (and override checkhash() in that case)
1129 1135 """
1130 1136 if link == nullrev:
1131 1137 raise RevlogError(_("attempted to add linkrev -1 to %s")
1132 1138 % self.indexfile)
1133 1139 node = node or self.hash(text, p1, p2)
1134 1140 if node in self.nodemap:
1135 1141 return node
1136 1142
1137 1143 dfh = None
1138 1144 if not self._inline:
1139 1145 dfh = self.opener(self.datafile, "a")
1140 1146 ifh = self.opener(self.indexfile, "a+")
1141 1147 try:
1142 1148 return self._addrevision(node, text, transaction, link, p1, p2,
1143 1149 cachedelta, ifh, dfh)
1144 1150 finally:
1145 1151 if dfh:
1146 1152 dfh.close()
1147 1153 ifh.close()
1148 1154
1149 1155 def compress(self, text):
1150 1156 """ generate a possibly-compressed representation of text """
1151 1157 if not text:
1152 1158 return ("", text)
1153 1159 l = len(text)
1154 1160 bin = None
1155 1161 if l < 44:
1156 1162 pass
1157 1163 elif l > 1000000:
1158 1164 # zlib makes an internal copy, thus doubling memory usage for
1159 1165 # large files, so let's do this in pieces
1160 1166 z = zlib.compressobj()
1161 1167 p = []
1162 1168 pos = 0
1163 1169 while pos < l:
1164 1170 pos2 = pos + 2**20
1165 1171 p.append(z.compress(text[pos:pos2]))
1166 1172 pos = pos2
1167 1173 p.append(z.flush())
1168 1174 if sum(map(len, p)) < l:
1169 1175 bin = "".join(p)
1170 1176 else:
1171 1177 bin = _compress(text)
1172 1178 if bin is None or len(bin) > l:
1173 1179 if text[0] == '\0':
1174 1180 return ("", text)
1175 1181 return ('u', text)
1176 1182 return ("", bin)
1177 1183
1178 1184 def _addrevision(self, node, text, transaction, link, p1, p2,
1179 1185 cachedelta, ifh, dfh):
1180 1186 """internal function to add revisions to the log
1181 1187
1182 1188 see addrevision for argument descriptions.
1183 1189 invariants:
1184 1190 - text is optional (can be None); if not set, cachedelta must be set.
1185 1191 if both are set, they must correspond to each other.
1186 1192 """
1187 1193 btext = [text]
1188 1194 def buildtext():
1189 1195 if btext[0] is not None:
1190 1196 return btext[0]
1191 1197 # flush any pending writes here so we can read it in revision
1192 1198 if dfh:
1193 1199 dfh.flush()
1194 1200 ifh.flush()
1195 1201 basetext = self.revision(self.node(cachedelta[0]))
1196 1202 btext[0] = mdiff.patch(basetext, cachedelta[1])
1197 1203 try:
1198 1204 self.checkhash(btext[0], p1, p2, node)
1199 1205 except CensoredNodeError:
1200 1206 pass # always import a censor tombstone.
1201 1207 return btext[0]
1202 1208
1203 1209 def builddelta(rev):
1204 1210 # can we use the cached delta?
1205 1211 if cachedelta and cachedelta[0] == rev:
1206 1212 delta = cachedelta[1]
1207 1213 else:
1208 1214 t = buildtext()
1209 1215 ptext = self.revision(self.node(rev))
1210 1216 delta = mdiff.textdiff(ptext, t)
1211 1217 data = self.compress(delta)
1212 1218 l = len(data[1]) + len(data[0])
1213 1219 if basecache[0] == rev:
1214 1220 chainbase = basecache[1]
1215 1221 else:
1216 1222 chainbase = self.chainbase(rev)
1217 1223 dist = l + offset - self.start(chainbase)
1218 1224 if self._generaldelta:
1219 1225 base = rev
1220 1226 else:
1221 1227 base = chainbase
1222 1228 chainlen = self.chainlen(rev) + 1
1223 1229 return dist, l, data, base, chainbase, chainlen
1224 1230
1225 1231 curr = len(self)
1226 1232 prev = curr - 1
1227 1233 base = chainbase = curr
1228 1234 chainlen = None
1229 1235 offset = self.end(prev)
1230 1236 flags = 0
1231 1237 d = None
1232 1238 if self._basecache is None:
1233 1239 self._basecache = (prev, self.chainbase(prev))
1234 1240 basecache = self._basecache
1235 1241 p1r, p2r = self.rev(p1), self.rev(p2)
1236 1242
1237 1243 # should we try to build a delta?
1238 1244 if prev != nullrev:
1239 1245 if self._generaldelta:
1240 1246 if p1r >= basecache[1]:
1241 1247 d = builddelta(p1r)
1242 1248 elif p2r >= basecache[1]:
1243 1249 d = builddelta(p2r)
1244 1250 else:
1245 1251 d = builddelta(prev)
1246 1252 else:
1247 1253 d = builddelta(prev)
1248 1254 dist, l, data, base, chainbase, chainlen = d
1249 1255
1250 1256 # full versions are inserted when the needed deltas
1251 1257 # become comparable to the uncompressed text
1252 1258 if text is None:
1253 1259 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1254 1260 cachedelta[1])
1255 1261 else:
1256 1262 textlen = len(text)
1257 1263 if (d is None or dist > textlen * 2 or l > textlen or
1258 1264 (self._maxchainlen and chainlen > self._maxchainlen)):
1259 1265 text = buildtext()
1260 1266 data = self.compress(text)
1261 1267 l = len(data[1]) + len(data[0])
1262 1268 base = chainbase = curr
1263 1269
1264 1270 e = (offset_type(offset, flags), l, textlen,
1265 1271 base, link, p1r, p2r, node)
1266 1272 self.index.insert(-1, e)
1267 1273 self.nodemap[node] = curr
1268 1274
1269 1275 entry = self._io.packentry(e, self.node, self.version, curr)
1270 1276 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1271 1277
1272 1278 if type(text) == str: # only accept immutable objects
1273 1279 self._cache = (node, curr, text)
1274 1280 self._basecache = (curr, chainbase)
1275 1281 return node
1276 1282
1277 1283 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1278 1284 curr = len(self) - 1
1279 1285 if not self._inline:
1280 1286 transaction.add(self.datafile, offset)
1281 1287 transaction.add(self.indexfile, curr * len(entry))
1282 1288 if data[0]:
1283 1289 dfh.write(data[0])
1284 1290 dfh.write(data[1])
1285 1291 dfh.flush()
1286 1292 ifh.write(entry)
1287 1293 else:
1288 1294 offset += curr * self._io.size
1289 1295 transaction.add(self.indexfile, offset, curr)
1290 1296 ifh.write(entry)
1291 1297 ifh.write(data[0])
1292 1298 ifh.write(data[1])
1293 1299 self.checkinlinesize(transaction, ifh)
1294 1300
1295 1301 def addgroup(self, bundle, linkmapper, transaction):
1296 1302 """
1297 1303 add a delta group
1298 1304
1299 1305 given a set of deltas, add them to the revision log. the
1300 1306 first delta is against its parent, which should be in our
1301 1307 log; the rest are against the previous delta.
1302 1308 """
1303 1309
1304 1310 # track the base of the current delta log
1305 1311 content = []
1306 1312 node = None
1307 1313
1308 1314 r = len(self)
1309 1315 end = 0
1310 1316 if r:
1311 1317 end = self.end(r - 1)
1312 1318 ifh = self.opener(self.indexfile, "a+")
1313 1319 isize = r * self._io.size
1314 1320 if self._inline:
1315 1321 transaction.add(self.indexfile, end + isize, r)
1316 1322 dfh = None
1317 1323 else:
1318 1324 transaction.add(self.indexfile, isize, r)
1319 1325 transaction.add(self.datafile, end)
1320 1326 dfh = self.opener(self.datafile, "a")
1321 1327
1322 1328 try:
1323 1329 # loop through our set of deltas
1324 1330 chain = None
1325 1331 while True:
1326 1332 chunkdata = bundle.deltachunk(chain)
1327 1333 if not chunkdata:
1328 1334 break
1329 1335 node = chunkdata['node']
1330 1336 p1 = chunkdata['p1']
1331 1337 p2 = chunkdata['p2']
1332 1338 cs = chunkdata['cs']
1333 1339 deltabase = chunkdata['deltabase']
1334 1340 delta = chunkdata['delta']
1335 1341
1336 1342 content.append(node)
1337 1343
1338 1344 link = linkmapper(cs)
1339 1345 if node in self.nodemap:
1340 1346 # this can happen if two branches make the same change
1341 1347 chain = node
1342 1348 continue
1343 1349
1344 1350 for p in (p1, p2):
1345 1351 if p not in self.nodemap:
1346 1352 raise LookupError(p, self.indexfile,
1347 1353 _('unknown parent'))
1348 1354
1349 1355 if deltabase not in self.nodemap:
1350 1356 raise LookupError(deltabase, self.indexfile,
1351 1357 _('unknown delta base'))
1352 1358
1353 1359 baserev = self.rev(deltabase)
1354 1360 chain = self._addrevision(node, None, transaction, link,
1355 1361 p1, p2, (baserev, delta), ifh, dfh)
1356 1362 if not dfh and not self._inline:
1357 1363 # addrevision switched from inline to conventional
1358 1364 # reopen the index
1359 1365 ifh.close()
1360 1366 dfh = self.opener(self.datafile, "a")
1361 1367 ifh = self.opener(self.indexfile, "a")
1362 1368 finally:
1363 1369 if dfh:
1364 1370 dfh.close()
1365 1371 ifh.close()
1366 1372
1367 1373 return content
1368 1374
1369 1375 def getstrippoint(self, minlink):
1370 1376 """find the minimum rev that must be stripped to strip the linkrev
1371 1377
1372 1378 Returns a tuple containing the minimum rev and a set of all revs that
1373 1379 have linkrevs that will be broken by this strip.
1374 1380 """
1375 1381 brokenrevs = set()
1376 1382 strippoint = len(self)
1377 1383
1378 1384 heads = {}
1379 1385 futurelargelinkrevs = set()
1380 1386 for head in self.headrevs():
1381 1387 headlinkrev = self.linkrev(head)
1382 1388 heads[head] = headlinkrev
1383 1389 if headlinkrev >= minlink:
1384 1390 futurelargelinkrevs.add(headlinkrev)
1385 1391
1386 1392 # This algorithm involves walking down the rev graph, starting at the
1387 1393 # heads. Since the revs are topologically sorted according to linkrev,
1388 1394 # once all head linkrevs are below the minlink, we know there are
1389 1395 # no more revs that could have a linkrev greater than minlink.
1390 1396 # So we can stop walking.
1391 1397 while futurelargelinkrevs:
1392 1398 strippoint -= 1
1393 1399 linkrev = heads.pop(strippoint)
1394 1400
1395 1401 if linkrev < minlink:
1396 1402 brokenrevs.add(strippoint)
1397 1403 else:
1398 1404 futurelargelinkrevs.remove(linkrev)
1399 1405
1400 1406 for p in self.parentrevs(strippoint):
1401 1407 if p != nullrev:
1402 1408 plinkrev = self.linkrev(p)
1403 1409 heads[p] = plinkrev
1404 1410 if plinkrev >= minlink:
1405 1411 futurelargelinkrevs.add(plinkrev)
1406 1412
1407 1413 return strippoint, brokenrevs
1408 1414
1409 1415 def strip(self, minlink, transaction):
1410 1416 """truncate the revlog on the first revision with a linkrev >= minlink
1411 1417
1412 1418 This function is called when we're stripping revision minlink and
1413 1419 its descendants from the repository.
1414 1420
1415 1421 We have to remove all revisions with linkrev >= minlink, because
1416 1422 the equivalent changelog revisions will be renumbered after the
1417 1423 strip.
1418 1424
1419 1425 So we truncate the revlog on the first of these revisions, and
1420 1426 trust that the caller has saved the revisions that shouldn't be
1421 1427 removed and that it'll re-add them after this truncation.
1422 1428 """
1423 1429 if len(self) == 0:
1424 1430 return
1425 1431
1426 1432 rev, _ = self.getstrippoint(minlink)
1427 1433 if rev == len(self):
1428 1434 return
1429 1435
1430 1436 # first truncate the files on disk
1431 1437 end = self.start(rev)
1432 1438 if not self._inline:
1433 1439 transaction.add(self.datafile, end)
1434 1440 end = rev * self._io.size
1435 1441 else:
1436 1442 end += rev * self._io.size
1437 1443
1438 1444 transaction.add(self.indexfile, end)
1439 1445
1440 1446 # then reset internal state in memory to forget those revisions
1441 1447 self._cache = None
1442 1448 self._chunkclear()
1443 1449 for x in xrange(rev, len(self)):
1444 1450 del self.nodemap[self.node(x)]
1445 1451
1446 1452 del self.index[rev:-1]
1447 1453
1448 1454 def checksize(self):
1449 1455 expected = 0
1450 1456 if len(self):
1451 1457 expected = max(0, self.end(len(self) - 1))
1452 1458
1453 1459 try:
1454 1460 f = self.opener(self.datafile)
1455 1461 f.seek(0, 2)
1456 1462 actual = f.tell()
1457 1463 f.close()
1458 1464 dd = actual - expected
1459 1465 except IOError, inst:
1460 1466 if inst.errno != errno.ENOENT:
1461 1467 raise
1462 1468 dd = 0
1463 1469
1464 1470 try:
1465 1471 f = self.opener(self.indexfile)
1466 1472 f.seek(0, 2)
1467 1473 actual = f.tell()
1468 1474 f.close()
1469 1475 s = self._io.size
1470 1476 i = max(0, actual // s)
1471 1477 di = actual - (i * s)
1472 1478 if self._inline:
1473 1479 databytes = 0
1474 1480 for r in self:
1475 1481 databytes += max(0, self.length(r))
1476 1482 dd = 0
1477 1483 di = actual - len(self) * s - databytes
1478 1484 except IOError, inst:
1479 1485 if inst.errno != errno.ENOENT:
1480 1486 raise
1481 1487 di = 0
1482 1488
1483 1489 return (dd, di)
1484 1490
1485 1491 def files(self):
1486 1492 res = [self.indexfile]
1487 1493 if not self._inline:
1488 1494 res.append(self.datafile)
1489 1495 return res
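Within this diff, the only consumer of the new helper is chainlen() itself, which returns _chaininfo(rev)[0] and is checked by _addrevision() against the optional maxchainlen setting; the compressed-delta total is computed but not yet acted on. A hypothetical sketch of how a caller could combine both values to decide when a chain has grown too expensive to read (chain_too_expensive and its cutoffs are illustrative, not part of this change; the twice-text-size bound echoes the dist check in _addrevision):

    def chain_too_expensive(rlog, rev, textlen, maxchainlen=None):
        # rlog is a revlog instance; rev is the tip of the candidate chain.
        clen, compresseddeltalen = rlog._chaininfo(rev)
        if maxchainlen is not None and clen + 1 > maxchainlen:
            return True  # extending the chain would exceed the configured cap
        # reading the revision means decompressing every delta plus the base
        # text; bail out once that total exceeds twice the uncompressed size
        return compresseddeltalen > textlen * 2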