revlog: cache chain info after calculating it for a rev (issue4452)...
Siddharth Agarwal
r23306:f7a42f8e default
@@ -1,1503 +1,1520 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
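# A rough usage sketch (assumes Python 2 and a bare filesystem opener
# standing in for Mercurial's vfs; '00changelog.i' is only an example
# path, and atomictemp/write keywords are ignored here):
from mercurial import revlog

def simpleopener(path, mode='r', **kwargs):
    # minimal opener: binary files relative to the current directory
    return open(path, mode + 'b')

rl = revlog.revlog(simpleopener, '00changelog.i')
print len(rl), repr(rl.tip())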
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45 CensoredNodeError = error.CensoredNodeError
46 46
47 47 def getoffset(q):
48 48 return int(q >> 16)
49 49
50 50 def gettype(q):
51 51 return int(q & 0xFFFF)
52 52
53 53 def offset_type(offset, type):
54 54 return long(long(offset) << 16 | type)
55 55
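# The first index field packs a 48-bit data offset and 16 bits of
# per-revision flags into a single integer; a quick round-trip check
# using the three helpers above:
q = offset_type(1048576, 0)      # chunk starts 1 MiB into the data file
assert getoffset(q) == 1048576
assert gettype(q) == 0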
56 56 _nullhash = _sha(nullid)
57 57
58 58 def hash(text, p1, p2):
59 59 """generate a hash from the given text and its parent hashes
60 60
61 61 This hash combines both the current file contents and its history
62 62 in a manner that makes it easy to distinguish nodes with the same
63 63 content in the revision graph.
64 64 """
65 65 # As of now, if one of the parent nodes is null, p2 is null
66 66 if p2 == nullid:
67 67 # deep copy of a hash is faster than creating one
68 68 s = _nullhash.copy()
69 69 s.update(p1)
70 70 else:
71 71 # none of the parent nodes are nullid
72 72 l = [p1, p2]
73 73 l.sort()
74 74 s = _sha(l[0])
75 75 s.update(l[1])
76 76 s.update(text)
77 77 return s.digest()
78 78
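# Equivalently, the digest is sha1(min(p1, p2) + max(p1, p2) + text);
# a standalone restatement (nullid is 20 zero bytes, as in node.py):
import hashlib

def nodehash(text, p1, p2='\0' * 20):
    lo, hi = sorted([p1, p2])
    s = hashlib.sha1(lo)
    s.update(hi)
    s.update(text)
    return s.digest()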
79 79 def decompress(bin):
80 80 """ decompress the given input """
81 81 if not bin:
82 82 return bin
83 83 t = bin[0]
84 84 if t == '\0':
85 85 return bin
86 86 if t == 'x':
87 87 try:
88 88 return _decompress(bin)
89 89 except zlib.error, e:
90 90 raise RevlogError(_("revlog decompress error: %s") % str(e))
91 91 if t == 'u':
92 92 return bin[1:]
93 93 raise RevlogError(_("unknown compression type %r") % t)
94 94
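# Each stored chunk is self-describing through its first byte: zlib
# streams happen to start with 'x', 'u' marks text stored uncompressed,
# and '\0' (or an empty chunk) passes through untouched. For example:
import zlib
blob = zlib.compress('hello revlog')    # zlib output starts with 'x'
assert decompress(blob) == 'hello revlog'
assert decompress('uhello') == 'hello'  # 'u' prefix is stripped
assert decompress('') == ''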
95 95 # index v0:
96 96 # 4 bytes: offset
97 97 # 4 bytes: compressed length
98 98 # 4 bytes: base rev
99 99 # 4 bytes: link rev
100 100 # 20 bytes: parent 1 nodeid
101 101 # 20 bytes: parent 2 nodeid
102 102 # 20 bytes: nodeid
103 103 indexformatv0 = ">4l20s20s20s"
104 104 v0shaoffset = 56
105 105
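# So a v0 entry is a fixed 76 bytes, with the entry's own nodeid
# starting at byte 56, after the four ints and the two parent nodeids:
import struct
assert struct.calcsize(indexformatv0) == 76
assert v0shaoffset == 4 * 4 + 20 + 20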
106 106 class revlogoldio(object):
107 107 def __init__(self):
108 108 self.size = struct.calcsize(indexformatv0)
109 109
110 110 def parseindex(self, data, inline):
111 111 s = self.size
112 112 index = []
113 113 nodemap = {nullid: nullrev}
114 114 n = off = 0
115 115 l = len(data)
116 116 while off + s <= l:
117 117 cur = data[off:off + s]
118 118 off += s
119 119 e = _unpack(indexformatv0, cur)
120 120 # transform to revlogv1 format
121 121 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
122 122 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
123 123 index.append(e2)
124 124 nodemap[e[6]] = n
125 125 n += 1
126 126
127 127 # add the magic null revision at -1
128 128 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
129 129
130 130 return index, nodemap, None
131 131
132 132 def packentry(self, entry, node, version, rev):
133 133 if gettype(entry[0]):
134 134 raise RevlogError(_("index entry flags need RevlogNG"))
135 135 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
136 136 node(entry[5]), node(entry[6]), entry[7])
137 137 return _pack(indexformatv0, *e2)
138 138
139 139 # index ng:
140 140 # 6 bytes: offset
141 141 # 2 bytes: flags
142 142 # 4 bytes: compressed length
143 143 # 4 bytes: uncompressed length
144 144 # 4 bytes: base rev
145 145 # 4 bytes: link rev
146 146 # 4 bytes: parent 1 rev
147 147 # 4 bytes: parent 2 rev
148 148 # 32 bytes: nodeid
149 149 indexformatng = ">Qiiiiii20s12x"
150 150 ngshaoffset = 32
151 151 versionformat = ">I"
152 152
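# A RevlogNG entry is a fixed 64 bytes (8 + 6 * 4 + 20 + 12 bytes of
# padding); for rev 0, packentry() below overlays the 4-byte version
# header on the offset+flags field, whose offset part is always 0 there:
import struct
assert struct.calcsize(indexformatng) == 64
assert struct.calcsize(versionformat) == 4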
153 153 class revlogio(object):
154 154 def __init__(self):
155 155 self.size = struct.calcsize(indexformatng)
156 156
157 157 def parseindex(self, data, inline):
158 158 # call the C implementation to parse the index data
159 159 index, cache = parsers.parse_index2(data, inline)
160 160 return index, getattr(index, 'nodemap', None), cache
161 161
162 162 def packentry(self, entry, node, version, rev):
163 163 p = _pack(indexformatng, *entry)
164 164 if rev == 0:
165 165 p = _pack(versionformat, version) + p[4:]
166 166 return p
167 167
168 168 class revlog(object):
169 169 """
170 170 the underlying revision storage object
171 171
172 172 A revlog consists of two parts, an index and the revision data.
173 173
174 174 The index is a file with a fixed record size containing
175 175 information on each revision, including its nodeid (hash), the
176 176 nodeids of its parents, the position and offset of its data within
177 177 the data file, and the revision it's based on. Finally, each entry
178 178 contains a linkrev entry that can serve as a pointer to external
179 179 data.
180 180
181 181 The revision data itself is a linear collection of data chunks.
182 182 Each chunk represents a revision and is usually represented as a
183 183 delta against the previous chunk. To bound lookup time, runs of
184 184 deltas are limited to about 2 times the length of the original
185 185 version data. This makes retrieval of a version proportional to
186 186 its size, or O(1) relative to the number of revisions.
187 187
188 188 Both pieces of the revlog are written to in an append-only
189 189 fashion, which means we never need to rewrite a file to insert or
190 190 remove data, and can use some simple techniques to avoid the need
191 191 for locking while reading.
192 192 """
193 193 def __init__(self, opener, indexfile):
194 194 """
195 195 create a revlog object
196 196
197 197 opener is a function that abstracts the file opening operation
198 198 and can be used to implement COW semantics or the like.
199 199 """
200 200 self.indexfile = indexfile
201 201 self.datafile = indexfile[:-2] + ".d"
202 202 self.opener = opener
203 203 self._cache = None
204 204 self._basecache = None
205 205 self._chunkcache = (0, '')
206 206 self._chunkcachesize = 65536
207 207 self._maxchainlen = None
208 208 self.index = []
209 209 self._pcache = {}
210 210 self._nodecache = {nullid: nullrev}
211 211 self._nodepos = None
212 212
213 213 v = REVLOG_DEFAULT_VERSION
214 214 opts = getattr(opener, 'options', None)
215 215 if opts is not None:
216 216 if 'revlogv1' in opts:
217 217 if 'generaldelta' in opts:
218 218 v |= REVLOGGENERALDELTA
219 219 else:
220 220 v = 0
221 221 if 'chunkcachesize' in opts:
222 222 self._chunkcachesize = opts['chunkcachesize']
223 223 if 'maxchainlen' in opts:
224 224 self._maxchainlen = opts['maxchainlen']
225 225
226 226 if self._chunkcachesize <= 0:
227 227 raise RevlogError(_('revlog chunk cache size %r is not greater '
228 228 'than 0') % self._chunkcachesize)
229 229 elif self._chunkcachesize & (self._chunkcachesize - 1):
230 230 raise RevlogError(_('revlog chunk cache size %r is not a power '
231 231 'of 2') % self._chunkcachesize)
232 232
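# The power-of-two test above is the usual bit trick: for n > 0,
# n & (n - 1) clears the lowest set bit, so it is zero exactly when
# a single bit is set:
assert 65536 & (65536 - 1) == 0     # 2**16: accepted
assert 65537 & (65537 - 1) != 0     # rejected as not a power of two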
233 233 i = ''
234 234 self._initempty = True
235 235 try:
236 236 f = self.opener(self.indexfile)
237 237 i = f.read()
238 238 f.close()
239 239 if len(i) > 0:
240 240 v = struct.unpack(versionformat, i[:4])[0]
241 241 self._initempty = False
242 242 except IOError, inst:
243 243 if inst.errno != errno.ENOENT:
244 244 raise
245 245
246 246 self.version = v
247 247 self._inline = v & REVLOGNGINLINEDATA
248 248 self._generaldelta = v & REVLOGGENERALDELTA
249 249 flags = v & ~0xFFFF
250 250 fmt = v & 0xFFFF
251 251 if fmt == REVLOGV0 and flags:
252 252 raise RevlogError(_("index %s unknown flags %#04x for format v0")
253 253 % (self.indexfile, flags >> 16))
254 254 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
255 255 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
256 256 % (self.indexfile, flags >> 16))
257 257 elif fmt > REVLOGNG:
258 258 raise RevlogError(_("index %s unknown format %d")
259 259 % (self.indexfile, fmt))
260 260
261 261 self._io = revlogio()
262 262 if self.version == REVLOGV0:
263 263 self._io = revlogoldio()
264 264 try:
265 265 d = self._io.parseindex(i, self._inline)
266 266 except (ValueError, IndexError):
267 267 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
268 268 self.index, nodemap, self._chunkcache = d
269 269 if nodemap is not None:
270 270 self.nodemap = self._nodecache = nodemap
271 271 if not self._chunkcache:
272 272 self._chunkclear()
273 # revnum -> (chain-length, sum-delta-length)
274 self._chaininfocache = {}
273 275
274 276 def tip(self):
275 277 return self.node(len(self.index) - 2)
276 278 def __len__(self):
277 279 return len(self.index) - 1
278 280 def __iter__(self):
279 281 return iter(xrange(len(self)))
280 282 def revs(self, start=0, stop=None):
281 283 """iterate over all rev in this revlog (from start to stop)"""
282 284 step = 1
283 285 if stop is not None:
284 286 if start > stop:
285 287 step = -1
286 288 stop += step
287 289 else:
288 290 stop = len(self)
289 291 return xrange(start, stop, step)
290 292
291 293 @util.propertycache
292 294 def nodemap(self):
293 295 self.rev(self.node(0))
294 296 return self._nodecache
295 297
296 298 def hasnode(self, node):
297 299 try:
298 300 self.rev(node)
299 301 return True
300 302 except KeyError:
301 303 return False
302 304
303 305 def clearcaches(self):
304 306 try:
305 307 self._nodecache.clearcaches()
306 308 except AttributeError:
307 309 self._nodecache = {nullid: nullrev}
308 310 self._nodepos = None
309 311
310 312 def rev(self, node):
311 313 try:
312 314 return self._nodecache[node]
313 315 except TypeError:
314 316 raise
315 317 except RevlogError:
316 318 # parsers.c radix tree lookup failed
317 319 raise LookupError(node, self.indexfile, _('no node'))
318 320 except KeyError:
319 321 # pure python cache lookup failed
320 322 n = self._nodecache
321 323 i = self.index
322 324 p = self._nodepos
323 325 if p is None:
324 326 p = len(i) - 2
325 327 for r in xrange(p, -1, -1):
326 328 v = i[r][7]
327 329 n[v] = r
328 330 if v == node:
329 331 self._nodepos = r - 1
330 332 return r
331 333 raise LookupError(node, self.indexfile, _('no node'))
332 334
333 335 def node(self, rev):
334 336 return self.index[rev][7]
335 337 def linkrev(self, rev):
336 338 return self.index[rev][4]
337 339 def parents(self, node):
338 340 i = self.index
339 341 d = i[self.rev(node)]
340 342 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
341 343 def parentrevs(self, rev):
342 344 return self.index[rev][5:7]
343 345 def start(self, rev):
344 346 return int(self.index[rev][0] >> 16)
345 347 def end(self, rev):
346 348 return self.start(rev) + self.length(rev)
347 349 def length(self, rev):
348 350 return self.index[rev][1]
349 351 def chainbase(self, rev):
350 352 index = self.index
351 353 base = index[rev][3]
352 354 while base != rev:
353 355 rev = base
354 356 base = index[rev][3]
355 357 return base
356 358 def chainlen(self, rev):
357 359 return self._chaininfo(rev)[0]
360
358 361 def _chaininfo(self, rev):
362 chaininfocache = self._chaininfocache
363 if rev in chaininfocache:
364 return chaininfocache[rev]
359 365 index = self.index
360 366 generaldelta = self._generaldelta
361 367 iterrev = rev
362 368 e = index[iterrev]
363 369 clen = 0
364 370 compresseddeltalen = 0
365 371 while iterrev != e[3]:
366 372 clen += 1
367 373 compresseddeltalen += e[1]
368 374 if generaldelta:
369 375 iterrev = e[3]
370 376 else:
371 377 iterrev -= 1
378 if iterrev in chaininfocache:
379 t = chaininfocache[iterrev]
380 clen += t[0]
381 compresseddeltalen += t[1]
382 break
372 383 e = index[iterrev]
373 # add text length of base since decompressing that also takes work
384 else:
385 # Add text length of base since decompressing that also takes
386 # work. For cache hits the length is already included.
374 387 compresseddeltalen += e[1]
375 return clen, compresseddeltalen
388 r = (clen, compresseddeltalen)
389 chaininfocache[rev] = r
390 return r
391
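# The point of the new cache (issue4452): a walk now stops at the first
# ancestor whose totals are already known, so a pass like the sketch
# below does amortized O(1) work per rev instead of re-walking each
# delta chain from scratch (rl stands for any revlog instance):
for r in rl.revs():
    chainlen, compresseddeltalen = rl._chaininfo(r)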
376 392 def flags(self, rev):
377 393 return self.index[rev][0] & 0xFFFF
378 394 def rawsize(self, rev):
379 395 """return the length of the uncompressed text for a given revision"""
380 396 l = self.index[rev][2]
381 397 if l >= 0:
382 398 return l
383 399
384 400 t = self.revision(self.node(rev))
385 401 return len(t)
386 402 size = rawsize
387 403
388 404 def ancestors(self, revs, stoprev=0, inclusive=False):
389 405 """Generate the ancestors of 'revs' in reverse topological order.
390 406 Does not generate revs lower than stoprev.
391 407
392 408 See the documentation for ancestor.lazyancestors for more details."""
393 409
394 410 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
395 411 inclusive=inclusive)
396 412
397 413 def descendants(self, revs):
398 414 """Generate the descendants of 'revs' in revision order.
399 415
400 416 Yield a sequence of revision numbers starting with a child of
401 417 some rev in revs, i.e., each revision is *not* considered a
402 418 descendant of itself. Results are ordered by revision number (a
403 419 topological sort)."""
404 420 first = min(revs)
405 421 if first == nullrev:
406 422 for i in self:
407 423 yield i
408 424 return
409 425
410 426 seen = set(revs)
411 427 for i in self.revs(start=first + 1):
412 428 for x in self.parentrevs(i):
413 429 if x != nullrev and x in seen:
414 430 seen.add(i)
415 431 yield i
416 432 break
417 433
418 434 def findcommonmissing(self, common=None, heads=None):
419 435 """Return a tuple of the ancestors of common and the ancestors of heads
420 436 that are not ancestors of common. In revset terminology, we return the
421 437 tuple:
422 438
423 439 ::common, (::heads) - (::common)
424 440
425 441 The list is sorted by revision number, meaning it is
426 442 topologically sorted.
427 443
428 444 'heads' and 'common' are both lists of node IDs. If heads is
429 445 not supplied, uses all of the revlog's heads. If common is not
430 446 supplied, uses nullid."""
431 447 if common is None:
432 448 common = [nullid]
433 449 if heads is None:
434 450 heads = self.heads()
435 451
436 452 common = [self.rev(n) for n in common]
437 453 heads = [self.rev(n) for n in heads]
438 454
439 455 # we want the ancestors, but inclusive
440 456 class lazyset(object):
441 457 def __init__(self, lazyvalues):
442 458 self.addedvalues = set()
443 459 self.lazyvalues = lazyvalues
444 460
445 461 def __contains__(self, value):
446 462 return value in self.addedvalues or value in self.lazyvalues
447 463
448 464 def __iter__(self):
449 465 added = self.addedvalues
450 466 for r in added:
451 467 yield r
452 468 for r in self.lazyvalues:
453 469 if not r in added:
454 470 yield r
455 471
456 472 def add(self, value):
457 473 self.addedvalues.add(value)
458 474
459 475 def update(self, values):
460 476 self.addedvalues.update(values)
461 477
462 478 has = lazyset(self.ancestors(common))
463 479 has.add(nullrev)
464 480 has.update(common)
465 481
466 482 # take all ancestors from heads that aren't in has
467 483 missing = set()
468 484 visit = util.deque(r for r in heads if r not in has)
469 485 while visit:
470 486 r = visit.popleft()
471 487 if r in missing:
472 488 continue
473 489 else:
474 490 missing.add(r)
475 491 for p in self.parentrevs(r):
476 492 if p not in has:
477 493 visit.append(p)
478 494 missing = list(missing)
479 495 missing.sort()
480 496 return has, [self.node(r) for r in missing]
481 497
482 498 def findmissingrevs(self, common=None, heads=None):
483 499 """Return the revision numbers of the ancestors of heads that
484 500 are not ancestors of common.
485 501
486 502 More specifically, return a list of revision numbers corresponding to
487 503 nodes N such that every N satisfies the following constraints:
488 504
489 505 1. N is an ancestor of some node in 'heads'
490 506 2. N is not an ancestor of any node in 'common'
491 507
492 508 The list is sorted by revision number, meaning it is
493 509 topologically sorted.
494 510
495 511 'heads' and 'common' are both lists of revision numbers. If heads is
496 512 not supplied, uses all of the revlog's heads. If common is not
497 513 supplied, uses nullid."""
498 514 if common is None:
499 515 common = [nullrev]
500 516 if heads is None:
501 517 heads = self.headrevs()
502 518
503 519 return ancestor.missingancestors(heads, common, self.parentrevs)
504 520
505 521 def findmissing(self, common=None, heads=None):
506 522 """Return the ancestors of heads that are not ancestors of common.
507 523
508 524 More specifically, return a list of nodes N such that every N
509 525 satisfies the following constraints:
510 526
511 527 1. N is an ancestor of some node in 'heads'
512 528 2. N is not an ancestor of any node in 'common'
513 529
514 530 The list is sorted by revision number, meaning it is
515 531 topologically sorted.
516 532
517 533 'heads' and 'common' are both lists of node IDs. If heads is
518 534 not supplied, uses all of the revlog's heads. If common is not
519 535 supplied, uses nullid."""
520 536 if common is None:
521 537 common = [nullid]
522 538 if heads is None:
523 539 heads = self.heads()
524 540
525 541 common = [self.rev(n) for n in common]
526 542 heads = [self.rev(n) for n in heads]
527 543
528 544 return [self.node(r) for r in
529 545 ancestor.missingancestors(heads, common, self.parentrevs)]
530 546
531 547 def nodesbetween(self, roots=None, heads=None):
532 548 """Return a topological path from 'roots' to 'heads'.
533 549
534 550 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
535 551 topologically sorted list of all nodes N that satisfy both of
536 552 these constraints:
537 553
538 554 1. N is a descendant of some node in 'roots'
539 555 2. N is an ancestor of some node in 'heads'
540 556
541 557 Every node is considered to be both a descendant and an ancestor
542 558 of itself, so every reachable node in 'roots' and 'heads' will be
543 559 included in 'nodes'.
544 560
545 561 'outroots' is the list of reachable nodes in 'roots', i.e., the
546 562 subset of 'roots' that is returned in 'nodes'. Likewise,
547 563 'outheads' is the subset of 'heads' that is also in 'nodes'.
548 564
549 565 'roots' and 'heads' are both lists of node IDs. If 'roots' is
550 566 unspecified, uses nullid as the only root. If 'heads' is
551 567 unspecified, uses list of all of the revlog's heads."""
552 568 nonodes = ([], [], [])
553 569 if roots is not None:
554 570 roots = list(roots)
555 571 if not roots:
556 572 return nonodes
557 573 lowestrev = min([self.rev(n) for n in roots])
558 574 else:
559 575 roots = [nullid] # Everybody's a descendant of nullid
560 576 lowestrev = nullrev
561 577 if (lowestrev == nullrev) and (heads is None):
562 578 # We want _all_ the nodes!
563 579 return ([self.node(r) for r in self], [nullid], list(self.heads()))
564 580 if heads is None:
565 581 # All nodes are ancestors, so the latest ancestor is the last
566 582 # node.
567 583 highestrev = len(self) - 1
568 584 # Set ancestors to None to signal that every node is an ancestor.
569 585 ancestors = None
570 586 # Set heads to an empty dictionary for later discovery of heads
571 587 heads = {}
572 588 else:
573 589 heads = list(heads)
574 590 if not heads:
575 591 return nonodes
576 592 ancestors = set()
577 593 # Turn heads into a dictionary so we can remove 'fake' heads.
578 594 # Also, later we will be using it to filter out the heads we can't
579 595 # find from roots.
580 596 heads = dict.fromkeys(heads, False)
581 597 # Start at the top and keep marking parents until we're done.
582 598 nodestotag = set(heads)
583 599 # Remember where the top was so we can use it as a limit later.
584 600 highestrev = max([self.rev(n) for n in nodestotag])
585 601 while nodestotag:
586 602 # grab a node to tag
587 603 n = nodestotag.pop()
588 604 # Never tag nullid
589 605 if n == nullid:
590 606 continue
591 607 # A node's revision number represents its place in a
592 608 # topologically sorted list of nodes.
593 609 r = self.rev(n)
594 610 if r >= lowestrev:
595 611 if n not in ancestors:
596 612 # If we are possibly a descendant of one of the roots
597 613 # and we haven't already been marked as an ancestor
598 614 ancestors.add(n) # Mark as ancestor
599 615 # Add non-nullid parents to list of nodes to tag.
600 616 nodestotag.update([p for p in self.parents(n) if
601 617 p != nullid])
602 618 elif n in heads: # We've seen it before, is it a fake head?
603 619 # So it is, real heads should not be the ancestors of
604 620 # any other heads.
605 621 heads.pop(n)
606 622 if not ancestors:
607 623 return nonodes
608 624 # Now that we have our set of ancestors, we want to remove any
609 625 # roots that are not ancestors.
610 626
611 627 # If one of the roots was nullid, everything is included anyway.
612 628 if lowestrev > nullrev:
613 629 # But, since we weren't, let's recompute the lowest rev to not
614 630 # include roots that aren't ancestors.
615 631
616 632 # Filter out roots that aren't ancestors of heads
617 633 roots = [n for n in roots if n in ancestors]
618 634 # Recompute the lowest revision
619 635 if roots:
620 636 lowestrev = min([self.rev(n) for n in roots])
621 637 else:
622 638 # No more roots? Return empty list
623 639 return nonodes
624 640 else:
625 641 # We are descending from nullid, and don't need to care about
626 642 # any other roots.
627 643 lowestrev = nullrev
628 644 roots = [nullid]
629 645 # Transform our roots list into a set.
630 646 descendants = set(roots)
631 647 # Also, keep the original roots so we can filter out roots that aren't
632 648 # 'real' roots (i.e. are descended from other roots).
633 649 roots = descendants.copy()
634 650 # Our topologically sorted list of output nodes.
635 651 orderedout = []
636 652 # Don't start at nullid since we don't want nullid in our output list,
637 653 # and if nullid shows up in descendants, empty parents will look like
638 654 # they're descendants.
639 655 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
640 656 n = self.node(r)
641 657 isdescendant = False
642 658 if lowestrev == nullrev: # Everybody is a descendant of nullid
643 659 isdescendant = True
644 660 elif n in descendants:
645 661 # n is already a descendant
646 662 isdescendant = True
647 663 # This check only needs to be done here because all the roots
648 664 # will start being marked as descendants before the loop.
649 665 if n in roots:
650 666 # If n was a root, check if it's a 'real' root.
651 667 p = tuple(self.parents(n))
652 668 # If any of its parents are descendants, it's not a root.
653 669 if (p[0] in descendants) or (p[1] in descendants):
654 670 roots.remove(n)
655 671 else:
656 672 p = tuple(self.parents(n))
657 673 # A node is a descendant if either of its parents are
658 674 # descendants. (We seeded the descendants set with the roots
659 675 # up there, remember?)
660 676 if (p[0] in descendants) or (p[1] in descendants):
661 677 descendants.add(n)
662 678 isdescendant = True
663 679 if isdescendant and ((ancestors is None) or (n in ancestors)):
664 680 # Only include nodes that are both descendants and ancestors.
665 681 orderedout.append(n)
666 682 if (ancestors is not None) and (n in heads):
667 683 # We're trying to figure out which heads are reachable
668 684 # from roots.
669 685 # Mark this head as having been reached
670 686 heads[n] = True
671 687 elif ancestors is None:
672 688 # Otherwise, we're trying to discover the heads.
673 689 # Assume this is a head because if it isn't, the next step
674 690 # will eventually remove it.
675 691 heads[n] = True
676 692 # But, obviously its parents aren't.
677 693 for p in self.parents(n):
678 694 heads.pop(p, None)
679 695 heads = [n for n, flag in heads.iteritems() if flag]
680 696 roots = list(roots)
681 697 assert orderedout
682 698 assert roots
683 699 assert heads
684 700 return (orderedout, roots, heads)
685 701
686 702 def headrevs(self):
687 703 try:
688 704 return self.index.headrevs()
689 705 except AttributeError:
690 706 return self._headrevs()
691 707
692 708 def _headrevs(self):
693 709 count = len(self)
694 710 if not count:
695 711 return [nullrev]
696 712 # we won't iterate over filtered revs, so nobody is a head at start
697 713 ishead = [0] * (count + 1)
698 714 index = self.index
699 715 for r in self:
700 716 ishead[r] = 1 # I may be a head
701 717 e = index[r]
702 718 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
703 719 return [r for r, val in enumerate(ishead) if val]
704 720
705 721 def heads(self, start=None, stop=None):
706 722 """return the list of all nodes that have no children
707 723
708 724 if start is specified, only heads that are descendants of
709 725 start will be returned
710 726 if stop is specified, it will consider all the revs from stop
711 727 as if they had no children
712 728 """
713 729 if start is None and stop is None:
714 730 if not len(self):
715 731 return [nullid]
716 732 return [self.node(r) for r in self.headrevs()]
717 733
718 734 if start is None:
719 735 start = nullid
720 736 if stop is None:
721 737 stop = []
722 738 stoprevs = set([self.rev(n) for n in stop])
723 739 startrev = self.rev(start)
724 740 reachable = set((startrev,))
725 741 heads = set((startrev,))
726 742
727 743 parentrevs = self.parentrevs
728 744 for r in self.revs(start=startrev + 1):
729 745 for p in parentrevs(r):
730 746 if p in reachable:
731 747 if r not in stoprevs:
732 748 reachable.add(r)
733 749 heads.add(r)
734 750 if p in heads and p not in stoprevs:
735 751 heads.remove(p)
736 752
737 753 return [self.node(r) for r in heads]
738 754
739 755 def children(self, node):
740 756 """find the children of a given node"""
741 757 c = []
742 758 p = self.rev(node)
743 759 for r in self.revs(start=p + 1):
744 760 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
745 761 if prevs:
746 762 for pr in prevs:
747 763 if pr == p:
748 764 c.append(self.node(r))
749 765 elif p == nullrev:
750 766 c.append(self.node(r))
751 767 return c
752 768
753 769 def descendant(self, start, end):
754 770 if start == nullrev:
755 771 return True
756 772 for i in self.descendants([start]):
757 773 if i == end:
758 774 return True
759 775 elif i > end:
760 776 break
761 777 return False
762 778
763 779 def commonancestorsheads(self, a, b):
764 780 """calculate all the heads of the common ancestors of nodes a and b"""
765 781 a, b = self.rev(a), self.rev(b)
766 782 try:
767 783 ancs = self.index.commonancestorsheads(a, b)
768 784 except (AttributeError, OverflowError): # C implementation failed
769 785 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
770 786 return map(self.node, ancs)
771 787
772 788 def isancestor(self, a, b):
773 789 """return True if node a is an ancestor of node b
774 790
775 791 The implementation of this is trivial but the use of
776 792 commonancestorsheads is not."""
777 793 return a in self.commonancestorsheads(a, b)
778 794
779 795 def ancestor(self, a, b):
780 796 """calculate the "best" common ancestor of nodes a and b"""
781 797
782 798 a, b = self.rev(a), self.rev(b)
783 799 try:
784 800 ancs = self.index.ancestors(a, b)
785 801 except (AttributeError, OverflowError):
786 802 ancs = ancestor.ancestors(self.parentrevs, a, b)
787 803 if ancs:
788 804 # choose a consistent winner when there's a tie
789 805 return min(map(self.node, ancs))
790 806 return nullid
791 807
792 808 def _match(self, id):
793 809 if isinstance(id, int):
794 810 # rev
795 811 return self.node(id)
796 812 if len(id) == 20:
797 813 # possibly a binary node
798 814 # odds of a binary node being all hex in ASCII are 1 in 10**25
799 815 try:
800 816 node = id
801 817 self.rev(node) # quick search the index
802 818 return node
803 819 except LookupError:
804 820 pass # may be partial hex id
805 821 try:
806 822 # str(rev)
807 823 rev = int(id)
808 824 if str(rev) != id:
809 825 raise ValueError
810 826 if rev < 0:
811 827 rev = len(self) + rev
812 828 if rev < 0 or rev >= len(self):
813 829 raise ValueError
814 830 return self.node(rev)
815 831 except (ValueError, OverflowError):
816 832 pass
817 833 if len(id) == 40:
818 834 try:
819 835 # a full hex nodeid?
820 836 node = bin(id)
821 837 self.rev(node)
822 838 return node
823 839 except (TypeError, LookupError):
824 840 pass
825 841
826 842 def _partialmatch(self, id):
827 843 try:
828 844 n = self.index.partialmatch(id)
829 845 if n and self.hasnode(n):
830 846 return n
831 847 return None
832 848 except RevlogError:
833 849 # parsers.c radix tree lookup gave multiple matches
834 850 # fall through to slow path that filters hidden revisions
835 851 pass
836 852 except (AttributeError, ValueError):
837 853 # we are pure python, or key was too short to search radix tree
838 854 pass
839 855
840 856 if id in self._pcache:
841 857 return self._pcache[id]
842 858
843 859 if len(id) < 40:
844 860 try:
845 861 # hex(node)[:...]
846 862 l = len(id) // 2 # grab an even number of digits
847 863 prefix = bin(id[:l * 2])
848 864 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
849 865 nl = [n for n in nl if hex(n).startswith(id) and
850 866 self.hasnode(n)]
851 867 if len(nl) > 0:
852 868 if len(nl) == 1:
853 869 self._pcache[id] = nl[0]
854 870 return nl[0]
855 871 raise LookupError(id, self.indexfile,
856 872 _('ambiguous identifier'))
857 873 return None
858 874 except TypeError:
859 875 pass
860 876
861 877 def lookup(self, id):
862 878 """locate a node based on:
863 879 - revision number or str(revision number)
864 880 - nodeid or subset of hex nodeid
865 881 """
866 882 n = self._match(id)
867 883 if n is not None:
868 884 return n
869 885 n = self._partialmatch(id)
870 886 if n:
871 887 return n
872 888
873 889 raise LookupError(id, self.indexfile, _('no match found'))
874 890
875 891 def cmp(self, node, text):
876 892 """compare text with a given file revision
877 893
878 894 returns True if text is different than what is stored.
879 895 """
880 896 p1, p2 = self.parents(node)
881 897 return hash(text, p1, p2) != node
882 898
883 899 def _addchunk(self, offset, data):
884 900 o, d = self._chunkcache
885 901 # try to add to existing cache
886 902 if o + len(d) == offset and len(d) + len(data) < _chunksize:
887 903 self._chunkcache = o, d + data
888 904 else:
889 905 self._chunkcache = offset, data
890 906
891 907 def _loadchunk(self, offset, length):
892 908 if self._inline:
893 909 df = self.opener(self.indexfile)
894 910 else:
895 911 df = self.opener(self.datafile)
896 912
897 913 # Cache data both forward and backward around the requested
898 914 # data, in a fixed size window. This helps speed up operations
899 915 # involving reading the revlog backwards.
900 916 cachesize = self._chunkcachesize
901 917 realoffset = offset & ~(cachesize - 1)
902 918 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
903 919 - realoffset)
904 920 df.seek(realoffset)
905 921 d = df.read(reallength)
906 922 df.close()
907 923 self._addchunk(realoffset, d)
908 924 if offset != realoffset or reallength != length:
909 925 return util.buffer(d, offset - realoffset, length)
910 926 return d
911 927
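# The masks round the window down and up to cache-size boundaries, so
# with the default 64 KiB cache a 100-byte read at offset 70000 turns
# into one aligned 64 KiB read:
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)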
912 928 def _getchunk(self, offset, length):
913 929 o, d = self._chunkcache
914 930 l = len(d)
915 931
916 932 # is it in the cache?
917 933 cachestart = offset - o
918 934 cacheend = cachestart + length
919 935 if cachestart >= 0 and cacheend <= l:
920 936 if cachestart == 0 and cacheend == l:
921 937 return d # avoid a copy
922 938 return util.buffer(d, cachestart, cacheend - cachestart)
923 939
924 940 return self._loadchunk(offset, length)
925 941
926 942 def _chunkraw(self, startrev, endrev):
927 943 start = self.start(startrev)
928 944 end = self.end(endrev)
929 945 if self._inline:
930 946 start += (startrev + 1) * self._io.size
931 947 end += (endrev + 1) * self._io.size
932 948 length = end - start
933 949 return self._getchunk(start, length)
934 950
935 951 def _chunk(self, rev):
936 952 return decompress(self._chunkraw(rev, rev))
937 953
938 954 def _chunks(self, revs):
939 955 '''faster version of [self._chunk(rev) for rev in revs]
940 956
941 957 Assumes that revs is in ascending order.'''
942 958 if not revs:
943 959 return []
944 960 start = self.start
945 961 length = self.length
946 962 inline = self._inline
947 963 iosize = self._io.size
948 964 buffer = util.buffer
949 965
950 966 l = []
951 967 ladd = l.append
952 968
953 969 # preload the cache
954 970 try:
955 971 while True:
956 972 # ensure that the cache doesn't change out from under us
957 973 _cache = self._chunkcache
958 974 self._chunkraw(revs[0], revs[-1])
959 975 if _cache == self._chunkcache:
960 976 break
961 977 offset, data = _cache
962 978 except OverflowError:
963 979 # issue4215 - we can't cache a run of chunks greater than
964 980 # 2G on Windows
965 981 return [self._chunk(rev) for rev in revs]
966 982
967 983 for rev in revs:
968 984 chunkstart = start(rev)
969 985 if inline:
970 986 chunkstart += (rev + 1) * iosize
971 987 chunklength = length(rev)
972 988 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
973 989
974 990 return l
975 991
976 992 def _chunkclear(self):
977 993 self._chunkcache = (0, '')
978 994
979 995 def deltaparent(self, rev):
980 996 """return deltaparent of the given revision"""
981 997 base = self.index[rev][3]
982 998 if base == rev:
983 999 return nullrev
984 1000 elif self._generaldelta:
985 1001 return base
986 1002 else:
987 1003 return rev - 1
988 1004
989 1005 def revdiff(self, rev1, rev2):
990 1006 """return or calculate a delta between two revisions"""
991 1007 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
992 1008 return str(self._chunk(rev2))
993 1009
994 1010 return mdiff.textdiff(self.revision(rev1),
995 1011 self.revision(rev2))
996 1012
997 1013 def revision(self, nodeorrev):
998 1014 """return an uncompressed revision of a given node or revision
999 1015 number.
1000 1016 """
1001 1017 if isinstance(nodeorrev, int):
1002 1018 rev = nodeorrev
1003 1019 node = self.node(rev)
1004 1020 else:
1005 1021 node = nodeorrev
1006 1022 rev = None
1007 1023
1008 1024 _cache = self._cache # grab local copy of cache to avoid thread race
1009 1025 cachedrev = None
1010 1026 if node == nullid:
1011 1027 return ""
1012 1028 if _cache:
1013 1029 if _cache[0] == node:
1014 1030 return _cache[2]
1015 1031 cachedrev = _cache[1]
1016 1032
1017 1033 # look up what we need to read
1018 1034 text = None
1019 1035 if rev is None:
1020 1036 rev = self.rev(node)
1021 1037
1022 1038 # check rev flags
1023 1039 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1024 1040 raise RevlogError(_('incompatible revision flag %x') %
1025 1041 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1026 1042
1027 1043 # build delta chain
1028 1044 chain = []
1029 1045 index = self.index # for performance
1030 1046 generaldelta = self._generaldelta
1031 1047 iterrev = rev
1032 1048 e = index[iterrev]
1033 1049 while iterrev != e[3] and iterrev != cachedrev:
1034 1050 chain.append(iterrev)
1035 1051 if generaldelta:
1036 1052 iterrev = e[3]
1037 1053 else:
1038 1054 iterrev -= 1
1039 1055 e = index[iterrev]
1040 1056
1041 1057 if iterrev == cachedrev:
1042 1058 # cache hit
1043 1059 text = _cache[2]
1044 1060 else:
1045 1061 chain.append(iterrev)
1046 1062 chain.reverse()
1047 1063
1048 1064 # drop cache to save memory
1049 1065 self._cache = None
1050 1066
1051 1067 bins = self._chunks(chain)
1052 1068 if text is None:
1053 1069 text = str(bins[0])
1054 1070 bins = bins[1:]
1055 1071
1056 1072 text = mdiff.patches(text, bins)
1057 1073
1058 1074 text = self._checkhash(text, node, rev)
1059 1075
1060 1076 self._cache = (node, rev, text)
1061 1077 return text
1062 1078
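# Typical use: reconstruct a full text by node or by rev number; the
# second call below is served from the one-entry (node, rev, text)
# cache (rl stands for any non-empty revlog instance):
text = rl.revision(rl.node(0))
assert rl.revision(0) == text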
1063 1079 def hash(self, text, p1, p2):
1064 1080 """Compute a node hash.
1065 1081
1066 1082 Available as a function so that subclasses can replace the hash
1067 1083 as needed.
1068 1084 """
1069 1085 return hash(text, p1, p2)
1070 1086
1071 1087 def _checkhash(self, text, node, rev):
1072 1088 p1, p2 = self.parents(node)
1073 1089 self.checkhash(text, p1, p2, node, rev)
1074 1090 return text
1075 1091
1076 1092 def checkhash(self, text, p1, p2, node, rev=None):
1077 1093 if node != self.hash(text, p1, p2):
1078 1094 revornode = rev
1079 1095 if revornode is None:
1080 1096 revornode = templatefilters.short(hex(node))
1081 1097 raise RevlogError(_("integrity check failed on %s:%s")
1082 1098 % (self.indexfile, revornode))
1083 1099
1084 1100 def checkinlinesize(self, tr, fp=None):
1085 1101 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1086 1102 return
1087 1103
1088 1104 trinfo = tr.find(self.indexfile)
1089 1105 if trinfo is None:
1090 1106 raise RevlogError(_("%s not found in the transaction")
1091 1107 % self.indexfile)
1092 1108
1093 1109 trindex = trinfo[2]
1094 1110 dataoff = self.start(trindex)
1095 1111
1096 1112 tr.add(self.datafile, dataoff)
1097 1113
1098 1114 if fp:
1099 1115 fp.flush()
1100 1116 fp.close()
1101 1117
1102 1118 df = self.opener(self.datafile, 'w')
1103 1119 try:
1104 1120 for r in self:
1105 1121 df.write(self._chunkraw(r, r))
1106 1122 finally:
1107 1123 df.close()
1108 1124
1109 1125 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1110 1126 self.version &= ~(REVLOGNGINLINEDATA)
1111 1127 self._inline = False
1112 1128 for i in self:
1113 1129 e = self._io.packentry(self.index[i], self.node, self.version, i)
1114 1130 fp.write(e)
1115 1131
1116 1132 # if we don't call close, the temp file will never replace the
1117 1133 # real index
1118 1134 fp.close()
1119 1135
1120 1136 tr.replace(self.indexfile, trindex * self._io.size)
1121 1137 self._chunkclear()
1122 1138
1123 1139 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1124 1140 node=None):
1125 1141 """add a revision to the log
1126 1142
1127 1143 text - the revision data to add
1128 1144 transaction - the transaction object used for rollback
1129 1145 link - the linkrev data to add
1130 1146 p1, p2 - the parent nodeids of the revision
1131 1147 cachedelta - an optional precomputed delta
1132 1148 node - nodeid of revision; typically node is not specified, and it is
1133 1149 computed by default as hash(text, p1, p2), however subclasses might
1134 1150 use different hashing method (and override checkhash() in such case)
1135 1151 """
1136 1152 if link == nullrev:
1137 1153 raise RevlogError(_("attempted to add linkrev -1 to %s")
1138 1154 % self.indexfile)
1139 1155 node = node or self.hash(text, p1, p2)
1140 1156 if node in self.nodemap:
1141 1157 return node
1142 1158
1143 1159 dfh = None
1144 1160 if not self._inline:
1145 1161 dfh = self.opener(self.datafile, "a")
1146 1162 ifh = self.opener(self.indexfile, "a+")
1147 1163 try:
1148 1164 return self._addrevision(node, text, transaction, link, p1, p2,
1149 1165 cachedelta, ifh, dfh)
1150 1166 finally:
1151 1167 if dfh:
1152 1168 dfh.close()
1153 1169 ifh.close()
1154 1170
1155 1171 def compress(self, text):
1156 1172 """ generate a possibly-compressed representation of text """
1157 1173 if not text:
1158 1174 return ("", text)
1159 1175 l = len(text)
1160 1176 bin = None
1161 1177 if l < 44:
1162 1178 pass
1163 1179 elif l > 1000000:
1164 1180 # zlib makes an internal copy, thus doubling memory usage for
1165 1181 # large files, so let's do this in pieces
1166 1182 z = zlib.compressobj()
1167 1183 p = []
1168 1184 pos = 0
1169 1185 while pos < l:
1170 1186 pos2 = pos + 2**20
1171 1187 p.append(z.compress(text[pos:pos2]))
1172 1188 pos = pos2
1173 1189 p.append(z.flush())
1174 1190 if sum(map(len, p)) < l:
1175 1191 bin = "".join(p)
1176 1192 else:
1177 1193 bin = _compress(text)
1178 1194 if bin is None or len(bin) > l:
1179 1195 if text[0] == '\0':
1180 1196 return ("", text)
1181 1197 return ('u', text)
1182 1198 return ("", bin)
1183 1199
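# The piecewise path feeds zlib one megabyte at a time to avoid its
# internal full copy of the input; the concatenated pieces still form
# a single valid zlib stream:
import zlib
big = 'x' * (3 * 2**20)
z = zlib.compressobj()
parts = []
for pos in xrange(0, len(big), 2**20):
    parts.append(z.compress(big[pos:pos + 2**20]))
parts.append(z.flush())
assert zlib.decompress(''.join(parts)) == big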
1184 1200 def _addrevision(self, node, text, transaction, link, p1, p2,
1185 1201 cachedelta, ifh, dfh):
1186 1202 """internal function to add revisions to the log
1187 1203
1188 1204 see addrevision for argument descriptions.
1189 1205 invariants:
1190 1206 - text is optional (can be None); if not set, cachedelta must be set.
1191 1207 if both are set, they must correspond to each other.
1192 1208 """
1193 1209 btext = [text]
1194 1210 def buildtext():
1195 1211 if btext[0] is not None:
1196 1212 return btext[0]
1197 1213 # flush any pending writes here so we can read it in revision
1198 1214 if dfh:
1199 1215 dfh.flush()
1200 1216 ifh.flush()
1201 1217 basetext = self.revision(self.node(cachedelta[0]))
1202 1218 btext[0] = mdiff.patch(basetext, cachedelta[1])
1203 1219 try:
1204 1220 self.checkhash(btext[0], p1, p2, node)
1205 1221 except CensoredNodeError:
1206 1222 pass # always import a censor tombstone.
1207 1223 return btext[0]
1208 1224
1209 1225 def builddelta(rev):
1210 1226 # can we use the cached delta?
1211 1227 if cachedelta and cachedelta[0] == rev:
1212 1228 delta = cachedelta[1]
1213 1229 else:
1214 1230 t = buildtext()
1215 1231 ptext = self.revision(self.node(rev))
1216 1232 delta = mdiff.textdiff(ptext, t)
1217 1233 data = self.compress(delta)
1218 1234 l = len(data[1]) + len(data[0])
1219 1235 if basecache[0] == rev:
1220 1236 chainbase = basecache[1]
1221 1237 else:
1222 1238 chainbase = self.chainbase(rev)
1223 1239 dist = l + offset - self.start(chainbase)
1224 1240 if self._generaldelta:
1225 1241 base = rev
1226 1242 else:
1227 1243 base = chainbase
1228 1244 chainlen, compresseddeltalen = self._chaininfo(rev)
1229 1245 chainlen += 1
1230 1246 compresseddeltalen += l
1231 1247 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1232 1248
1233 1249 curr = len(self)
1234 1250 prev = curr - 1
1235 1251 base = chainbase = curr
1236 1252 chainlen = None
1237 1253 offset = self.end(prev)
1238 1254 flags = 0
1239 1255 d = None
1240 1256 if self._basecache is None:
1241 1257 self._basecache = (prev, self.chainbase(prev))
1242 1258 basecache = self._basecache
1243 1259 p1r, p2r = self.rev(p1), self.rev(p2)
1244 1260
1245 1261 # should we try to build a delta?
1246 1262 if prev != nullrev:
1247 1263 if self._generaldelta:
1248 1264 if p1r >= basecache[1]:
1249 1265 d = builddelta(p1r)
1250 1266 elif p2r >= basecache[1]:
1251 1267 d = builddelta(p2r)
1252 1268 else:
1253 1269 d = builddelta(prev)
1254 1270 else:
1255 1271 d = builddelta(prev)
1256 1272 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1257 1273
1258 1274 # full versions are inserted when the needed deltas
1259 1275 # become comparable to the uncompressed text
1260 1276 if text is None:
1261 1277 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1262 1278 cachedelta[1])
1263 1279 else:
1264 1280 textlen = len(text)
1265 1281
1266 1282 # - 'dist' is the distance from the base revision -- bounding it limits
1267 1283 # the amount of I/O we need to do.
1268 1284 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1269 1285 # to apply -- bounding it limits the amount of CPU we consume.
1270 1286 if (d is None or dist > textlen * 4 or l > textlen or
1271 1287 compresseddeltalen > textlen * 2 or
1272 1288 (self._maxchainlen and chainlen > self._maxchainlen)):
1273 1289 text = buildtext()
1274 1290 data = self.compress(text)
1275 1291 l = len(data[1]) + len(data[0])
1276 1292 base = chainbase = curr
1277 1293
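# Restated as a standalone predicate (a sketch; names match the locals
# above): a candidate delta survives only if every bound holds,
# otherwise the full text is stored as a fresh snapshot.
def _deltaok(dist, l, chainlen, compresseddeltalen, textlen, maxchainlen):
    return (dist <= textlen * 4 and l <= textlen
            and compresseddeltalen <= textlen * 2
            and not (maxchainlen and chainlen > maxchainlen))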
1278 1294 e = (offset_type(offset, flags), l, textlen,
1279 1295 base, link, p1r, p2r, node)
1280 1296 self.index.insert(-1, e)
1281 1297 self.nodemap[node] = curr
1282 1298
1283 1299 entry = self._io.packentry(e, self.node, self.version, curr)
1284 1300 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1285 1301
1286 1302 if type(text) == str: # only accept immutable objects
1287 1303 self._cache = (node, curr, text)
1288 1304 self._basecache = (curr, chainbase)
1289 1305 return node
1290 1306
1291 1307 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1292 1308 curr = len(self) - 1
1293 1309 if not self._inline:
1294 1310 transaction.add(self.datafile, offset)
1295 1311 transaction.add(self.indexfile, curr * len(entry))
1296 1312 if data[0]:
1297 1313 dfh.write(data[0])
1298 1314 dfh.write(data[1])
1299 1315 dfh.flush()
1300 1316 ifh.write(entry)
1301 1317 else:
1302 1318 offset += curr * self._io.size
1303 1319 transaction.add(self.indexfile, offset, curr)
1304 1320 ifh.write(entry)
1305 1321 ifh.write(data[0])
1306 1322 ifh.write(data[1])
1307 1323 self.checkinlinesize(transaction, ifh)
1308 1324
1309 1325 def addgroup(self, bundle, linkmapper, transaction):
1310 1326 """
1311 1327 add a delta group
1312 1328
1313 1329 given a set of deltas, add them to the revision log. the
1314 1330 first delta is against its parent, which should be in our
1315 1331 log, the rest are against the previous delta.
1316 1332 """
1317 1333
1318 1334 # track the base of the current delta log
1319 1335 content = []
1320 1336 node = None
1321 1337
1322 1338 r = len(self)
1323 1339 end = 0
1324 1340 if r:
1325 1341 end = self.end(r - 1)
1326 1342 ifh = self.opener(self.indexfile, "a+")
1327 1343 isize = r * self._io.size
1328 1344 if self._inline:
1329 1345 transaction.add(self.indexfile, end + isize, r)
1330 1346 dfh = None
1331 1347 else:
1332 1348 transaction.add(self.indexfile, isize, r)
1333 1349 transaction.add(self.datafile, end)
1334 1350 dfh = self.opener(self.datafile, "a")
1335 1351
1336 1352 try:
1337 1353 # loop through our set of deltas
1338 1354 chain = None
1339 1355 while True:
1340 1356 chunkdata = bundle.deltachunk(chain)
1341 1357 if not chunkdata:
1342 1358 break
1343 1359 node = chunkdata['node']
1344 1360 p1 = chunkdata['p1']
1345 1361 p2 = chunkdata['p2']
1346 1362 cs = chunkdata['cs']
1347 1363 deltabase = chunkdata['deltabase']
1348 1364 delta = chunkdata['delta']
1349 1365
1350 1366 content.append(node)
1351 1367
1352 1368 link = linkmapper(cs)
1353 1369 if node in self.nodemap:
1354 1370 # this can happen if two branches make the same change
1355 1371 chain = node
1356 1372 continue
1357 1373
1358 1374 for p in (p1, p2):
1359 1375 if p not in self.nodemap:
1360 1376 raise LookupError(p, self.indexfile,
1361 1377 _('unknown parent'))
1362 1378
1363 1379 if deltabase not in self.nodemap:
1364 1380 raise LookupError(deltabase, self.indexfile,
1365 1381 _('unknown delta base'))
1366 1382
1367 1383 baserev = self.rev(deltabase)
1368 1384 chain = self._addrevision(node, None, transaction, link,
1369 1385 p1, p2, (baserev, delta), ifh, dfh)
1370 1386 if not dfh and not self._inline:
1371 1387 # addrevision switched from inline to conventional
1372 1388 # reopen the index
1373 1389 ifh.close()
1374 1390 dfh = self.opener(self.datafile, "a")
1375 1391 ifh = self.opener(self.indexfile, "a")
1376 1392 finally:
1377 1393 if dfh:
1378 1394 dfh.close()
1379 1395 ifh.close()
1380 1396
1381 1397 return content
1382 1398
1383 1399 def getstrippoint(self, minlink):
1384 1400 """find the minimum rev that must be stripped to strip the linkrev
1385 1401
1386 1402 Returns a tuple containing the minimum rev and a set of all revs that
1387 1403 have linkrevs that will be broken by this strip.
1388 1404 """
1389 1405 brokenrevs = set()
1390 1406 strippoint = len(self)
1391 1407
1392 1408 heads = {}
1393 1409 futurelargelinkrevs = set()
1394 1410 for head in self.headrevs():
1395 1411 headlinkrev = self.linkrev(head)
1396 1412 heads[head] = headlinkrev
1397 1413 if headlinkrev >= minlink:
1398 1414 futurelargelinkrevs.add(headlinkrev)
1399 1415
1400 1416 # This algorithm involves walking down the rev graph, starting at the
1401 1417 # heads. Since the revs are topologically sorted according to linkrev,
1402 1418 # once all head linkrevs are below the minlink, we know there are
1403 1419 # no more revs that could have a linkrev greater than minlink.
1404 1420 # So we can stop walking.
1405 1421 while futurelargelinkrevs:
1406 1422 strippoint -= 1
1407 1423 linkrev = heads.pop(strippoint)
1408 1424
1409 1425 if linkrev < minlink:
1410 1426 brokenrevs.add(strippoint)
1411 1427 else:
1412 1428 futurelargelinkrevs.remove(linkrev)
1413 1429
1414 1430 for p in self.parentrevs(strippoint):
1415 1431 if p != nullrev:
1416 1432 plinkrev = self.linkrev(p)
1417 1433 heads[p] = plinkrev
1418 1434 if plinkrev >= minlink:
1419 1435 futurelargelinkrevs.add(plinkrev)
1420 1436
1421 1437 return strippoint, brokenrevs
1422 1438
1423 1439 def strip(self, minlink, transaction):
1424 1440 """truncate the revlog on the first revision with a linkrev >= minlink
1425 1441
1426 1442 This function is called when we're stripping revision minlink and
1427 1443 its descendants from the repository.
1428 1444
1429 1445 We have to remove all revisions with linkrev >= minlink, because
1430 1446 the equivalent changelog revisions will be renumbered after the
1431 1447 strip.
1432 1448
1433 1449 So we truncate the revlog on the first of these revisions, and
1434 1450 trust that the caller has saved the revisions that shouldn't be
1435 1451 removed and that it'll re-add them after this truncation.
1436 1452 """
1437 1453 if len(self) == 0:
1438 1454 return
1439 1455
1440 1456 rev, _ = self.getstrippoint(minlink)
1441 1457 if rev == len(self):
1442 1458 return
1443 1459
1444 1460 # first truncate the files on disk
1445 1461 end = self.start(rev)
1446 1462 if not self._inline:
1447 1463 transaction.add(self.datafile, end)
1448 1464 end = rev * self._io.size
1449 1465 else:
1450 1466 end += rev * self._io.size
1451 1467
1452 1468 transaction.add(self.indexfile, end)
1453 1469
1454 1470 # then reset internal state in memory to forget those revisions
1455 1471 self._cache = None
1472 self._chaininfocache = {}
1456 1473 self._chunkclear()
1457 1474 for x in xrange(rev, len(self)):
1458 1475 del self.nodemap[self.node(x)]
1459 1476
1460 1477 del self.index[rev:-1]
1461 1478
1462 1479 def checksize(self):
1463 1480 expected = 0
1464 1481 if len(self):
1465 1482 expected = max(0, self.end(len(self) - 1))
1466 1483
1467 1484 try:
1468 1485 f = self.opener(self.datafile)
1469 1486 f.seek(0, 2)
1470 1487 actual = f.tell()
1471 1488 f.close()
1472 1489 dd = actual - expected
1473 1490 except IOError, inst:
1474 1491 if inst.errno != errno.ENOENT:
1475 1492 raise
1476 1493 dd = 0
1477 1494
1478 1495 try:
1479 1496 f = self.opener(self.indexfile)
1480 1497 f.seek(0, 2)
1481 1498 actual = f.tell()
1482 1499 f.close()
1483 1500 s = self._io.size
1484 1501 i = max(0, actual // s)
1485 1502 di = actual - (i * s)
1486 1503 if self._inline:
1487 1504 databytes = 0
1488 1505 for r in self:
1489 1506 databytes += max(0, self.length(r))
1490 1507 dd = 0
1491 1508 di = actual - len(self) * s - databytes
1492 1509 except IOError, inst:
1493 1510 if inst.errno != errno.ENOENT:
1494 1511 raise
1495 1512 di = 0
1496 1513
1497 1514 return (dd, di)
1498 1515
1499 1516 def files(self):
1500 1517 res = [self.indexfile]
1501 1518 if not self._inline:
1502 1519 res.append(self.datafile)
1503 1520 return res