revlog: increase I/O bound to 4x the amount of data consumed...
Siddharth Agarwal
r23288:2b9bc796 default
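
This commit tunes the heuristic in _addrevision() that decides when to store a
full snapshot of a revision instead of another delta. The bound on 'dist' (the
span of compressed data that must be read back to reconstruct the revision,
i.e. the I/O cost) is raised from 2x to 4x the uncompressed text length, while
the 2x bound on the total size of deltas to apply (the CPU cost) is unchanged.
A minimal sketch of the resulting condition, paraphrasing the diff below (the
helper name storefulltext is illustrative, not part of revlog.py; the
surrounding bookkeeping is omitted):

    # Store a full text rather than a delta when any bound is exceeded.
    #   dist               - compressed bytes read to rebuild the revision (I/O)
    #   l                  - size of this candidate compressed delta
    #   compresseddeltalen - total size of the deltas to apply (CPU)
    def storefulltext(d, dist, l, compresseddeltalen, textlen,
                      chainlen, maxchainlen):
        return (d is None
                or dist > textlen * 4  # was textlen * 2 before this change
                or l > textlen
                or compresseddeltalen > textlen * 2
                or (maxchainlen and chainlen > maxchainlen))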
@@ -1,1503 +1,1503 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45 CensoredNodeError = error.CensoredNodeError
46 46
47 47 def getoffset(q):
48 48 return int(q >> 16)
49 49
50 50 def gettype(q):
51 51 return int(q & 0xFFFF)
52 52
53 53 def offset_type(offset, type):
54 54 return long(long(offset) << 16 | type)
55 55
56 56 _nullhash = _sha(nullid)
57 57
58 58 def hash(text, p1, p2):
59 59 """generate a hash from the given text and its parent hashes
60 60
61 61 This hash combines both the current file contents and its history
62 62 in a manner that makes it easy to distinguish nodes with the same
63 63 content in the revision graph.
64 64 """
65 65 # As of now, if one of the parent nodes is null, p2 is null
66 66 if p2 == nullid:
67 67 # deep copy of a hash is faster than creating one
68 68 s = _nullhash.copy()
69 69 s.update(p1)
70 70 else:
71 71 # none of the parent nodes are nullid
72 72 l = [p1, p2]
73 73 l.sort()
74 74 s = _sha(l[0])
75 75 s.update(l[1])
76 76 s.update(text)
77 77 return s.digest()
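
# Note: the parent hashes are sorted before hashing, so
# hash(text, p1, p2) == hash(text, p2, p1); node identity does not depend on
# parent order. The fast path above only needs to test p2 because, by
# convention, a revision with a single parent stores it as p1 and nullid as p2.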
78 78
79 79 def decompress(bin):
80 80 """ decompress the given input """
81 81 if not bin:
82 82 return bin
83 83 t = bin[0]
84 84 if t == '\0':
85 85 return bin
86 86 if t == 'x':
87 87 try:
88 88 return _decompress(bin)
89 89 except zlib.error, e:
90 90 raise RevlogError(_("revlog decompress error: %s") % str(e))
91 91 if t == 'u':
92 92 return bin[1:]
93 93 raise RevlogError(_("unknown compression type %r") % t)
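
# The one-byte tag consumed above distinguishes three chunk encodings (see
# compress() in the revlog class below, which produces them):
#   '\0' - stored literally; compress() leaves text starting with NUL as-is,
#          since it cannot be confused with the other two markers
#   'x'  - a zlib stream (0x78, i.e. 'x', is the usual zlib header byte)
#   'u'  - stored uncompressed, with a one-byte 'u' prefix to strip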
94 94
95 95 # index v0:
96 96 # 4 bytes: offset
97 97 # 4 bytes: compressed length
98 98 # 4 bytes: base rev
99 99 # 4 bytes: link rev
100 100 # 32 bytes: parent 1 nodeid
101 101 # 32 bytes: parent 2 nodeid
102 102 # 32 bytes: nodeid
103 103 indexformatv0 = ">4l20s20s20s"
104 104 v0shaoffset = 56
105 105
106 106 class revlogoldio(object):
107 107 def __init__(self):
108 108 self.size = struct.calcsize(indexformatv0)
109 109
110 110 def parseindex(self, data, inline):
111 111 s = self.size
112 112 index = []
113 113 nodemap = {nullid: nullrev}
114 114 n = off = 0
115 115 l = len(data)
116 116 while off + s <= l:
117 117 cur = data[off:off + s]
118 118 off += s
119 119 e = _unpack(indexformatv0, cur)
120 120 # transform to revlogv1 format
121 121 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
122 122 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
123 123 index.append(e2)
124 124 nodemap[e[6]] = n
125 125 n += 1
126 126
127 127 # add the magic null revision at -1
128 128 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
129 129
130 130 return index, nodemap, None
131 131
132 132 def packentry(self, entry, node, version, rev):
133 133 if gettype(entry[0]):
134 134 raise RevlogError(_("index entry flags need RevlogNG"))
135 135 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
136 136 node(entry[5]), node(entry[6]), entry[7])
137 137 return _pack(indexformatv0, *e2)
138 138
139 139 # index ng:
140 140 # 6 bytes: offset
141 141 # 2 bytes: flags
142 142 # 4 bytes: compressed length
143 143 # 4 bytes: uncompressed length
144 144 # 4 bytes: base rev
145 145 # 4 bytes: link rev
146 146 # 4 bytes: parent 1 rev
147 147 # 4 bytes: parent 2 rev
148 148 # 32 bytes: nodeid
149 149 indexformatng = ">Qiiiiii20s12x"
150 150 ngshaoffset = 32
151 151 versionformat = ">I"
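
# A RevlogNG index entry is struct.calcsize(">Qiiiiii20s12x") == 64 bytes:
# 8 (offset << 16 | flags) + 4 * 6 (lengths, base, link, parents) + 20 (nodeid)
# + 12 bytes of padding; ngshaoffset == 32 is where the nodeid starts. For the
# first entry, packentry() below overlays the leading 4 bytes with the version
# header -- safe because rev 0 starts at offset 0, so those bytes are zero.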
152 152
153 153 class revlogio(object):
154 154 def __init__(self):
155 155 self.size = struct.calcsize(indexformatng)
156 156
157 157 def parseindex(self, data, inline):
158 158 # call the C implementation to parse the index data
159 159 index, cache = parsers.parse_index2(data, inline)
160 160 return index, getattr(index, 'nodemap', None), cache
161 161
162 162 def packentry(self, entry, node, version, rev):
163 163 p = _pack(indexformatng, *entry)
164 164 if rev == 0:
165 165 p = _pack(versionformat, version) + p[4:]
166 166 return p
167 167
168 168 class revlog(object):
169 169 """
170 170 the underlying revision storage object
171 171
172 172 A revlog consists of two parts, an index and the revision data.
173 173
174 174 The index is a file with a fixed record size containing
175 175 information on each revision, including its nodeid (hash), the
176 176 nodeids of its parents, the position and offset of its data within
177 177 the data file, and the revision it's based on. Finally, each entry
178 178 contains a linkrev entry that can serve as a pointer to external
179 179 data.
180 180
181 181 The revision data itself is a linear collection of data chunks.
182 182 Each chunk represents a revision and is usually represented as a
183 183 delta against the previous chunk. To bound lookup time, runs of
184 184 deltas are limited to about 2 times the length of the original
185 185 version data. This makes retrieval of a version proportional to
186 186 its size, or O(1) relative to the number of revisions.
187 187
188 188 Both pieces of the revlog are written to in an append-only
189 189 fashion, which means we never need to rewrite a file to insert or
190 190 remove data, and can use some simple techniques to avoid the need
191 191 for locking while reading.
192 192 """
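
# Each self.index entry is an 8-tuple mirroring the RevlogNG on-disk format:
#   e[0] - offset (<< 16) | flags (see start() and flags() below)
#   e[1] - compressed length of this chunk
#   e[2] - uncompressed text length, or -1 if unknown (see rawsize())
#   e[3] - base revision of the delta chain
#   e[4] - linkrev
#   e[5], e[6] - parent revisions (nullrev if absent)
#   e[7] - binary nodeid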
193 193 def __init__(self, opener, indexfile):
194 194 """
195 195 create a revlog object
196 196
197 197 opener is a function that abstracts the file opening operation
198 198 and can be used to implement COW semantics or the like.
199 199 """
200 200 self.indexfile = indexfile
201 201 self.datafile = indexfile[:-2] + ".d"
202 202 self.opener = opener
203 203 self._cache = None
204 204 self._basecache = None
205 205 self._chunkcache = (0, '')
206 206 self._chunkcachesize = 65536
207 207 self._maxchainlen = None
208 208 self.index = []
209 209 self._pcache = {}
210 210 self._nodecache = {nullid: nullrev}
211 211 self._nodepos = None
212 212
213 213 v = REVLOG_DEFAULT_VERSION
214 214 opts = getattr(opener, 'options', None)
215 215 if opts is not None:
216 216 if 'revlogv1' in opts:
217 217 if 'generaldelta' in opts:
218 218 v |= REVLOGGENERALDELTA
219 219 else:
220 220 v = 0
221 221 if 'chunkcachesize' in opts:
222 222 self._chunkcachesize = opts['chunkcachesize']
223 223 if 'maxchainlen' in opts:
224 224 self._maxchainlen = opts['maxchainlen']
225 225
226 226 if self._chunkcachesize <= 0:
227 227 raise RevlogError(_('revlog chunk cache size %r is not greater '
228 228 'than 0') % self._chunkcachesize)
229 229 elif self._chunkcachesize & (self._chunkcachesize - 1):
230 230 raise RevlogError(_('revlog chunk cache size %r is not a power '
231 231 'of 2') % self._chunkcachesize)
232 232
233 233 i = ''
234 234 self._initempty = True
235 235 try:
236 236 f = self.opener(self.indexfile)
237 237 i = f.read()
238 238 f.close()
239 239 if len(i) > 0:
240 240 v = struct.unpack(versionformat, i[:4])[0]
241 241 self._initempty = False
242 242 except IOError, inst:
243 243 if inst.errno != errno.ENOENT:
244 244 raise
245 245
246 246 self.version = v
247 247 self._inline = v & REVLOGNGINLINEDATA
248 248 self._generaldelta = v & REVLOGGENERALDELTA
249 249 flags = v & ~0xFFFF
250 250 fmt = v & 0xFFFF
251 251 if fmt == REVLOGV0 and flags:
252 252 raise RevlogError(_("index %s unknown flags %#04x for format v0")
253 253 % (self.indexfile, flags >> 16))
254 254 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
255 255 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
256 256 % (self.indexfile, flags >> 16))
257 257 elif fmt > REVLOGNG:
258 258 raise RevlogError(_("index %s unknown format %d")
259 259 % (self.indexfile, fmt))
260 260
261 261 self._io = revlogio()
262 262 if self.version == REVLOGV0:
263 263 self._io = revlogoldio()
264 264 try:
265 265 d = self._io.parseindex(i, self._inline)
266 266 except (ValueError, IndexError):
267 267 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
268 268 self.index, nodemap, self._chunkcache = d
269 269 if nodemap is not None:
270 270 self.nodemap = self._nodecache = nodemap
271 271 if not self._chunkcache:
272 272 self._chunkclear()
273 273
274 274 def tip(self):
275 275 return self.node(len(self.index) - 2)
276 276 def __len__(self):
277 277 return len(self.index) - 1
278 278 def __iter__(self):
279 279 return iter(xrange(len(self)))
280 280 def revs(self, start=0, stop=None):
281 281 """iterate over all rev in this revlog (from start to stop)"""
282 282 step = 1
283 283 if stop is not None:
284 284 if start > stop:
285 285 step = -1
286 286 stop += step
287 287 else:
288 288 stop = len(self)
289 289 return xrange(start, stop, step)
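
# For example, revs(5, 2) yields 5, 4, 3, 2: when start > stop, the step
# becomes -1 and stop is adjusted by one so the range stays inclusive.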
290 290
291 291 @util.propertycache
292 292 def nodemap(self):
293 293 self.rev(self.node(0))
294 294 return self._nodecache
295 295
296 296 def hasnode(self, node):
297 297 try:
298 298 self.rev(node)
299 299 return True
300 300 except KeyError:
301 301 return False
302 302
303 303 def clearcaches(self):
304 304 try:
305 305 self._nodecache.clearcaches()
306 306 except AttributeError:
307 307 self._nodecache = {nullid: nullrev}
308 308 self._nodepos = None
309 309
310 310 def rev(self, node):
311 311 try:
312 312 return self._nodecache[node]
313 313 except TypeError:
314 314 raise
315 315 except RevlogError:
316 316 # parsers.c radix tree lookup failed
317 317 raise LookupError(node, self.indexfile, _('no node'))
318 318 except KeyError:
319 319 # pure python cache lookup failed
320 320 n = self._nodecache
321 321 i = self.index
322 322 p = self._nodepos
323 323 if p is None:
324 324 p = len(i) - 2
325 325 for r in xrange(p, -1, -1):
326 326 v = i[r][7]
327 327 n[v] = r
328 328 if v == node:
329 329 self._nodepos = r - 1
330 330 return r
331 331 raise LookupError(node, self.indexfile, _('no node'))
332 332
333 333 def node(self, rev):
334 334 return self.index[rev][7]
335 335 def linkrev(self, rev):
336 336 return self.index[rev][4]
337 337 def parents(self, node):
338 338 i = self.index
339 339 d = i[self.rev(node)]
340 340 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
341 341 def parentrevs(self, rev):
342 342 return self.index[rev][5:7]
343 343 def start(self, rev):
344 344 return int(self.index[rev][0] >> 16)
345 345 def end(self, rev):
346 346 return self.start(rev) + self.length(rev)
347 347 def length(self, rev):
348 348 return self.index[rev][1]
349 349 def chainbase(self, rev):
350 350 index = self.index
351 351 base = index[rev][3]
352 352 while base != rev:
353 353 rev = base
354 354 base = index[rev][3]
355 355 return base
356 356 def chainlen(self, rev):
357 357 return self._chaininfo(rev)[0]
358 358 def _chaininfo(self, rev):
359 359 index = self.index
360 360 generaldelta = self._generaldelta
361 361 iterrev = rev
362 362 e = index[iterrev]
363 363 clen = 0
364 364 compresseddeltalen = 0
365 365 while iterrev != e[3]:
366 366 clen += 1
367 367 compresseddeltalen += e[1]
368 368 if generaldelta:
369 369 iterrev = e[3]
370 370 else:
371 371 iterrev -= 1
372 372 e = index[iterrev]
373 373 # add text length of base since decompressing that also takes work
374 374 compresseddeltalen += e[1]
375 375 return clen, compresseddeltalen
376 376 def flags(self, rev):
377 377 return self.index[rev][0] & 0xFFFF
378 378 def rawsize(self, rev):
379 379 """return the length of the uncompressed text for a given revision"""
380 380 l = self.index[rev][2]
381 381 if l >= 0:
382 382 return l
383 383
384 384 t = self.revision(self.node(rev))
385 385 return len(t)
386 386 size = rawsize
387 387
388 388 def ancestors(self, revs, stoprev=0, inclusive=False):
389 389 """Generate the ancestors of 'revs' in reverse topological order.
390 390 Does not generate revs lower than stoprev.
391 391
392 392 See the documentation for ancestor.lazyancestors for more details."""
393 393
394 394 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
395 395 inclusive=inclusive)
396 396
397 397 def descendants(self, revs):
398 398 """Generate the descendants of 'revs' in revision order.
399 399
400 400 Yield a sequence of revision numbers starting with a child of
401 401 some rev in revs, i.e., each revision is *not* considered a
402 402 descendant of itself. Results are ordered by revision number (a
403 403 topological sort)."""
404 404 first = min(revs)
405 405 if first == nullrev:
406 406 for i in self:
407 407 yield i
408 408 return
409 409
410 410 seen = set(revs)
411 411 for i in self.revs(start=first + 1):
412 412 for x in self.parentrevs(i):
413 413 if x != nullrev and x in seen:
414 414 seen.add(i)
415 415 yield i
416 416 break
417 417
418 418 def findcommonmissing(self, common=None, heads=None):
419 419 """Return a tuple of the ancestors of common and the ancestors of heads
420 420 that are not ancestors of common. In revset terminology, we return the
421 421 tuple:
422 422
423 423 ::common, (::heads) - (::common)
424 424
425 425 The list is sorted by revision number, meaning it is
426 426 topologically sorted.
427 427
428 428 'heads' and 'common' are both lists of node IDs. If heads is
429 429 not supplied, uses all of the revlog's heads. If common is not
430 430 supplied, uses nullid."""
431 431 if common is None:
432 432 common = [nullid]
433 433 if heads is None:
434 434 heads = self.heads()
435 435
436 436 common = [self.rev(n) for n in common]
437 437 heads = [self.rev(n) for n in heads]
438 438
439 439 # we want the ancestors, but inclusive
440 440 class lazyset(object):
441 441 def __init__(self, lazyvalues):
442 442 self.addedvalues = set()
443 443 self.lazyvalues = lazyvalues
444 444
445 445 def __contains__(self, value):
446 446 return value in self.addedvalues or value in self.lazyvalues
447 447
448 448 def __iter__(self):
449 449 added = self.addedvalues
450 450 for r in added:
451 451 yield r
452 452 for r in self.lazyvalues:
453 453 if not r in added:
454 454 yield r
455 455
456 456 def add(self, value):
457 457 self.addedvalues.add(value)
458 458
459 459 def update(self, values):
460 460 self.addedvalues.update(values)
461 461
462 462 has = lazyset(self.ancestors(common))
463 463 has.add(nullrev)
464 464 has.update(common)
465 465
466 466 # take all ancestors from heads that aren't in has
467 467 missing = set()
468 468 visit = util.deque(r for r in heads if r not in has)
469 469 while visit:
470 470 r = visit.popleft()
471 471 if r in missing:
472 472 continue
473 473 else:
474 474 missing.add(r)
475 475 for p in self.parentrevs(r):
476 476 if p not in has:
477 477 visit.append(p)
478 478 missing = list(missing)
479 479 missing.sort()
480 480 return has, [self.node(r) for r in missing]
481 481
482 482 def findmissingrevs(self, common=None, heads=None):
483 483 """Return the revision numbers of the ancestors of heads that
484 484 are not ancestors of common.
485 485
486 486 More specifically, return a list of revision numbers corresponding to
487 487 nodes N such that every N satisfies the following constraints:
488 488
489 489 1. N is an ancestor of some node in 'heads'
490 490 2. N is not an ancestor of any node in 'common'
491 491
492 492 The list is sorted by revision number, meaning it is
493 493 topologically sorted.
494 494
495 495 'heads' and 'common' are both lists of revision numbers. If heads is
496 496 not supplied, uses all of the revlog's heads. If common is not
497 497 supplied, uses nullid."""
498 498 if common is None:
499 499 common = [nullrev]
500 500 if heads is None:
501 501 heads = self.headrevs()
502 502
503 503 return ancestor.missingancestors(heads, common, self.parentrevs)
504 504
505 505 def findmissing(self, common=None, heads=None):
506 506 """Return the ancestors of heads that are not ancestors of common.
507 507
508 508 More specifically, return a list of nodes N such that every N
509 509 satisfies the following constraints:
510 510
511 511 1. N is an ancestor of some node in 'heads'
512 512 2. N is not an ancestor of any node in 'common'
513 513
514 514 The list is sorted by revision number, meaning it is
515 515 topologically sorted.
516 516
517 517 'heads' and 'common' are both lists of node IDs. If heads is
518 518 not supplied, uses all of the revlog's heads. If common is not
519 519 supplied, uses nullid."""
520 520 if common is None:
521 521 common = [nullid]
522 522 if heads is None:
523 523 heads = self.heads()
524 524
525 525 common = [self.rev(n) for n in common]
526 526 heads = [self.rev(n) for n in heads]
527 527
528 528 return [self.node(r) for r in
529 529 ancestor.missingancestors(heads, common, self.parentrevs)]
530 530
531 531 def nodesbetween(self, roots=None, heads=None):
532 532 """Return a topological path from 'roots' to 'heads'.
533 533
534 534 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
535 535 topologically sorted list of all nodes N that satisfy both of
536 536 these constraints:
537 537
538 538 1. N is a descendant of some node in 'roots'
539 539 2. N is an ancestor of some node in 'heads'
540 540
541 541 Every node is considered to be both a descendant and an ancestor
542 542 of itself, so every reachable node in 'roots' and 'heads' will be
543 543 included in 'nodes'.
544 544
545 545 'outroots' is the list of reachable nodes in 'roots', i.e., the
546 546 subset of 'roots' that is returned in 'nodes'. Likewise,
547 547 'outheads' is the subset of 'heads' that is also in 'nodes'.
548 548
549 549 'roots' and 'heads' are both lists of node IDs. If 'roots' is
550 550 unspecified, uses nullid as the only root. If 'heads' is
551 551 unspecified, uses list of all of the revlog's heads."""
552 552 nonodes = ([], [], [])
553 553 if roots is not None:
554 554 roots = list(roots)
555 555 if not roots:
556 556 return nonodes
557 557 lowestrev = min([self.rev(n) for n in roots])
558 558 else:
559 559 roots = [nullid] # Everybody's a descendant of nullid
560 560 lowestrev = nullrev
561 561 if (lowestrev == nullrev) and (heads is None):
562 562 # We want _all_ the nodes!
563 563 return ([self.node(r) for r in self], [nullid], list(self.heads()))
564 564 if heads is None:
565 565 # All nodes are ancestors, so the latest ancestor is the last
566 566 # node.
567 567 highestrev = len(self) - 1
568 568 # Set ancestors to None to signal that every node is an ancestor.
569 569 ancestors = None
570 570 # Set heads to an empty dictionary for later discovery of heads
571 571 heads = {}
572 572 else:
573 573 heads = list(heads)
574 574 if not heads:
575 575 return nonodes
576 576 ancestors = set()
577 577 # Turn heads into a dictionary so we can remove 'fake' heads.
578 578 # Also, later we will be using it to filter out the heads we can't
579 579 # find from roots.
580 580 heads = dict.fromkeys(heads, False)
581 581 # Start at the top and keep marking parents until we're done.
582 582 nodestotag = set(heads)
583 583 # Remember where the top was so we can use it as a limit later.
584 584 highestrev = max([self.rev(n) for n in nodestotag])
585 585 while nodestotag:
586 586 # grab a node to tag
587 587 n = nodestotag.pop()
588 588 # Never tag nullid
589 589 if n == nullid:
590 590 continue
591 591 # A node's revision number represents its place in a
592 592 # topologically sorted list of nodes.
593 593 r = self.rev(n)
594 594 if r >= lowestrev:
595 595 if n not in ancestors:
596 596 # If we are possibly a descendant of one of the roots
597 597 # and we haven't already been marked as an ancestor
598 598 ancestors.add(n) # Mark as ancestor
599 599 # Add non-nullid parents to list of nodes to tag.
600 600 nodestotag.update([p for p in self.parents(n) if
601 601 p != nullid])
602 602 elif n in heads: # We've seen it before, is it a fake head?
603 603 # So it is, real heads should not be the ancestors of
604 604 # any other heads.
605 605 heads.pop(n)
606 606 if not ancestors:
607 607 return nonodes
608 608 # Now that we have our set of ancestors, we want to remove any
609 609 # roots that are not ancestors.
610 610
611 611 # If one of the roots was nullid, everything is included anyway.
612 612 if lowestrev > nullrev:
613 613 # But, since we weren't, let's recompute the lowest rev to not
614 614 # include roots that aren't ancestors.
615 615
616 616 # Filter out roots that aren't ancestors of heads
617 617 roots = [n for n in roots if n in ancestors]
618 618 # Recompute the lowest revision
619 619 if roots:
620 620 lowestrev = min([self.rev(n) for n in roots])
621 621 else:
622 622 # No more roots? Return empty list
623 623 return nonodes
624 624 else:
625 625 # We are descending from nullid, and don't need to care about
626 626 # any other roots.
627 627 lowestrev = nullrev
628 628 roots = [nullid]
629 629 # Transform our roots list into a set.
630 630 descendants = set(roots)
631 631 # Also, keep the original roots so we can filter out roots that aren't
632 632 # 'real' roots (i.e. are descended from other roots).
633 633 roots = descendants.copy()
634 634 # Our topologically sorted list of output nodes.
635 635 orderedout = []
636 636 # Don't start at nullid since we don't want nullid in our output list,
637 637 # and if nullid shows up in descendants, empty parents will look like
638 638 # they're descendants.
639 639 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
640 640 n = self.node(r)
641 641 isdescendant = False
642 642 if lowestrev == nullrev: # Everybody is a descendant of nullid
643 643 isdescendant = True
644 644 elif n in descendants:
645 645 # n is already a descendant
646 646 isdescendant = True
647 647 # This check only needs to be done here because all the roots
648 648 # will start being marked as descendants before the loop.
649 649 if n in roots:
650 650 # If n was a root, check if it's a 'real' root.
651 651 p = tuple(self.parents(n))
652 652 # If any of its parents are descendants, it's not a root.
653 653 if (p[0] in descendants) or (p[1] in descendants):
654 654 roots.remove(n)
655 655 else:
656 656 p = tuple(self.parents(n))
657 657 # A node is a descendant if either of its parents is a
658 658 # descendant. (We seeded the descendants set with the roots
659 659 # up there, remember?)
660 660 if (p[0] in descendants) or (p[1] in descendants):
661 661 descendants.add(n)
662 662 isdescendant = True
663 663 if isdescendant and ((ancestors is None) or (n in ancestors)):
664 664 # Only include nodes that are both descendants and ancestors.
665 665 orderedout.append(n)
666 666 if (ancestors is not None) and (n in heads):
667 667 # We're trying to figure out which heads are reachable
668 668 # from roots.
669 669 # Mark this head as having been reached
670 670 heads[n] = True
671 671 elif ancestors is None:
672 672 # Otherwise, we're trying to discover the heads.
673 673 # Assume this is a head because if it isn't, the next step
674 674 # will eventually remove it.
675 675 heads[n] = True
676 676 # But, obviously its parents aren't.
677 677 for p in self.parents(n):
678 678 heads.pop(p, None)
679 679 heads = [n for n, flag in heads.iteritems() if flag]
680 680 roots = list(roots)
681 681 assert orderedout
682 682 assert roots
683 683 assert heads
684 684 return (orderedout, roots, heads)
685 685
686 686 def headrevs(self):
687 687 try:
688 688 return self.index.headrevs()
689 689 except AttributeError:
690 690 return self._headrevs()
691 691
692 692 def _headrevs(self):
693 693 count = len(self)
694 694 if not count:
695 695 return [nullrev]
696 696 # we won't iterate over filtered revs, so nobody is a head at the start
697 697 ishead = [0] * (count + 1)
698 698 index = self.index
699 699 for r in self:
700 700 ishead[r] = 1 # I may be a head
701 701 e = index[r]
702 702 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
703 703 return [r for r, val in enumerate(ishead) if val]
704 704
705 705 def heads(self, start=None, stop=None):
706 706 """return the list of all nodes that have no children
707 707
708 708 if start is specified, only heads that are descendants of
709 709 start will be returned
710 710 if stop is specified, it will consider all the revs from stop
711 711 as if they had no children
712 712 """
713 713 if start is None and stop is None:
714 714 if not len(self):
715 715 return [nullid]
716 716 return [self.node(r) for r in self.headrevs()]
717 717
718 718 if start is None:
719 719 start = nullid
720 720 if stop is None:
721 721 stop = []
722 722 stoprevs = set([self.rev(n) for n in stop])
723 723 startrev = self.rev(start)
724 724 reachable = set((startrev,))
725 725 heads = set((startrev,))
726 726
727 727 parentrevs = self.parentrevs
728 728 for r in self.revs(start=startrev + 1):
729 729 for p in parentrevs(r):
730 730 if p in reachable:
731 731 if r not in stoprevs:
732 732 reachable.add(r)
733 733 heads.add(r)
734 734 if p in heads and p not in stoprevs:
735 735 heads.remove(p)
736 736
737 737 return [self.node(r) for r in heads]
738 738
739 739 def children(self, node):
740 740 """find the children of a given node"""
741 741 c = []
742 742 p = self.rev(node)
743 743 for r in self.revs(start=p + 1):
744 744 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
745 745 if prevs:
746 746 for pr in prevs:
747 747 if pr == p:
748 748 c.append(self.node(r))
749 749 elif p == nullrev:
750 750 c.append(self.node(r))
751 751 return c
752 752
753 753 def descendant(self, start, end):
754 754 if start == nullrev:
755 755 return True
756 756 for i in self.descendants([start]):
757 757 if i == end:
758 758 return True
759 759 elif i > end:
760 760 break
761 761 return False
762 762
763 763 def commonancestorsheads(self, a, b):
764 764 """calculate all the heads of the common ancestors of nodes a and b"""
765 765 a, b = self.rev(a), self.rev(b)
766 766 try:
767 767 ancs = self.index.commonancestorsheads(a, b)
768 768 except (AttributeError, OverflowError): # C implementation failed
769 769 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
770 770 return map(self.node, ancs)
771 771
772 772 def isancestor(self, a, b):
773 773 """return True if node a is an ancestor of node b
774 774
775 775 The implementation of this is trivial but the use of
776 776 commonancestorsheads is not."""
777 777 return a in self.commonancestorsheads(a, b)
778 778
779 779 def ancestor(self, a, b):
780 780 """calculate the "best" common ancestor of nodes a and b"""
781 781
782 782 a, b = self.rev(a), self.rev(b)
783 783 try:
784 784 ancs = self.index.ancestors(a, b)
785 785 except (AttributeError, OverflowError):
786 786 ancs = ancestor.ancestors(self.parentrevs, a, b)
787 787 if ancs:
788 788 # choose a consistent winner when there's a tie
789 789 return min(map(self.node, ancs))
790 790 return nullid
791 791
792 792 def _match(self, id):
793 793 if isinstance(id, int):
794 794 # rev
795 795 return self.node(id)
796 796 if len(id) == 20:
797 797 # possibly a binary node
798 798 # odds of a binary node being all hex in ASCII are 1 in 10**25
799 799 try:
800 800 node = id
801 801 self.rev(node) # quick search the index
802 802 return node
803 803 except LookupError:
804 804 pass # may be partial hex id
805 805 try:
806 806 # str(rev)
807 807 rev = int(id)
808 808 if str(rev) != id:
809 809 raise ValueError
810 810 if rev < 0:
811 811 rev = len(self) + rev
812 812 if rev < 0 or rev >= len(self):
813 813 raise ValueError
814 814 return self.node(rev)
815 815 except (ValueError, OverflowError):
816 816 pass
817 817 if len(id) == 40:
818 818 try:
819 819 # a full hex nodeid?
820 820 node = bin(id)
821 821 self.rev(node)
822 822 return node
823 823 except (TypeError, LookupError):
824 824 pass
825 825
826 826 def _partialmatch(self, id):
827 827 try:
828 828 n = self.index.partialmatch(id)
829 829 if n and self.hasnode(n):
830 830 return n
831 831 return None
832 832 except RevlogError:
833 833 # parsers.c radix tree lookup gave multiple matches
834 834 # fall through to slow path that filters hidden revisions
835 835 pass
836 836 except (AttributeError, ValueError):
837 837 # we are pure python, or key was too short to search radix tree
838 838 pass
839 839
840 840 if id in self._pcache:
841 841 return self._pcache[id]
842 842
843 843 if len(id) < 40:
844 844 try:
845 845 # hex(node)[:...]
846 846 l = len(id) // 2 # grab an even number of digits
847 847 prefix = bin(id[:l * 2])
848 848 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
849 849 nl = [n for n in nl if hex(n).startswith(id) and
850 850 self.hasnode(n)]
851 851 if len(nl) > 0:
852 852 if len(nl) == 1:
853 853 self._pcache[id] = nl[0]
854 854 return nl[0]
855 855 raise LookupError(id, self.indexfile,
856 856 _('ambiguous identifier'))
857 857 return None
858 858 except TypeError:
859 859 pass
860 860
861 861 def lookup(self, id):
862 862 """locate a node based on:
863 863 - revision number or str(revision number)
864 864 - nodeid or subset of hex nodeid
865 865 """
866 866 n = self._match(id)
867 867 if n is not None:
868 868 return n
869 869 n = self._partialmatch(id)
870 870 if n:
871 871 return n
872 872
873 873 raise LookupError(id, self.indexfile, _('no match found'))
874 874
875 875 def cmp(self, node, text):
876 876 """compare text with a given file revision
877 877
878 878 returns True if text is different than what is stored.
879 879 """
880 880 p1, p2 = self.parents(node)
881 881 return hash(text, p1, p2) != node
882 882
883 883 def _addchunk(self, offset, data):
884 884 o, d = self._chunkcache
885 885 # try to add to existing cache
886 886 if o + len(d) == offset and len(d) + len(data) < _chunksize:
887 887 self._chunkcache = o, d + data
888 888 else:
889 889 self._chunkcache = offset, data
890 890
891 891 def _loadchunk(self, offset, length):
892 892 if self._inline:
893 893 df = self.opener(self.indexfile)
894 894 else:
895 895 df = self.opener(self.datafile)
896 896
897 897 # Cache data both forward and backward around the requested
898 898 # data, in a fixed size window. This helps speed up operations
899 899 # involving reading the revlog backwards.
900 900 cachesize = self._chunkcachesize
901 901 realoffset = offset & ~(cachesize - 1)
902 902 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
903 903 - realoffset)
904 904 df.seek(realoffset)
905 905 d = df.read(reallength)
906 906 df.close()
907 907 self._addchunk(realoffset, d)
908 908 if offset != realoffset or reallength != length:
909 909 return util.buffer(d, offset - realoffset, length)
910 910 return d
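
# Worked example of the window arithmetic above, with the default
# _chunkcachesize of 65536 (a power of two, as __init__ enforces): a request
# for offset=70000, length=100 rounds down to realoffset=65536 and reads
# reallength=65536 bytes, so the requested span is served from a zero-copy
# util.buffer at offset 4464 into the cached window.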
911 911
912 912 def _getchunk(self, offset, length):
913 913 o, d = self._chunkcache
914 914 l = len(d)
915 915
916 916 # is it in the cache?
917 917 cachestart = offset - o
918 918 cacheend = cachestart + length
919 919 if cachestart >= 0 and cacheend <= l:
920 920 if cachestart == 0 and cacheend == l:
921 921 return d # avoid a copy
922 922 return util.buffer(d, cachestart, cacheend - cachestart)
923 923
924 924 return self._loadchunk(offset, length)
925 925
926 926 def _chunkraw(self, startrev, endrev):
927 927 start = self.start(startrev)
928 928 end = self.end(endrev)
929 929 if self._inline:
930 930 start += (startrev + 1) * self._io.size
931 931 end += (endrev + 1) * self._io.size
932 932 length = end - start
933 933 return self._getchunk(start, length)
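
# In an inline revlog the index entries and data chunks are interleaved in the
# .i file: each self._io.size-byte entry is immediately followed by its chunk.
# Hence the physical position of a rev's data is its logical start plus
# (rev + 1) * entry size, counting the entries for revs 0..rev inclusive.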
934 934
935 935 def _chunk(self, rev):
936 936 return decompress(self._chunkraw(rev, rev))
937 937
938 938 def _chunks(self, revs):
939 939 '''faster version of [self._chunk(rev) for rev in revs]
940 940
941 941 Assumes that revs is in ascending order.'''
942 942 if not revs:
943 943 return []
944 944 start = self.start
945 945 length = self.length
946 946 inline = self._inline
947 947 iosize = self._io.size
948 948 buffer = util.buffer
949 949
950 950 l = []
951 951 ladd = l.append
952 952
953 953 # preload the cache
954 954 try:
955 955 while True:
956 956 # ensure that the cache doesn't change out from under us
957 957 _cache = self._chunkcache
958 958 self._chunkraw(revs[0], revs[-1])
959 959 if _cache == self._chunkcache:
960 960 break
961 961 offset, data = _cache
962 962 except OverflowError:
963 963 # issue4215 - we can't cache a run of chunks greater than
964 964 # 2G on Windows
965 965 return [self._chunk(rev) for rev in revs]
966 966
967 967 for rev in revs:
968 968 chunkstart = start(rev)
969 969 if inline:
970 970 chunkstart += (rev + 1) * iosize
971 971 chunklength = length(rev)
972 972 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
973 973
974 974 return l
975 975
976 976 def _chunkclear(self):
977 977 self._chunkcache = (0, '')
978 978
979 979 def deltaparent(self, rev):
980 980 """return deltaparent of the given revision"""
981 981 base = self.index[rev][3]
982 982 if base == rev:
983 983 return nullrev
984 984 elif self._generaldelta:
985 985 return base
986 986 else:
987 987 return rev - 1
988 988
989 989 def revdiff(self, rev1, rev2):
990 990 """return or calculate a delta between two revisions"""
991 991 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
992 992 return str(self._chunk(rev2))
993 993
994 994 return mdiff.textdiff(self.revision(rev1),
995 995 self.revision(rev2))
996 996
997 997 def revision(self, nodeorrev):
998 998 """return an uncompressed revision of a given node or revision
999 999 number.
1000 1000 """
1001 1001 if isinstance(nodeorrev, int):
1002 1002 rev = nodeorrev
1003 1003 node = self.node(rev)
1004 1004 else:
1005 1005 node = nodeorrev
1006 1006 rev = None
1007 1007
1008 1008 _cache = self._cache # grab local copy of cache to avoid thread race
1009 1009 cachedrev = None
1010 1010 if node == nullid:
1011 1011 return ""
1012 1012 if _cache:
1013 1013 if _cache[0] == node:
1014 1014 return _cache[2]
1015 1015 cachedrev = _cache[1]
1016 1016
1017 1017 # look up what we need to read
1018 1018 text = None
1019 1019 if rev is None:
1020 1020 rev = self.rev(node)
1021 1021
1022 1022 # check rev flags
1023 1023 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1024 1024 raise RevlogError(_('incompatible revision flag %x') %
1025 1025 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1026 1026
1027 1027 # build delta chain
1028 1028 chain = []
1029 1029 index = self.index # for performance
1030 1030 generaldelta = self._generaldelta
1031 1031 iterrev = rev
1032 1032 e = index[iterrev]
1033 1033 while iterrev != e[3] and iterrev != cachedrev:
1034 1034 chain.append(iterrev)
1035 1035 if generaldelta:
1036 1036 iterrev = e[3]
1037 1037 else:
1038 1038 iterrev -= 1
1039 1039 e = index[iterrev]
1040 1040
1041 1041 if iterrev == cachedrev:
1042 1042 # cache hit
1043 1043 text = _cache[2]
1044 1044 else:
1045 1045 chain.append(iterrev)
1046 1046 chain.reverse()
1047 1047
1048 1048 # drop cache to save memory
1049 1049 self._cache = None
1050 1050
1051 1051 bins = self._chunks(chain)
1052 1052 if text is None:
1053 1053 text = str(bins[0])
1054 1054 bins = bins[1:]
1055 1055
1056 1056 text = mdiff.patches(text, bins)
1057 1057
1058 1058 text = self._checkhash(text, node, rev)
1059 1059
1060 1060 self._cache = (node, rev, text)
1061 1061 return text
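
# In short: revision() walks the delta chain back to its base (or to the
# cached revision), fetches all needed chunks with one contiguous read via
# _chunks(), applies the deltas with mdiff.patches, verifies the result
# against the stored nodeid, and caches the full text for the next call.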
1062 1062
1063 1063 def hash(self, text, p1, p2):
1064 1064 """Compute a node hash.
1065 1065
1066 1066 Available as a function so that subclasses can replace the hash
1067 1067 as needed.
1068 1068 """
1069 1069 return hash(text, p1, p2)
1070 1070
1071 1071 def _checkhash(self, text, node, rev):
1072 1072 p1, p2 = self.parents(node)
1073 1073 self.checkhash(text, p1, p2, node, rev)
1074 1074 return text
1075 1075
1076 1076 def checkhash(self, text, p1, p2, node, rev=None):
1077 1077 if node != self.hash(text, p1, p2):
1078 1078 revornode = rev
1079 1079 if revornode is None:
1080 1080 revornode = templatefilters.short(hex(node))
1081 1081 raise RevlogError(_("integrity check failed on %s:%s")
1082 1082 % (self.indexfile, revornode))
1083 1083
1084 1084 def checkinlinesize(self, tr, fp=None):
1085 1085 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1086 1086 return
1087 1087
1088 1088 trinfo = tr.find(self.indexfile)
1089 1089 if trinfo is None:
1090 1090 raise RevlogError(_("%s not found in the transaction")
1091 1091 % self.indexfile)
1092 1092
1093 1093 trindex = trinfo[2]
1094 1094 dataoff = self.start(trindex)
1095 1095
1096 1096 tr.add(self.datafile, dataoff)
1097 1097
1098 1098 if fp:
1099 1099 fp.flush()
1100 1100 fp.close()
1101 1101
1102 1102 df = self.opener(self.datafile, 'w')
1103 1103 try:
1104 1104 for r in self:
1105 1105 df.write(self._chunkraw(r, r))
1106 1106 finally:
1107 1107 df.close()
1108 1108
1109 1109 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1110 1110 self.version &= ~(REVLOGNGINLINEDATA)
1111 1111 self._inline = False
1112 1112 for i in self:
1113 1113 e = self._io.packentry(self.index[i], self.node, self.version, i)
1114 1114 fp.write(e)
1115 1115
1116 1116 # if we don't call close, the temp file will never replace the
1117 1117 # real index
1118 1118 fp.close()
1119 1119
1120 1120 tr.replace(self.indexfile, trindex * self._io.size)
1121 1121 self._chunkclear()
1122 1122
1123 1123 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1124 1124 node=None):
1125 1125 """add a revision to the log
1126 1126
1127 1127 text - the revision data to add
1128 1128 transaction - the transaction object used for rollback
1129 1129 link - the linkrev data to add
1130 1130 p1, p2 - the parent nodeids of the revision
1131 1131 cachedelta - an optional precomputed delta
1132 1132 node - nodeid of revision; typically node is not specified, and it is
1133 1133 computed by default as hash(text, p1, p2), however subclasses might
1134 1134 use different hashing method (and override checkhash() in such case)
1135 1135 """
1136 1136 if link == nullrev:
1137 1137 raise RevlogError(_("attempted to add linkrev -1 to %s")
1138 1138 % self.indexfile)
1139 1139 node = node or self.hash(text, p1, p2)
1140 1140 if node in self.nodemap:
1141 1141 return node
1142 1142
1143 1143 dfh = None
1144 1144 if not self._inline:
1145 1145 dfh = self.opener(self.datafile, "a")
1146 1146 ifh = self.opener(self.indexfile, "a+")
1147 1147 try:
1148 1148 return self._addrevision(node, text, transaction, link, p1, p2,
1149 1149 cachedelta, ifh, dfh)
1150 1150 finally:
1151 1151 if dfh:
1152 1152 dfh.close()
1153 1153 ifh.close()
1154 1154
1155 1155 def compress(self, text):
1156 1156 """ generate a possibly-compressed representation of text """
1157 1157 if not text:
1158 1158 return ("", text)
1159 1159 l = len(text)
1160 1160 bin = None
1161 1161 if l < 44:
1162 1162 pass
1163 1163 elif l > 1000000:
1164 1164 # zlib makes an internal copy, thus doubling memory usage for
1165 1165 # large files, so let's do this in pieces
1166 1166 z = zlib.compressobj()
1167 1167 p = []
1168 1168 pos = 0
1169 1169 while pos < l:
1170 1170 pos2 = pos + 2**20
1171 1171 p.append(z.compress(text[pos:pos2]))
1172 1172 pos = pos2
1173 1173 p.append(z.flush())
1174 1174 if sum(map(len, p)) < l:
1175 1175 bin = "".join(p)
1176 1176 else:
1177 1177 bin = _compress(text)
1178 1178 if bin is None or len(bin) > l:
1179 1179 if text[0] == '\0':
1180 1180 return ("", text)
1181 1181 return ('u', text)
1182 1182 return ("", bin)
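
# Texts shorter than 44 bytes are stored as-is, presumably because zlib's
# framing overhead makes compressing them pointless. The tags produced here
# ('' for a self-describing zlib/NUL payload, 'u' for verbatim text) are the
# ones consumed by the module-level decompress() above.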
1183 1183
1184 1184 def _addrevision(self, node, text, transaction, link, p1, p2,
1185 1185 cachedelta, ifh, dfh):
1186 1186 """internal function to add revisions to the log
1187 1187
1188 1188 see addrevision for argument descriptions.
1189 1189 invariants:
1190 1190 - text is optional (can be None); if not set, cachedelta must be set.
1191 1191 if both are set, they must correspond to each other.
1192 1192 """
1193 1193 btext = [text]
1194 1194 def buildtext():
1195 1195 if btext[0] is not None:
1196 1196 return btext[0]
1197 1197 # flush any pending writes here so we can read the data back in revision()
1198 1198 if dfh:
1199 1199 dfh.flush()
1200 1200 ifh.flush()
1201 1201 basetext = self.revision(self.node(cachedelta[0]))
1202 1202 btext[0] = mdiff.patch(basetext, cachedelta[1])
1203 1203 try:
1204 1204 self.checkhash(btext[0], p1, p2, node)
1205 1205 except CensoredNodeError:
1206 1206 pass # always import a censor tombstone.
1207 1207 return btext[0]
1208 1208
1209 1209 def builddelta(rev):
1210 1210 # can we use the cached delta?
1211 1211 if cachedelta and cachedelta[0] == rev:
1212 1212 delta = cachedelta[1]
1213 1213 else:
1214 1214 t = buildtext()
1215 1215 ptext = self.revision(self.node(rev))
1216 1216 delta = mdiff.textdiff(ptext, t)
1217 1217 data = self.compress(delta)
1218 1218 l = len(data[1]) + len(data[0])
1219 1219 if basecache[0] == rev:
1220 1220 chainbase = basecache[1]
1221 1221 else:
1222 1222 chainbase = self.chainbase(rev)
1223 1223 dist = l + offset - self.start(chainbase)
1224 1224 if self._generaldelta:
1225 1225 base = rev
1226 1226 else:
1227 1227 base = chainbase
1228 1228 chainlen, compresseddeltalen = self._chaininfo(rev)
1229 1229 chainlen += 1
1230 1230 compresseddeltalen += l
1231 1231 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1232 1232
1233 1233 curr = len(self)
1234 1234 prev = curr - 1
1235 1235 base = chainbase = curr
1236 1236 chainlen = None
1237 1237 offset = self.end(prev)
1238 1238 flags = 0
1239 1239 d = None
1240 1240 if self._basecache is None:
1241 1241 self._basecache = (prev, self.chainbase(prev))
1242 1242 basecache = self._basecache
1243 1243 p1r, p2r = self.rev(p1), self.rev(p2)
1244 1244
1245 1245 # should we try to build a delta?
1246 1246 if prev != nullrev:
1247 1247 if self._generaldelta:
1248 1248 if p1r >= basecache[1]:
1249 1249 d = builddelta(p1r)
1250 1250 elif p2r >= basecache[1]:
1251 1251 d = builddelta(p2r)
1252 1252 else:
1253 1253 d = builddelta(prev)
1254 1254 else:
1255 1255 d = builddelta(prev)
1256 1256 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1257 1257
1258 1258 # full versions are inserted when the needed deltas
1259 1259 # become comparable to the uncompressed text
1260 1260 if text is None:
1261 1261 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1262 1262 cachedelta[1])
1263 1263 else:
1264 1264 textlen = len(text)
1265 1265
1266 1266 # - 'dist' is the distance from the base revision -- bounding it limits
1267 1267 # the amount of I/O we need to do.
1268 1268 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1269 1269 # to apply -- bounding it limits the amount of CPU we consume.
1270 if (d is None or dist > textlen * 2 or l > textlen or
1270 if (d is None or dist > textlen * 4 or l > textlen or
1271 1271 compresseddeltalen > textlen * 2 or
1272 1272 (self._maxchainlen and chainlen > self._maxchainlen)):
1273 1273 text = buildtext()
1274 1274 data = self.compress(text)
1275 1275 l = len(data[1]) + len(data[0])
1276 1276 base = chainbase = curr
1277 1277
1278 1278 e = (offset_type(offset, flags), l, textlen,
1279 1279 base, link, p1r, p2r, node)
1280 1280 self.index.insert(-1, e)
1281 1281 self.nodemap[node] = curr
1282 1282
1283 1283 entry = self._io.packentry(e, self.node, self.version, curr)
1284 1284 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1285 1285
1286 1286 if type(text) == str: # only accept immutable objects
1287 1287 self._cache = (node, curr, text)
1288 1288 self._basecache = (curr, chainbase)
1289 1289 return node
1290 1290
1291 1291 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1292 1292 curr = len(self) - 1
1293 1293 if not self._inline:
1294 1294 transaction.add(self.datafile, offset)
1295 1295 transaction.add(self.indexfile, curr * len(entry))
1296 1296 if data[0]:
1297 1297 dfh.write(data[0])
1298 1298 dfh.write(data[1])
1299 1299 dfh.flush()
1300 1300 ifh.write(entry)
1301 1301 else:
1302 1302 offset += curr * self._io.size
1303 1303 transaction.add(self.indexfile, offset, curr)
1304 1304 ifh.write(entry)
1305 1305 ifh.write(data[0])
1306 1306 ifh.write(data[1])
1307 1307 self.checkinlinesize(transaction, ifh)
1308 1308
1309 1309 def addgroup(self, bundle, linkmapper, transaction):
1310 1310 """
1311 1311 add a delta group
1312 1312
1313 1313 given a set of deltas, add them to the revision log. the
1314 1314 first delta is against its parent, which should be in our
1315 1315 log, the rest are against the previous delta.
1316 1316 """
1317 1317
1318 1318 # track the base of the current delta log
1319 1319 content = []
1320 1320 node = None
1321 1321
1322 1322 r = len(self)
1323 1323 end = 0
1324 1324 if r:
1325 1325 end = self.end(r - 1)
1326 1326 ifh = self.opener(self.indexfile, "a+")
1327 1327 isize = r * self._io.size
1328 1328 if self._inline:
1329 1329 transaction.add(self.indexfile, end + isize, r)
1330 1330 dfh = None
1331 1331 else:
1332 1332 transaction.add(self.indexfile, isize, r)
1333 1333 transaction.add(self.datafile, end)
1334 1334 dfh = self.opener(self.datafile, "a")
1335 1335
1336 1336 try:
1337 1337 # loop through our set of deltas
1338 1338 chain = None
1339 1339 while True:
1340 1340 chunkdata = bundle.deltachunk(chain)
1341 1341 if not chunkdata:
1342 1342 break
1343 1343 node = chunkdata['node']
1344 1344 p1 = chunkdata['p1']
1345 1345 p2 = chunkdata['p2']
1346 1346 cs = chunkdata['cs']
1347 1347 deltabase = chunkdata['deltabase']
1348 1348 delta = chunkdata['delta']
1349 1349
1350 1350 content.append(node)
1351 1351
1352 1352 link = linkmapper(cs)
1353 1353 if node in self.nodemap:
1354 1354 # this can happen if two branches make the same change
1355 1355 chain = node
1356 1356 continue
1357 1357
1358 1358 for p in (p1, p2):
1359 1359 if p not in self.nodemap:
1360 1360 raise LookupError(p, self.indexfile,
1361 1361 _('unknown parent'))
1362 1362
1363 1363 if deltabase not in self.nodemap:
1364 1364 raise LookupError(deltabase, self.indexfile,
1365 1365 _('unknown delta base'))
1366 1366
1367 1367 baserev = self.rev(deltabase)
1368 1368 chain = self._addrevision(node, None, transaction, link,
1369 1369 p1, p2, (baserev, delta), ifh, dfh)
1370 1370 if not dfh and not self._inline:
1371 1371 # addrevision switched from inline to conventional
1372 1372 # reopen the index
1373 1373 ifh.close()
1374 1374 dfh = self.opener(self.datafile, "a")
1375 1375 ifh = self.opener(self.indexfile, "a")
1376 1376 finally:
1377 1377 if dfh:
1378 1378 dfh.close()
1379 1379 ifh.close()
1380 1380
1381 1381 return content
1382 1382
1383 1383 def getstrippoint(self, minlink):
1384 1384 """find the minimum rev that must be stripped to strip the linkrev
1385 1385
1386 1386 Returns a tuple containing the minimum rev and a set of all revs that
1387 1387 have linkrevs that will be broken by this strip.
1388 1388 """
1389 1389 brokenrevs = set()
1390 1390 strippoint = len(self)
1391 1391
1392 1392 heads = {}
1393 1393 futurelargelinkrevs = set()
1394 1394 for head in self.headrevs():
1395 1395 headlinkrev = self.linkrev(head)
1396 1396 heads[head] = headlinkrev
1397 1397 if headlinkrev >= minlink:
1398 1398 futurelargelinkrevs.add(headlinkrev)
1399 1399
1400 1400 # This algorithm involves walking down the rev graph, starting at the
1401 1401 # heads. Since the revs are topologically sorted according to linkrev,
1402 1402 # once all head linkrevs are below the minlink, we know there are
1403 1403 # no more revs that could have a linkrev greater than minlink.
1404 1404 # So we can stop walking.
1405 1405 while futurelargelinkrevs:
1406 1406 strippoint -= 1
1407 1407 linkrev = heads.pop(strippoint)
1408 1408
1409 1409 if linkrev < minlink:
1410 1410 brokenrevs.add(strippoint)
1411 1411 else:
1412 1412 futurelargelinkrevs.remove(linkrev)
1413 1413
1414 1414 for p in self.parentrevs(strippoint):
1415 1415 if p != nullrev:
1416 1416 plinkrev = self.linkrev(p)
1417 1417 heads[p] = plinkrev
1418 1418 if plinkrev >= minlink:
1419 1419 futurelargelinkrevs.add(plinkrev)
1420 1420
1421 1421 return strippoint, brokenrevs
1422 1422
1423 1423 def strip(self, minlink, transaction):
1424 1424 """truncate the revlog on the first revision with a linkrev >= minlink
1425 1425
1426 1426 This function is called when we're stripping revision minlink and
1427 1427 its descendants from the repository.
1428 1428
1429 1429 We have to remove all revisions with linkrev >= minlink, because
1430 1430 the equivalent changelog revisions will be renumbered after the
1431 1431 strip.
1432 1432
1433 1433 So we truncate the revlog on the first of these revisions, and
1434 1434 trust that the caller has saved the revisions that shouldn't be
1435 1435 removed and that it'll re-add them after this truncation.
1436 1436 """
1437 1437 if len(self) == 0:
1438 1438 return
1439 1439
1440 1440 rev, _ = self.getstrippoint(minlink)
1441 1441 if rev == len(self):
1442 1442 return
1443 1443
1444 1444 # first truncate the files on disk
1445 1445 end = self.start(rev)
1446 1446 if not self._inline:
1447 1447 transaction.add(self.datafile, end)
1448 1448 end = rev * self._io.size
1449 1449 else:
1450 1450 end += rev * self._io.size
1451 1451
1452 1452 transaction.add(self.indexfile, end)
1453 1453
1454 1454 # then reset internal state in memory to forget those revisions
1455 1455 self._cache = None
1456 1456 self._chunkclear()
1457 1457 for x in xrange(rev, len(self)):
1458 1458 del self.nodemap[self.node(x)]
1459 1459
1460 1460 del self.index[rev:-1]
1461 1461
1462 1462 def checksize(self):
1463 1463 expected = 0
1464 1464 if len(self):
1465 1465 expected = max(0, self.end(len(self) - 1))
1466 1466
1467 1467 try:
1468 1468 f = self.opener(self.datafile)
1469 1469 f.seek(0, 2)
1470 1470 actual = f.tell()
1471 1471 f.close()
1472 1472 dd = actual - expected
1473 1473 except IOError, inst:
1474 1474 if inst.errno != errno.ENOENT:
1475 1475 raise
1476 1476 dd = 0
1477 1477
1478 1478 try:
1479 1479 f = self.opener(self.indexfile)
1480 1480 f.seek(0, 2)
1481 1481 actual = f.tell()
1482 1482 f.close()
1483 1483 s = self._io.size
1484 1484 i = max(0, actual // s)
1485 1485 di = actual - (i * s)
1486 1486 if self._inline:
1487 1487 databytes = 0
1488 1488 for r in self:
1489 1489 databytes += max(0, self.length(r))
1490 1490 dd = 0
1491 1491 di = actual - len(self) * s - databytes
1492 1492 except IOError, inst:
1493 1493 if inst.errno != errno.ENOENT:
1494 1494 raise
1495 1495 di = 0
1496 1496
1497 1497 return (dd, di)
1498 1498
1499 1499 def files(self):
1500 1500 res = [self.indexfile]
1501 1501 if not self._inline:
1502 1502 res.append(self.datafile)
1503 1503 return res