revlog: make converting from inline to non-inline work after a strip...
Mike Edgar
r24454:59904edf default
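The single hunk below changes revlog.checkinlinesize(): after a strip earlier in the same transaction, the transaction's record for the index file may carry no revision index, so trindex comes back as None and the old unconditional self.start(trindex) call failed once the revlog later grew past the inline limit. A minimal standalone sketch of the added fallback (the helper name splitpoint is hypothetical; the real logic lives inline in checkinlinesize, as the hunk shows):

    def splitpoint(revlog, trindex):
        # mirrors the new branch in checkinlinesize(); illustrative only
        if trindex is not None:
            return trindex, revlog.start(trindex)
        # revlog was stripped at start of transaction: use all leftover data
        return len(revlog) - 1, revlog.end(-2)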
@@ -1,1588 +1,1593
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 38 REVIDX_DEFAULT_FLAGS = 0
39 39 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47 CensoredNodeError = error.CensoredNodeError
48 48
49 49 def getoffset(q):
50 50 return int(q >> 16)
51 51
52 52 def gettype(q):
53 53 return int(q & 0xFFFF)
54 54
55 55 def offset_type(offset, type):
56 56 return long(long(offset) << 16 | type)
57 57
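# Illustrative example (not part of revlog.py): the data-file offset and the
# 16-bit flag field share a single integer, and getoffset()/gettype() recover
# the two halves, e.g.:
#   packed = offset_type(1024, REVIDX_ISCENSORED)
#   getoffset(packed) == 1024 and gettype(packed) == REVIDX_ISCENSORED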
58 58 _nullhash = _sha(nullid)
59 59
60 60 def hash(text, p1, p2):
61 61 """generate a hash from the given text and its parent hashes
62 62
63 63 This hash combines both the current file contents and its history
64 64 in a manner that makes it easy to distinguish nodes with the same
65 65 content in the revision graph.
66 66 """
67 67 # As of now, if one of the parent nodes is null, p2 is null
68 68 if p2 == nullid:
69 69 # deep copy of a hash is faster than creating one
70 70 s = _nullhash.copy()
71 71 s.update(p1)
72 72 else:
73 73 # none of the parent nodes are nullid
74 74 l = [p1, p2]
75 75 l.sort()
76 76 s = _sha(l[0])
77 77 s.update(l[1])
78 78 s.update(text)
79 79 return s.digest()
80 80
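# Standalone illustration (an assumption of this note, not code from revlog.py)
# of the digest hash() computes above: because nullid is twenty NUL bytes it
# always sorts first, so the p2 == nullid fast path and the sorted general case
# both amount to sha1(min(p1, p2) + max(p1, p2) + text).
import hashlib
def _examplehash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()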
81 81 def decompress(bin):
82 82 """ decompress the given input """
83 83 if not bin:
84 84 return bin
85 85 t = bin[0]
86 86 if t == '\0':
87 87 return bin
88 88 if t == 'x':
89 89 try:
90 90 return _decompress(bin)
91 91 except zlib.error, e:
92 92 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 93 if t == 'u':
94 94 return bin[1:]
95 95 raise RevlogError(_("unknown compression type %r") % t)
96 96
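# Quick round-trip illustration (an assumption, not part of revlog.py) of the
# three encodings decompress() accepts: zlib output starts with 'x', a leading
# 'u' marks text that was stored uncompressed, and '\0' (or empty input) is
# returned untouched:
#   decompress(zlib.compress('some text')) == 'some text'
#   decompress('uplain text') == 'plain text'
#   decompress('\0raw') == '\0raw'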
97 97 # index v0:
98 98 # 4 bytes: offset
99 99 # 4 bytes: compressed length
100 100 # 4 bytes: base rev
101 101 # 4 bytes: link rev
102 102 # 32 bytes: parent 1 nodeid
103 103 # 32 bytes: parent 2 nodeid
104 104 # 32 bytes: nodeid
105 105 indexformatv0 = ">4l20s20s20s"
106 106 v0shaoffset = 56
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 ngshaoffset = 32
153 153 versionformat = ">I"
154 154
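# Worked numbers (illustrative note): one RevlogNG index entry is 64 bytes,
# since struct.calcsize(">Qiiiiii20s12x") == 8 + 6*4 + 20 + 12 == 64, and the
# 20-byte nodeid begins at byte 32 (ngshaoffset above).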
155 155 class revlogio(object):
156 156 def __init__(self):
157 157 self.size = struct.calcsize(indexformatng)
158 158
159 159 def parseindex(self, data, inline):
160 160 # call the C implementation to parse the index data
161 161 index, cache = parsers.parse_index2(data, inline)
162 162 return index, getattr(index, 'nodemap', None), cache
163 163
164 164 def packentry(self, entry, node, version, rev):
165 165 p = _pack(indexformatng, *entry)
166 166 if rev == 0:
167 167 p = _pack(versionformat, version) + p[4:]
168 168 return p
169 169
170 170 class revlog(object):
171 171 """
172 172 the underlying revision storage object
173 173
174 174 A revlog consists of two parts, an index and the revision data.
175 175
176 176 The index is a file with a fixed record size containing
177 177 information on each revision, including its nodeid (hash), the
178 178 nodeids of its parents, the position and offset of its data within
179 179 the data file, and the revision it's based on. Finally, each entry
180 180 contains a linkrev entry that can serve as a pointer to external
181 181 data.
182 182
183 183 The revision data itself is a linear collection of data chunks.
184 184 Each chunk represents a revision and is usually represented as a
185 185 delta against the previous chunk. To bound lookup time, runs of
186 186 deltas are limited to about 2 times the length of the original
187 187 version data. This makes retrieval of a version proportional to
188 188 its size, or O(1) relative to the number of revisions.
189 189
190 190 Both pieces of the revlog are written to in an append-only
191 191 fashion, which means we never need to rewrite a file to insert or
192 192 remove data, and can use some simple techniques to avoid the need
193 193 for locking while reading.
194 194 """
195 195 def __init__(self, opener, indexfile):
196 196 """
197 197 create a revlog object
198 198
199 199 opener is a function that abstracts the file opening operation
200 200 and can be used to implement COW semantics or the like.
201 201 """
202 202 self.indexfile = indexfile
203 203 self.datafile = indexfile[:-2] + ".d"
204 204 self.opener = opener
205 205 self._cache = None
206 206 self._basecache = None
207 207 self._chunkcache = (0, '')
208 208 self._chunkcachesize = 65536
209 209 self._maxchainlen = None
210 210 self.index = []
211 211 self._pcache = {}
212 212 self._nodecache = {nullid: nullrev}
213 213 self._nodepos = None
214 214
215 215 v = REVLOG_DEFAULT_VERSION
216 216 opts = getattr(opener, 'options', None)
217 217 if opts is not None:
218 218 if 'revlogv1' in opts:
219 219 if 'generaldelta' in opts:
220 220 v |= REVLOGGENERALDELTA
221 221 else:
222 222 v = 0
223 223 if 'chunkcachesize' in opts:
224 224 self._chunkcachesize = opts['chunkcachesize']
225 225 if 'maxchainlen' in opts:
226 226 self._maxchainlen = opts['maxchainlen']
227 227
228 228 if self._chunkcachesize <= 0:
229 229 raise RevlogError(_('revlog chunk cache size %r is not greater '
230 230 'than 0') % self._chunkcachesize)
231 231 elif self._chunkcachesize & (self._chunkcachesize - 1):
232 232 raise RevlogError(_('revlog chunk cache size %r is not a power '
233 233 'of 2') % self._chunkcachesize)
234 234
235 235 i = ''
236 236 self._initempty = True
237 237 try:
238 238 f = self.opener(self.indexfile)
239 239 i = f.read()
240 240 f.close()
241 241 if len(i) > 0:
242 242 v = struct.unpack(versionformat, i[:4])[0]
243 243 self._initempty = False
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._generaldelta = v & REVLOGGENERALDELTA
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275 # revnum -> (chain-length, sum-delta-length)
276 276 self._chaininfocache = {}
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __contains__(self, rev):
281 281 return 0 <= rev < len(self)
282 282 def __len__(self):
283 283 return len(self.index) - 1
284 284 def __iter__(self):
285 285 return iter(xrange(len(self)))
286 286 def revs(self, start=0, stop=None):
287 287 """iterate over all rev in this revlog (from start to stop)"""
288 288 step = 1
289 289 if stop is not None:
290 290 if start > stop:
291 291 step = -1
292 292 stop += step
293 293 else:
294 294 stop = len(self)
295 295 return xrange(start, stop, step)
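# Example (illustrative): revs(2, 5) yields 2, 3, 4, 5 and revs(5, 2) yields
# 5, 4, 3, 2 -- stop is inclusive, and a descending range is produced when
# start > stop.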
296 296
297 297 @util.propertycache
298 298 def nodemap(self):
299 299 self.rev(self.node(0))
300 300 return self._nodecache
301 301
302 302 def hasnode(self, node):
303 303 try:
304 304 self.rev(node)
305 305 return True
306 306 except KeyError:
307 307 return False
308 308
309 309 def clearcaches(self):
310 310 try:
311 311 self._nodecache.clearcaches()
312 312 except AttributeError:
313 313 self._nodecache = {nullid: nullrev}
314 314 self._nodepos = None
315 315
316 316 def rev(self, node):
317 317 try:
318 318 return self._nodecache[node]
319 319 except TypeError:
320 320 raise
321 321 except RevlogError:
322 322 # parsers.c radix tree lookup failed
323 323 raise LookupError(node, self.indexfile, _('no node'))
324 324 except KeyError:
325 325 # pure python cache lookup failed
326 326 n = self._nodecache
327 327 i = self.index
328 328 p = self._nodepos
329 329 if p is None:
330 330 p = len(i) - 2
331 331 for r in xrange(p, -1, -1):
332 332 v = i[r][7]
333 333 n[v] = r
334 334 if v == node:
335 335 self._nodepos = r - 1
336 336 return r
337 337 raise LookupError(node, self.indexfile, _('no node'))
338 338
339 339 def node(self, rev):
340 340 return self.index[rev][7]
341 341 def linkrev(self, rev):
342 342 return self.index[rev][4]
343 343 def parents(self, node):
344 344 i = self.index
345 345 d = i[self.rev(node)]
346 346 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
347 347 def parentrevs(self, rev):
348 348 return self.index[rev][5:7]
349 349 def start(self, rev):
350 350 return int(self.index[rev][0] >> 16)
351 351 def end(self, rev):
352 352 return self.start(rev) + self.length(rev)
353 353 def length(self, rev):
354 354 return self.index[rev][1]
355 355 def chainbase(self, rev):
356 356 index = self.index
357 357 base = index[rev][3]
358 358 while base != rev:
359 359 rev = base
360 360 base = index[rev][3]
361 361 return base
362 362 def chainlen(self, rev):
363 363 return self._chaininfo(rev)[0]
364 364
365 365 def _chaininfo(self, rev):
366 366 chaininfocache = self._chaininfocache
367 367 if rev in chaininfocache:
368 368 return chaininfocache[rev]
369 369 index = self.index
370 370 generaldelta = self._generaldelta
371 371 iterrev = rev
372 372 e = index[iterrev]
373 373 clen = 0
374 374 compresseddeltalen = 0
375 375 while iterrev != e[3]:
376 376 clen += 1
377 377 compresseddeltalen += e[1]
378 378 if generaldelta:
379 379 iterrev = e[3]
380 380 else:
381 381 iterrev -= 1
382 382 if iterrev in chaininfocache:
383 383 t = chaininfocache[iterrev]
384 384 clen += t[0]
385 385 compresseddeltalen += t[1]
386 386 break
387 387 e = index[iterrev]
388 388 else:
389 389 # Add text length of base since decompressing that also takes
390 390 # work. For cache hits the length is already included.
391 391 compresseddeltalen += e[1]
392 392 r = (clen, compresseddeltalen)
393 393 chaininfocache[rev] = r
394 394 return r
395 395
396 396 def flags(self, rev):
397 397 return self.index[rev][0] & 0xFFFF
398 398 def rawsize(self, rev):
399 399 """return the length of the uncompressed text for a given revision"""
400 400 l = self.index[rev][2]
401 401 if l >= 0:
402 402 return l
403 403
404 404 t = self.revision(self.node(rev))
405 405 return len(t)
406 406 size = rawsize
407 407
408 408 def ancestors(self, revs, stoprev=0, inclusive=False):
409 409 """Generate the ancestors of 'revs' in reverse topological order.
410 410 Does not generate revs lower than stoprev.
411 411
412 412 See the documentation for ancestor.lazyancestors for more details."""
413 413
414 414 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
415 415 inclusive=inclusive)
416 416
417 417 def descendants(self, revs):
418 418 """Generate the descendants of 'revs' in revision order.
419 419
420 420 Yield a sequence of revision numbers starting with a child of
421 421 some rev in revs, i.e., each revision is *not* considered a
422 422 descendant of itself. Results are ordered by revision number (a
423 423 topological sort)."""
424 424 first = min(revs)
425 425 if first == nullrev:
426 426 for i in self:
427 427 yield i
428 428 return
429 429
430 430 seen = set(revs)
431 431 for i in self.revs(start=first + 1):
432 432 for x in self.parentrevs(i):
433 433 if x != nullrev and x in seen:
434 434 seen.add(i)
435 435 yield i
436 436 break
437 437
438 438 def findcommonmissing(self, common=None, heads=None):
439 439 """Return a tuple of the ancestors of common and the ancestors of heads
440 440 that are not ancestors of common. In revset terminology, we return the
441 441 tuple:
442 442
443 443 ::common, (::heads) - (::common)
444 444
445 445 The list is sorted by revision number, meaning it is
446 446 topologically sorted.
447 447
448 448 'heads' and 'common' are both lists of node IDs. If heads is
449 449 not supplied, uses all of the revlog's heads. If common is not
450 450 supplied, uses nullid."""
451 451 if common is None:
452 452 common = [nullid]
453 453 if heads is None:
454 454 heads = self.heads()
455 455
456 456 common = [self.rev(n) for n in common]
457 457 heads = [self.rev(n) for n in heads]
458 458
459 459 # we want the ancestors, but inclusive
460 460 class lazyset(object):
461 461 def __init__(self, lazyvalues):
462 462 self.addedvalues = set()
463 463 self.lazyvalues = lazyvalues
464 464
465 465 def __contains__(self, value):
466 466 return value in self.addedvalues or value in self.lazyvalues
467 467
468 468 def __iter__(self):
469 469 added = self.addedvalues
470 470 for r in added:
471 471 yield r
472 472 for r in self.lazyvalues:
473 473 if not r in added:
474 474 yield r
475 475
476 476 def add(self, value):
477 477 self.addedvalues.add(value)
478 478
479 479 def update(self, values):
480 480 self.addedvalues.update(values)
481 481
482 482 has = lazyset(self.ancestors(common))
483 483 has.add(nullrev)
484 484 has.update(common)
485 485
486 486 # take all ancestors from heads that aren't in has
487 487 missing = set()
488 488 visit = util.deque(r for r in heads if r not in has)
489 489 while visit:
490 490 r = visit.popleft()
491 491 if r in missing:
492 492 continue
493 493 else:
494 494 missing.add(r)
495 495 for p in self.parentrevs(r):
496 496 if p not in has:
497 497 visit.append(p)
498 498 missing = list(missing)
499 499 missing.sort()
500 500 return has, [self.node(r) for r in missing]
501 501
502 502 def incrementalmissingrevs(self, common=None):
503 503 """Return an object that can be used to incrementally compute the
504 504 revision numbers of the ancestors of arbitrary sets that are not
505 505 ancestors of common. This is an ancestor.incrementalmissingancestors
506 506 object.
507 507
508 508 'common' is a list of revision numbers. If common is not supplied, uses
509 509 nullrev.
510 510 """
511 511 if common is None:
512 512 common = [nullrev]
513 513
514 514 return ancestor.incrementalmissingancestors(self.parentrevs, common)
515 515
516 516 def findmissingrevs(self, common=None, heads=None):
517 517 """Return the revision numbers of the ancestors of heads that
518 518 are not ancestors of common.
519 519
520 520 More specifically, return a list of revision numbers corresponding to
521 521 nodes N such that every N satisfies the following constraints:
522 522
523 523 1. N is an ancestor of some node in 'heads'
524 524 2. N is not an ancestor of any node in 'common'
525 525
526 526 The list is sorted by revision number, meaning it is
527 527 topologically sorted.
528 528
529 529 'heads' and 'common' are both lists of revision numbers. If heads is
530 530 not supplied, uses all of the revlog's heads. If common is not
531 531 supplied, uses nullid."""
532 532 if common is None:
533 533 common = [nullrev]
534 534 if heads is None:
535 535 heads = self.headrevs()
536 536
537 537 inc = self.incrementalmissingrevs(common=common)
538 538 return inc.missingancestors(heads)
539 539
540 540 def findmissing(self, common=None, heads=None):
541 541 """Return the ancestors of heads that are not ancestors of common.
542 542
543 543 More specifically, return a list of nodes N such that every N
544 544 satisfies the following constraints:
545 545
546 546 1. N is an ancestor of some node in 'heads'
547 547 2. N is not an ancestor of any node in 'common'
548 548
549 549 The list is sorted by revision number, meaning it is
550 550 topologically sorted.
551 551
552 552 'heads' and 'common' are both lists of node IDs. If heads is
553 553 not supplied, uses all of the revlog's heads. If common is not
554 554 supplied, uses nullid."""
555 555 if common is None:
556 556 common = [nullid]
557 557 if heads is None:
558 558 heads = self.heads()
559 559
560 560 common = [self.rev(n) for n in common]
561 561 heads = [self.rev(n) for n in heads]
562 562
563 563 inc = self.incrementalmissingrevs(common=common)
564 564 return [self.node(r) for r in inc.missingancestors(heads)]
565 565
566 566 def nodesbetween(self, roots=None, heads=None):
567 567 """Return a topological path from 'roots' to 'heads'.
568 568
569 569 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
570 570 topologically sorted list of all nodes N that satisfy both of
571 571 these constraints:
572 572
573 573 1. N is a descendant of some node in 'roots'
574 574 2. N is an ancestor of some node in 'heads'
575 575
576 576 Every node is considered to be both a descendant and an ancestor
577 577 of itself, so every reachable node in 'roots' and 'heads' will be
578 578 included in 'nodes'.
579 579
580 580 'outroots' is the list of reachable nodes in 'roots', i.e., the
581 581 subset of 'roots' that is returned in 'nodes'. Likewise,
582 582 'outheads' is the subset of 'heads' that is also in 'nodes'.
583 583
584 584 'roots' and 'heads' are both lists of node IDs. If 'roots' is
585 585 unspecified, uses nullid as the only root. If 'heads' is
586 586 unspecified, uses list of all of the revlog's heads."""
587 587 nonodes = ([], [], [])
588 588 if roots is not None:
589 589 roots = list(roots)
590 590 if not roots:
591 591 return nonodes
592 592 lowestrev = min([self.rev(n) for n in roots])
593 593 else:
594 594 roots = [nullid] # Everybody's a descendant of nullid
595 595 lowestrev = nullrev
596 596 if (lowestrev == nullrev) and (heads is None):
597 597 # We want _all_ the nodes!
598 598 return ([self.node(r) for r in self], [nullid], list(self.heads()))
599 599 if heads is None:
600 600 # All nodes are ancestors, so the latest ancestor is the last
601 601 # node.
602 602 highestrev = len(self) - 1
603 603 # Set ancestors to None to signal that every node is an ancestor.
604 604 ancestors = None
605 605 # Set heads to an empty dictionary for later discovery of heads
606 606 heads = {}
607 607 else:
608 608 heads = list(heads)
609 609 if not heads:
610 610 return nonodes
611 611 ancestors = set()
612 612 # Turn heads into a dictionary so we can remove 'fake' heads.
613 613 # Also, later we will be using it to filter out the heads we can't
614 614 # find from roots.
615 615 heads = dict.fromkeys(heads, False)
616 616 # Start at the top and keep marking parents until we're done.
617 617 nodestotag = set(heads)
618 618 # Remember where the top was so we can use it as a limit later.
619 619 highestrev = max([self.rev(n) for n in nodestotag])
620 620 while nodestotag:
621 621 # grab a node to tag
622 622 n = nodestotag.pop()
623 623 # Never tag nullid
624 624 if n == nullid:
625 625 continue
626 626 # A node's revision number represents its place in a
627 627 # topologically sorted list of nodes.
628 628 r = self.rev(n)
629 629 if r >= lowestrev:
630 630 if n not in ancestors:
631 631 # If we are possibly a descendant of one of the roots
632 632 # and we haven't already been marked as an ancestor
633 633 ancestors.add(n) # Mark as ancestor
634 634 # Add non-nullid parents to list of nodes to tag.
635 635 nodestotag.update([p for p in self.parents(n) if
636 636 p != nullid])
637 637 elif n in heads: # We've seen it before, is it a fake head?
638 638 # So it is, real heads should not be the ancestors of
639 639 # any other heads.
640 640 heads.pop(n)
641 641 if not ancestors:
642 642 return nonodes
643 643 # Now that we have our set of ancestors, we want to remove any
644 644 # roots that are not ancestors.
645 645
646 646 # If one of the roots was nullid, everything is included anyway.
647 647 if lowestrev > nullrev:
648 648 # But, since we weren't, let's recompute the lowest rev to not
649 649 # include roots that aren't ancestors.
650 650
651 651 # Filter out roots that aren't ancestors of heads
652 652 roots = [n for n in roots if n in ancestors]
653 653 # Recompute the lowest revision
654 654 if roots:
655 655 lowestrev = min([self.rev(n) for n in roots])
656 656 else:
657 657 # No more roots? Return empty list
658 658 return nonodes
659 659 else:
660 660 # We are descending from nullid, and don't need to care about
661 661 # any other roots.
662 662 lowestrev = nullrev
663 663 roots = [nullid]
664 664 # Transform our roots list into a set.
665 665 descendants = set(roots)
666 666 # Also, keep the original roots so we can filter out roots that aren't
667 667 # 'real' roots (i.e. are descended from other roots).
668 668 roots = descendants.copy()
669 669 # Our topologically sorted list of output nodes.
670 670 orderedout = []
671 671 # Don't start at nullid since we don't want nullid in our output list,
672 672 # and if nullid shows up in descendants, empty parents will look like
673 673 # they're descendants.
674 674 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
675 675 n = self.node(r)
676 676 isdescendant = False
677 677 if lowestrev == nullrev: # Everybody is a descendant of nullid
678 678 isdescendant = True
679 679 elif n in descendants:
680 680 # n is already a descendant
681 681 isdescendant = True
682 682 # This check only needs to be done here because all the roots
683 683 # will start being marked as descendants before the loop.
684 684 if n in roots:
685 685 # If n was a root, check if it's a 'real' root.
686 686 p = tuple(self.parents(n))
687 687 # If any of its parents are descendants, it's not a root.
688 688 if (p[0] in descendants) or (p[1] in descendants):
689 689 roots.remove(n)
690 690 else:
691 691 p = tuple(self.parents(n))
692 692 # A node is a descendant if either of its parents are
693 693 # descendants. (We seeded the dependents list with the roots
694 694 # up there, remember?)
695 695 if (p[0] in descendants) or (p[1] in descendants):
696 696 descendants.add(n)
697 697 isdescendant = True
698 698 if isdescendant and ((ancestors is None) or (n in ancestors)):
699 699 # Only include nodes that are both descendants and ancestors.
700 700 orderedout.append(n)
701 701 if (ancestors is not None) and (n in heads):
702 702 # We're trying to figure out which heads are reachable
703 703 # from roots.
704 704 # Mark this head as having been reached
705 705 heads[n] = True
706 706 elif ancestors is None:
707 707 # Otherwise, we're trying to discover the heads.
708 708 # Assume this is a head because if it isn't, the next step
709 709 # will eventually remove it.
710 710 heads[n] = True
711 711 # But, obviously its parents aren't.
712 712 for p in self.parents(n):
713 713 heads.pop(p, None)
714 714 heads = [n for n, flag in heads.iteritems() if flag]
715 715 roots = list(roots)
716 716 assert orderedout
717 717 assert roots
718 718 assert heads
719 719 return (orderedout, roots, heads)
720 720
721 721 def headrevs(self):
722 722 try:
723 723 return self.index.headrevs()
724 724 except AttributeError:
725 725 return self._headrevs()
726 726
727 727 def computephases(self, roots):
728 728 return self.index.computephases(roots)
729 729
730 730 def _headrevs(self):
731 731 count = len(self)
732 732 if not count:
733 733 return [nullrev]
734 734 # we won't iter over filtered rev so nobody is a head at start
735 735 ishead = [0] * (count + 1)
736 736 index = self.index
737 737 for r in self:
738 738 ishead[r] = 1 # I may be a head
739 739 e = index[r]
740 740 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
741 741 return [r for r, val in enumerate(ishead) if val]
742 742
743 743 def heads(self, start=None, stop=None):
744 744 """return the list of all nodes that have no children
745 745
746 746 if start is specified, only heads that are descendants of
747 747 start will be returned
748 748 if stop is specified, it will consider all the revs from stop
749 749 as if they had no children
750 750 """
751 751 if start is None and stop is None:
752 752 if not len(self):
753 753 return [nullid]
754 754 return [self.node(r) for r in self.headrevs()]
755 755
756 756 if start is None:
757 757 start = nullid
758 758 if stop is None:
759 759 stop = []
760 760 stoprevs = set([self.rev(n) for n in stop])
761 761 startrev = self.rev(start)
762 762 reachable = set((startrev,))
763 763 heads = set((startrev,))
764 764
765 765 parentrevs = self.parentrevs
766 766 for r in self.revs(start=startrev + 1):
767 767 for p in parentrevs(r):
768 768 if p in reachable:
769 769 if r not in stoprevs:
770 770 reachable.add(r)
771 771 heads.add(r)
772 772 if p in heads and p not in stoprevs:
773 773 heads.remove(p)
774 774
775 775 return [self.node(r) for r in heads]
776 776
777 777 def children(self, node):
778 778 """find the children of a given node"""
779 779 c = []
780 780 p = self.rev(node)
781 781 for r in self.revs(start=p + 1):
782 782 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
783 783 if prevs:
784 784 for pr in prevs:
785 785 if pr == p:
786 786 c.append(self.node(r))
787 787 elif p == nullrev:
788 788 c.append(self.node(r))
789 789 return c
790 790
791 791 def descendant(self, start, end):
792 792 if start == nullrev:
793 793 return True
794 794 for i in self.descendants([start]):
795 795 if i == end:
796 796 return True
797 797 elif i > end:
798 798 break
799 799 return False
800 800
801 801 def commonancestorsheads(self, a, b):
802 802 """calculate all the heads of the common ancestors of nodes a and b"""
803 803 a, b = self.rev(a), self.rev(b)
804 804 try:
805 805 ancs = self.index.commonancestorsheads(a, b)
806 806 except (AttributeError, OverflowError): # C implementation failed
807 807 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
808 808 return map(self.node, ancs)
809 809
810 810 def isancestor(self, a, b):
811 811 """return True if node a is an ancestor of node b
812 812
813 813 The implementation of this is trivial but the use of
814 814 commonancestorsheads is not."""
815 815 return a in self.commonancestorsheads(a, b)
816 816
817 817 def ancestor(self, a, b):
818 818 """calculate the "best" common ancestor of nodes a and b"""
819 819
820 820 a, b = self.rev(a), self.rev(b)
821 821 try:
822 822 ancs = self.index.ancestors(a, b)
823 823 except (AttributeError, OverflowError):
824 824 ancs = ancestor.ancestors(self.parentrevs, a, b)
825 825 if ancs:
826 826 # choose a consistent winner when there's a tie
827 827 return min(map(self.node, ancs))
828 828 return nullid
829 829
830 830 def _match(self, id):
831 831 if isinstance(id, int):
832 832 # rev
833 833 return self.node(id)
834 834 if len(id) == 20:
835 835 # possibly a binary node
836 836 # odds of a binary node being all hex in ASCII are 1 in 10**25
837 837 try:
838 838 node = id
839 839 self.rev(node) # quick search the index
840 840 return node
841 841 except LookupError:
842 842 pass # may be partial hex id
843 843 try:
844 844 # str(rev)
845 845 rev = int(id)
846 846 if str(rev) != id:
847 847 raise ValueError
848 848 if rev < 0:
849 849 rev = len(self) + rev
850 850 if rev < 0 or rev >= len(self):
851 851 raise ValueError
852 852 return self.node(rev)
853 853 except (ValueError, OverflowError):
854 854 pass
855 855 if len(id) == 40:
856 856 try:
857 857 # a full hex nodeid?
858 858 node = bin(id)
859 859 self.rev(node)
860 860 return node
861 861 except (TypeError, LookupError):
862 862 pass
863 863
864 864 def _partialmatch(self, id):
865 865 try:
866 866 n = self.index.partialmatch(id)
867 867 if n and self.hasnode(n):
868 868 return n
869 869 return None
870 870 except RevlogError:
871 871 # parsers.c radix tree lookup gave multiple matches
872 872 # fall through to slow path that filters hidden revisions
873 873 pass
874 874 except (AttributeError, ValueError):
875 875 # we are pure python, or key was too short to search radix tree
876 876 pass
877 877
878 878 if id in self._pcache:
879 879 return self._pcache[id]
880 880
881 881 if len(id) < 40:
882 882 try:
883 883 # hex(node)[:...]
884 884 l = len(id) // 2 # grab an even number of digits
885 885 prefix = bin(id[:l * 2])
886 886 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
887 887 nl = [n for n in nl if hex(n).startswith(id) and
888 888 self.hasnode(n)]
889 889 if len(nl) > 0:
890 890 if len(nl) == 1:
891 891 self._pcache[id] = nl[0]
892 892 return nl[0]
893 893 raise LookupError(id, self.indexfile,
894 894 _('ambiguous identifier'))
895 895 return None
896 896 except TypeError:
897 897 pass
898 898
899 899 def lookup(self, id):
900 900 """locate a node based on:
901 901 - revision number or str(revision number)
902 902 - nodeid or subset of hex nodeid
903 903 """
904 904 n = self._match(id)
905 905 if n is not None:
906 906 return n
907 907 n = self._partialmatch(id)
908 908 if n:
909 909 return n
910 910
911 911 raise LookupError(id, self.indexfile, _('no match found'))
912 912
913 913 def cmp(self, node, text):
914 914 """compare text with a given file revision
915 915
916 916 returns True if text is different than what is stored.
917 917 """
918 918 p1, p2 = self.parents(node)
919 919 return hash(text, p1, p2) != node
920 920
921 921 def _addchunk(self, offset, data):
922 922 o, d = self._chunkcache
923 923 # try to add to existing cache
924 924 if o + len(d) == offset and len(d) + len(data) < _chunksize:
925 925 self._chunkcache = o, d + data
926 926 else:
927 927 self._chunkcache = offset, data
928 928
929 929 def _loadchunk(self, offset, length):
930 930 if self._inline:
931 931 df = self.opener(self.indexfile)
932 932 else:
933 933 df = self.opener(self.datafile)
934 934
935 935 # Cache data both forward and backward around the requested
936 936 # data, in a fixed size window. This helps speed up operations
937 937 # involving reading the revlog backwards.
938 938 cachesize = self._chunkcachesize
939 939 realoffset = offset & ~(cachesize - 1)
940 940 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
941 941 - realoffset)
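# Worked example (illustrative): with the default 64 KiB window
# (_chunkcachesize == 65536), a 100-byte read at offset 70000 rounds to
# realoffset == 65536 and reallength == 65536, i.e. the aligned window
# [65536, 131072).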
942 942 df.seek(realoffset)
943 943 d = df.read(reallength)
944 944 df.close()
945 945 self._addchunk(realoffset, d)
946 946 if offset != realoffset or reallength != length:
947 947 return util.buffer(d, offset - realoffset, length)
948 948 return d
949 949
950 950 def _getchunk(self, offset, length):
951 951 o, d = self._chunkcache
952 952 l = len(d)
953 953
954 954 # is it in the cache?
955 955 cachestart = offset - o
956 956 cacheend = cachestart + length
957 957 if cachestart >= 0 and cacheend <= l:
958 958 if cachestart == 0 and cacheend == l:
959 959 return d # avoid a copy
960 960 return util.buffer(d, cachestart, cacheend - cachestart)
961 961
962 962 return self._loadchunk(offset, length)
963 963
964 964 def _chunkraw(self, startrev, endrev):
965 965 start = self.start(startrev)
966 966 end = self.end(endrev)
967 967 if self._inline:
968 968 start += (startrev + 1) * self._io.size
969 969 end += (endrev + 1) * self._io.size
970 970 length = end - start
971 971 return self._getchunk(start, length)
972 972
973 973 def _chunk(self, rev):
974 974 return decompress(self._chunkraw(rev, rev))
975 975
976 976 def _chunks(self, revs):
977 977 '''faster version of [self._chunk(rev) for rev in revs]
978 978
979 979 Assumes that revs is in ascending order.'''
980 980 if not revs:
981 981 return []
982 982 start = self.start
983 983 length = self.length
984 984 inline = self._inline
985 985 iosize = self._io.size
986 986 buffer = util.buffer
987 987
988 988 l = []
989 989 ladd = l.append
990 990
991 991 # preload the cache
992 992 try:
993 993 while True:
994 994 # ensure that the cache doesn't change out from under us
995 995 _cache = self._chunkcache
996 996 self._chunkraw(revs[0], revs[-1])
997 997 if _cache == self._chunkcache:
998 998 break
999 999 offset, data = _cache
1000 1000 except OverflowError:
1001 1001 # issue4215 - we can't cache a run of chunks greater than
1002 1002 # 2G on Windows
1003 1003 return [self._chunk(rev) for rev in revs]
1004 1004
1005 1005 for rev in revs:
1006 1006 chunkstart = start(rev)
1007 1007 if inline:
1008 1008 chunkstart += (rev + 1) * iosize
1009 1009 chunklength = length(rev)
1010 1010 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1011 1011
1012 1012 return l
1013 1013
1014 1014 def _chunkclear(self):
1015 1015 self._chunkcache = (0, '')
1016 1016
1017 1017 def deltaparent(self, rev):
1018 1018 """return deltaparent of the given revision"""
1019 1019 base = self.index[rev][3]
1020 1020 if base == rev:
1021 1021 return nullrev
1022 1022 elif self._generaldelta:
1023 1023 return base
1024 1024 else:
1025 1025 return rev - 1
1026 1026
1027 1027 def revdiff(self, rev1, rev2):
1028 1028 """return or calculate a delta between two revisions"""
1029 1029 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1030 1030 return str(self._chunk(rev2))
1031 1031
1032 1032 return mdiff.textdiff(self.revision(rev1),
1033 1033 self.revision(rev2))
1034 1034
1035 1035 def revision(self, nodeorrev):
1036 1036 """return an uncompressed revision of a given node or revision
1037 1037 number.
1038 1038 """
1039 1039 if isinstance(nodeorrev, int):
1040 1040 rev = nodeorrev
1041 1041 node = self.node(rev)
1042 1042 else:
1043 1043 node = nodeorrev
1044 1044 rev = None
1045 1045
1046 1046 _cache = self._cache # grab local copy of cache to avoid thread race
1047 1047 cachedrev = None
1048 1048 if node == nullid:
1049 1049 return ""
1050 1050 if _cache:
1051 1051 if _cache[0] == node:
1052 1052 return _cache[2]
1053 1053 cachedrev = _cache[1]
1054 1054
1055 1055 # look up what we need to read
1056 1056 text = None
1057 1057 if rev is None:
1058 1058 rev = self.rev(node)
1059 1059
1060 1060 # check rev flags
1061 1061 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1062 1062 raise RevlogError(_('incompatible revision flag %x') %
1063 1063 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1064 1064
1065 1065 # build delta chain
1066 1066 chain = []
1067 1067 index = self.index # for performance
1068 1068 generaldelta = self._generaldelta
1069 1069 iterrev = rev
1070 1070 e = index[iterrev]
1071 1071 while iterrev != e[3] and iterrev != cachedrev:
1072 1072 chain.append(iterrev)
1073 1073 if generaldelta:
1074 1074 iterrev = e[3]
1075 1075 else:
1076 1076 iterrev -= 1
1077 1077 e = index[iterrev]
1078 1078
1079 1079 if iterrev == cachedrev:
1080 1080 # cache hit
1081 1081 text = _cache[2]
1082 1082 else:
1083 1083 chain.append(iterrev)
1084 1084 chain.reverse()
1085 1085
1086 1086 # drop cache to save memory
1087 1087 self._cache = None
1088 1088
1089 1089 bins = self._chunks(chain)
1090 1090 if text is None:
1091 1091 text = str(bins[0])
1092 1092 bins = bins[1:]
1093 1093
1094 1094 text = mdiff.patches(text, bins)
1095 1095
1096 1096 text = self._checkhash(text, node, rev)
1097 1097
1098 1098 self._cache = (node, rev, text)
1099 1099 return text
1100 1100
1101 1101 def hash(self, text, p1, p2):
1102 1102 """Compute a node hash.
1103 1103
1104 1104 Available as a function so that subclasses can replace the hash
1105 1105 as needed.
1106 1106 """
1107 1107 return hash(text, p1, p2)
1108 1108
1109 1109 def _checkhash(self, text, node, rev):
1110 1110 p1, p2 = self.parents(node)
1111 1111 self.checkhash(text, p1, p2, node, rev)
1112 1112 return text
1113 1113
1114 1114 def checkhash(self, text, p1, p2, node, rev=None):
1115 1115 if node != self.hash(text, p1, p2):
1116 1116 revornode = rev
1117 1117 if revornode is None:
1118 1118 revornode = templatefilters.short(hex(node))
1119 1119 raise RevlogError(_("integrity check failed on %s:%s")
1120 1120 % (self.indexfile, revornode))
1121 1121
1122 1122 def checkinlinesize(self, tr, fp=None):
1123 1123 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1124 1124 return
1125 1125
1126 1126 trinfo = tr.find(self.indexfile)
1127 1127 if trinfo is None:
1128 1128 raise RevlogError(_("%s not found in the transaction")
1129 1129 % self.indexfile)
1130 1130
1131 1131 trindex = trinfo[2]
1132 dataoff = self.start(trindex)
1132 if trindex is not None:
1133 dataoff = self.start(trindex)
1134 else:
1135 # revlog was stripped at start of transaction, use all leftover data
1136 trindex = len(self) - 1
1137 dataoff = self.end(-2)
1133 1138
1134 1139 tr.add(self.datafile, dataoff)
1135 1140
1136 1141 if fp:
1137 1142 fp.flush()
1138 1143 fp.close()
1139 1144
1140 1145 df = self.opener(self.datafile, 'w')
1141 1146 try:
1142 1147 for r in self:
1143 1148 df.write(self._chunkraw(r, r))
1144 1149 finally:
1145 1150 df.close()
1146 1151
1147 1152 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1148 1153 self.version &= ~(REVLOGNGINLINEDATA)
1149 1154 self._inline = False
1150 1155 for i in self:
1151 1156 e = self._io.packentry(self.index[i], self.node, self.version, i)
1152 1157 fp.write(e)
1153 1158
1154 1159 # if we don't call close, the temp file will never replace the
1155 1160 # real index
1156 1161 fp.close()
1157 1162
1158 1163 tr.replace(self.indexfile, trindex * self._io.size)
1159 1164 self._chunkclear()
1160 1165
1161 1166 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1162 1167 node=None):
1163 1168 """add a revision to the log
1164 1169
1165 1170 text - the revision data to add
1166 1171 transaction - the transaction object used for rollback
1167 1172 link - the linkrev data to add
1168 1173 p1, p2 - the parent nodeids of the revision
1169 1174 cachedelta - an optional precomputed delta
1170 1175 node - nodeid of revision; typically node is not specified, and it is
1171 1176 computed by default as hash(text, p1, p2), however subclasses might
1172 1177 use different hashing method (and override checkhash() in such case)
1173 1178 """
1174 1179 if link == nullrev:
1175 1180 raise RevlogError(_("attempted to add linkrev -1 to %s")
1176 1181 % self.indexfile)
1177 1182 node = node or self.hash(text, p1, p2)
1178 1183 if node in self.nodemap:
1179 1184 return node
1180 1185
1181 1186 dfh = None
1182 1187 if not self._inline:
1183 1188 dfh = self.opener(self.datafile, "a")
1184 1189 ifh = self.opener(self.indexfile, "a+")
1185 1190 try:
1186 1191 return self._addrevision(node, text, transaction, link, p1, p2,
1187 1192 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1188 1193 finally:
1189 1194 if dfh:
1190 1195 dfh.close()
1191 1196 ifh.close()
1192 1197
1193 1198 def compress(self, text):
1194 1199 """ generate a possibly-compressed representation of text """
1195 1200 if not text:
1196 1201 return ("", text)
1197 1202 l = len(text)
1198 1203 bin = None
1199 1204 if l < 44:
1200 1205 pass
1201 1206 elif l > 1000000:
1202 1207 # zlib makes an internal copy, thus doubling memory usage for
1203 1208 # large files, so let's do this in pieces
1204 1209 z = zlib.compressobj()
1205 1210 p = []
1206 1211 pos = 0
1207 1212 while pos < l:
1208 1213 pos2 = pos + 2**20
1209 1214 p.append(z.compress(text[pos:pos2]))
1210 1215 pos = pos2
1211 1216 p.append(z.flush())
1212 1217 if sum(map(len, p)) < l:
1213 1218 bin = "".join(p)
1214 1219 else:
1215 1220 bin = _compress(text)
1216 1221 if bin is None or len(bin) > l:
1217 1222 if text[0] == '\0':
1218 1223 return ("", text)
1219 1224 return ('u', text)
1220 1225 return ("", bin)
1221 1226
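# Note (illustrative, not in the original file): compress() above pairs with the
# module-level decompress(): ("", <zlib data>) round-trips through the 'x'
# branch, ('u', text) through the 'u' branch, and ("", text) is produced only
# when text is empty or already starts with '\0', which decompress() passes
# through untouched.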
1222 1227 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1223 1228 cachedelta, ifh, dfh):
1224 1229 """internal function to add revisions to the log
1225 1230
1226 1231 see addrevision for argument descriptions.
1227 1232 invariants:
1228 1233 - text is optional (can be None); if not set, cachedelta must be set.
1229 1234 if both are set, they must correspond to each other.
1230 1235 """
1231 1236 btext = [text]
1232 1237 def buildtext():
1233 1238 if btext[0] is not None:
1234 1239 return btext[0]
1235 1240 # flush any pending writes here so we can read it in revision
1236 1241 if dfh:
1237 1242 dfh.flush()
1238 1243 ifh.flush()
1239 1244 baserev = cachedelta[0]
1240 1245 delta = cachedelta[1]
1241 1246 # special case deltas which replace entire base; no need to decode
1242 1247 # base revision. this neatly avoids censored bases, which throw when
1243 1248 # they're decoded.
1244 1249 hlen = struct.calcsize(">lll")
1245 1250 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1246 1251 len(delta) - hlen):
1247 1252 btext[0] = delta[hlen:]
1248 1253 else:
1249 1254 basetext = self.revision(self.node(baserev))
1250 1255 btext[0] = mdiff.patch(basetext, delta)
1251 1256 try:
1252 1257 self.checkhash(btext[0], p1, p2, node)
1253 1258 if flags & REVIDX_ISCENSORED:
1254 1259 raise RevlogError(_('node %s is not censored') % node)
1255 1260 except CensoredNodeError:
1256 1261 # must pass the censored index flag to add censored revisions
1257 1262 if not flags & REVIDX_ISCENSORED:
1258 1263 raise
1259 1264 return btext[0]
1260 1265
1261 1266 def builddelta(rev):
1262 1267 # can we use the cached delta?
1263 1268 if cachedelta and cachedelta[0] == rev:
1264 1269 delta = cachedelta[1]
1265 1270 else:
1266 1271 t = buildtext()
1267 1272 if self.iscensored(rev):
1268 1273 # deltas based on a censored revision must replace the
1269 1274 # full content in one patch, so delta works everywhere
1270 1275 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1271 1276 delta = header + t
1272 1277 else:
1273 1278 ptext = self.revision(self.node(rev))
1274 1279 delta = mdiff.textdiff(ptext, t)
1275 1280 data = self.compress(delta)
1276 1281 l = len(data[1]) + len(data[0])
1277 1282 if basecache[0] == rev:
1278 1283 chainbase = basecache[1]
1279 1284 else:
1280 1285 chainbase = self.chainbase(rev)
1281 1286 dist = l + offset - self.start(chainbase)
1282 1287 if self._generaldelta:
1283 1288 base = rev
1284 1289 else:
1285 1290 base = chainbase
1286 1291 chainlen, compresseddeltalen = self._chaininfo(rev)
1287 1292 chainlen += 1
1288 1293 compresseddeltalen += l
1289 1294 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1290 1295
1291 1296 curr = len(self)
1292 1297 prev = curr - 1
1293 1298 base = chainbase = curr
1294 1299 chainlen = None
1295 1300 offset = self.end(prev)
1296 1301 d = None
1297 1302 if self._basecache is None:
1298 1303 self._basecache = (prev, self.chainbase(prev))
1299 1304 basecache = self._basecache
1300 1305 p1r, p2r = self.rev(p1), self.rev(p2)
1301 1306
1302 1307 # should we try to build a delta?
1303 1308 if prev != nullrev:
1304 1309 if self._generaldelta:
1305 1310 if p1r >= basecache[1]:
1306 1311 d = builddelta(p1r)
1307 1312 elif p2r >= basecache[1]:
1308 1313 d = builddelta(p2r)
1309 1314 else:
1310 1315 d = builddelta(prev)
1311 1316 else:
1312 1317 d = builddelta(prev)
1313 1318 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1314 1319
1315 1320 # full versions are inserted when the needed deltas
1316 1321 # become comparable to the uncompressed text
1317 1322 if text is None:
1318 1323 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1319 1324 cachedelta[1])
1320 1325 else:
1321 1326 textlen = len(text)
1322 1327
1323 1328 # - 'dist' is the distance from the base revision -- bounding it limits
1324 1329 # the amount of I/O we need to do.
1325 1330 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1326 1331 # to apply -- bounding it limits the amount of CPU we consume.
1327 1332 if (d is None or dist > textlen * 4 or l > textlen or
1328 1333 compresseddeltalen > textlen * 2 or
1329 1334 (self._maxchainlen and chainlen > self._maxchainlen)):
1330 1335 text = buildtext()
1331 1336 data = self.compress(text)
1332 1337 l = len(data[1]) + len(data[0])
1333 1338 base = chainbase = curr
1334 1339
1335 1340 e = (offset_type(offset, flags), l, textlen,
1336 1341 base, link, p1r, p2r, node)
1337 1342 self.index.insert(-1, e)
1338 1343 self.nodemap[node] = curr
1339 1344
1340 1345 entry = self._io.packentry(e, self.node, self.version, curr)
1341 1346 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1342 1347
1343 1348 if type(text) == str: # only accept immutable objects
1344 1349 self._cache = (node, curr, text)
1345 1350 self._basecache = (curr, chainbase)
1346 1351 return node
1347 1352
1348 1353 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1349 1354 curr = len(self) - 1
1350 1355 if not self._inline:
1351 1356 transaction.add(self.datafile, offset)
1352 1357 transaction.add(self.indexfile, curr * len(entry))
1353 1358 if data[0]:
1354 1359 dfh.write(data[0])
1355 1360 dfh.write(data[1])
1356 1361 dfh.flush()
1357 1362 ifh.write(entry)
1358 1363 else:
1359 1364 offset += curr * self._io.size
1360 1365 transaction.add(self.indexfile, offset, curr)
1361 1366 ifh.write(entry)
1362 1367 ifh.write(data[0])
1363 1368 ifh.write(data[1])
1364 1369 self.checkinlinesize(transaction, ifh)
1365 1370
1366 1371 def addgroup(self, bundle, linkmapper, transaction):
1367 1372 """
1368 1373 add a delta group
1369 1374
1370 1375 given a set of deltas, add them to the revision log. the
1371 1376 first delta is against its parent, which should be in our
1372 1377 log, the rest are against the previous delta.
1373 1378 """
1374 1379
1375 1380 # track the base of the current delta log
1376 1381 content = []
1377 1382 node = None
1378 1383
1379 1384 r = len(self)
1380 1385 end = 0
1381 1386 if r:
1382 1387 end = self.end(r - 1)
1383 1388 ifh = self.opener(self.indexfile, "a+")
1384 1389 isize = r * self._io.size
1385 1390 if self._inline:
1386 1391 transaction.add(self.indexfile, end + isize, r)
1387 1392 dfh = None
1388 1393 else:
1389 1394 transaction.add(self.indexfile, isize, r)
1390 1395 transaction.add(self.datafile, end)
1391 1396 dfh = self.opener(self.datafile, "a")
1392 1397 def flush():
1393 1398 if dfh:
1394 1399 dfh.flush()
1395 1400 ifh.flush()
1396 1401 try:
1397 1402 # loop through our set of deltas
1398 1403 chain = None
1399 1404 while True:
1400 1405 chunkdata = bundle.deltachunk(chain)
1401 1406 if not chunkdata:
1402 1407 break
1403 1408 node = chunkdata['node']
1404 1409 p1 = chunkdata['p1']
1405 1410 p2 = chunkdata['p2']
1406 1411 cs = chunkdata['cs']
1407 1412 deltabase = chunkdata['deltabase']
1408 1413 delta = chunkdata['delta']
1409 1414
1410 1415 content.append(node)
1411 1416
1412 1417 link = linkmapper(cs)
1413 1418 if node in self.nodemap:
1414 1419 # this can happen if two branches make the same change
1415 1420 chain = node
1416 1421 continue
1417 1422
1418 1423 for p in (p1, p2):
1419 1424 if p not in self.nodemap:
1420 1425 raise LookupError(p, self.indexfile,
1421 1426 _('unknown parent'))
1422 1427
1423 1428 if deltabase not in self.nodemap:
1424 1429 raise LookupError(deltabase, self.indexfile,
1425 1430 _('unknown delta base'))
1426 1431
1427 1432 baserev = self.rev(deltabase)
1428 1433
1429 1434 if baserev != nullrev and self.iscensored(baserev):
1430 1435 # if base is censored, delta must be full replacement in a
1431 1436 # single patch operation
1432 1437 hlen = struct.calcsize(">lll")
1433 1438 oldlen = self.rawsize(baserev)
1434 1439 newlen = len(delta) - hlen
1435 1440 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1436 1441 raise error.CensoredBaseError(self.indexfile,
1437 1442 self.node(baserev))
1438 1443
1439 1444 flags = REVIDX_DEFAULT_FLAGS
1440 1445 if self._peek_iscensored(baserev, delta, flush):
1441 1446 flags |= REVIDX_ISCENSORED
1442 1447
1443 1448 chain = self._addrevision(node, None, transaction, link,
1444 1449 p1, p2, flags, (baserev, delta),
1445 1450 ifh, dfh)
1446 1451 if not dfh and not self._inline:
1447 1452 # addrevision switched from inline to conventional
1448 1453 # reopen the index
1449 1454 ifh.close()
1450 1455 dfh = self.opener(self.datafile, "a")
1451 1456 ifh = self.opener(self.indexfile, "a")
1452 1457 finally:
1453 1458 if dfh:
1454 1459 dfh.close()
1455 1460 ifh.close()
1456 1461
1457 1462 return content
1458 1463
1459 1464 def iscensored(self, rev):
1460 1465 """Check if a file revision is censored."""
1461 1466 return False
1462 1467
1463 1468 def _peek_iscensored(self, baserev, delta, flush):
1464 1469 """Quickly check if a delta produces a censored revision."""
1465 1470 return False
1466 1471
1467 1472 def getstrippoint(self, minlink):
1468 1473 """find the minimum rev that must be stripped to strip the linkrev
1469 1474
1470 1475 Returns a tuple containing the minimum rev and a set of all revs that
1471 1476 have linkrevs that will be broken by this strip.
1472 1477 """
1473 1478 brokenrevs = set()
1474 1479 strippoint = len(self)
1475 1480
1476 1481 heads = {}
1477 1482 futurelargelinkrevs = set()
1478 1483 for head in self.headrevs():
1479 1484 headlinkrev = self.linkrev(head)
1480 1485 heads[head] = headlinkrev
1481 1486 if headlinkrev >= minlink:
1482 1487 futurelargelinkrevs.add(headlinkrev)
1483 1488
1484 1489 # This algorithm involves walking down the rev graph, starting at the
1485 1490 # heads. Since the revs are topologically sorted according to linkrev,
1486 1491 # once all head linkrevs are below the minlink, we know there are
1487 1492 # no more revs that could have a linkrev greater than minlink.
1488 1493 # So we can stop walking.
1489 1494 while futurelargelinkrevs:
1490 1495 strippoint -= 1
1491 1496 linkrev = heads.pop(strippoint)
1492 1497
1493 1498 if linkrev < minlink:
1494 1499 brokenrevs.add(strippoint)
1495 1500 else:
1496 1501 futurelargelinkrevs.remove(linkrev)
1497 1502
1498 1503 for p in self.parentrevs(strippoint):
1499 1504 if p != nullrev:
1500 1505 plinkrev = self.linkrev(p)
1501 1506 heads[p] = plinkrev
1502 1507 if plinkrev >= minlink:
1503 1508 futurelargelinkrevs.add(plinkrev)
1504 1509
1505 1510 return strippoint, brokenrevs
1506 1511
1507 1512 def strip(self, minlink, transaction):
1508 1513 """truncate the revlog on the first revision with a linkrev >= minlink
1509 1514
1510 1515 This function is called when we're stripping revision minlink and
1511 1516 its descendants from the repository.
1512 1517
1513 1518 We have to remove all revisions with linkrev >= minlink, because
1514 1519 the equivalent changelog revisions will be renumbered after the
1515 1520 strip.
1516 1521
1517 1522 So we truncate the revlog on the first of these revisions, and
1518 1523 trust that the caller has saved the revisions that shouldn't be
1519 1524 removed and that it'll re-add them after this truncation.
1520 1525 """
1521 1526 if len(self) == 0:
1522 1527 return
1523 1528
1524 1529 rev, _ = self.getstrippoint(minlink)
1525 1530 if rev == len(self):
1526 1531 return
1527 1532
1528 1533 # first truncate the files on disk
1529 1534 end = self.start(rev)
1530 1535 if not self._inline:
1531 1536 transaction.add(self.datafile, end)
1532 1537 end = rev * self._io.size
1533 1538 else:
1534 1539 end += rev * self._io.size
1535 1540
1536 1541 transaction.add(self.indexfile, end)
1537 1542
1538 1543 # then reset internal state in memory to forget those revisions
1539 1544 self._cache = None
1540 1545 self._chaininfocache = {}
1541 1546 self._chunkclear()
1542 1547 for x in xrange(rev, len(self)):
1543 1548 del self.nodemap[self.node(x)]
1544 1549
1545 1550 del self.index[rev:-1]
1546 1551
1547 1552 def checksize(self):
1548 1553 expected = 0
1549 1554 if len(self):
1550 1555 expected = max(0, self.end(len(self) - 1))
1551 1556
1552 1557 try:
1553 1558 f = self.opener(self.datafile)
1554 1559 f.seek(0, 2)
1555 1560 actual = f.tell()
1556 1561 f.close()
1557 1562 dd = actual - expected
1558 1563 except IOError, inst:
1559 1564 if inst.errno != errno.ENOENT:
1560 1565 raise
1561 1566 dd = 0
1562 1567
1563 1568 try:
1564 1569 f = self.opener(self.indexfile)
1565 1570 f.seek(0, 2)
1566 1571 actual = f.tell()
1567 1572 f.close()
1568 1573 s = self._io.size
1569 1574 i = max(0, actual // s)
1570 1575 di = actual - (i * s)
1571 1576 if self._inline:
1572 1577 databytes = 0
1573 1578 for r in self:
1574 1579 databytes += max(0, self.length(r))
1575 1580 dd = 0
1576 1581 di = actual - len(self) * s - databytes
1577 1582 except IOError, inst:
1578 1583 if inst.errno != errno.ENOENT:
1579 1584 raise
1580 1585 di = 0
1581 1586
1582 1587 return (dd, di)
1583 1588
1584 1589 def files(self):
1585 1590 res = [self.indexfile]
1586 1591 if not self._inline:
1587 1592 res.append(self.datafile)
1588 1593 return res