phases: fix bug where native phase computation wasn't called...
Laurent Charignon
r25361:1635579f default
@@ -1,1594 +1,1594 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
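For reference, the version field stored in the first four bytes of the index combines one of the format numbers above with these feature flags; a quick illustrative sanity check of the default:

    # REVLOGNG (1) combined with the inline-data flag (1 << 16)
    assert REVLOG_DEFAULT_VERSION == REVLOGNG | REVLOGNGINLINEDATA == 0x10001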
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
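The first field of each index entry packs a 48-bit data-file offset and 16 bits of per-revision flags into one integer; a round trip through the helpers above makes the layout concrete (values are hypothetical):

    q = offset_type(12345, 7)          # offset 12345, flags 0x0007
    assert getoffset(q) == 12345
    assert gettype(q) == 7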
58 58
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
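As a standalone illustration of this scheme (not part of revlog itself), a node id can be recomputed with hashlib directly; sorting the parents makes the result independent of parent order, and nullid, being all zero bytes, always sorts first:

    import hashlib

    _null = "\0" * 20                      # same value as nullid
    def toyhash(text, p1=_null, p2=_null):
        a, b = sorted([p1, p2])            # order-independent
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    assert toyhash("data") == hash("data", _null, _null)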
81 81
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error, e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
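The three chunk markers handled above can be exercised directly: zlib output happens to start with 'x' (0x78), incompressible text is stored verbatim behind a 'u' marker, and an empty chunk is returned as-is (illustrative round trip):

    assert decompress(zlib.compress("a" * 100)) == "a" * 100   # 'x' chunk
    assert decompress("uhello") == "hello"                     # 'u' chunk
    assert decompress("") == ""                                # empty chunk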
97 97
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107 v0shaoffset = 56
108 108
109 109 class revlogoldio(object):
110 110 def __init__(self):
111 111 self.size = struct.calcsize(indexformatv0)
112 112
113 113 def parseindex(self, data, inline):
114 114 s = self.size
115 115 index = []
116 116 nodemap = {nullid: nullrev}
117 117 n = off = 0
118 118 l = len(data)
119 119 while off + s <= l:
120 120 cur = data[off:off + s]
121 121 off += s
122 122 e = _unpack(indexformatv0, cur)
123 123 # transform to revlogv1 format
124 124 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
125 125 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
126 126 index.append(e2)
127 127 nodemap[e[6]] = n
128 128 n += 1
129 129
130 130 # add the magic null revision at -1
131 131 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
132 132
133 133 return index, nodemap, None
134 134
135 135 def packentry(self, entry, node, version, rev):
136 136 if gettype(entry[0]):
137 137 raise RevlogError(_("index entry flags need RevlogNG"))
138 138 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
139 139 node(entry[5]), node(entry[6]), entry[7])
140 140 return _pack(indexformatv0, *e2)
141 141
142 142 # index ng:
143 143 # 6 bytes: offset
144 144 # 2 bytes: flags
145 145 # 4 bytes: compressed length
146 146 # 4 bytes: uncompressed length
147 147 # 4 bytes: base rev
148 148 # 4 bytes: link rev
149 149 # 4 bytes: parent 1 rev
150 150 # 4 bytes: parent 2 rev
151 151 # 32 bytes: nodeid
152 152 indexformatng = ">Qiiiiii20s12x"
153 153 ngshaoffset = 32
154 154 versionformat = ">I"
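To make the RevlogNG layout concrete, here is a hypothetical entry packed and unpacked with the format string above; the trailing '12x' pads the 20-byte nodeid field to 32 bytes on disk:

    entry = (0,            # offset << 16 | flags, as built by offset_type()
             11, 11,       # compressed / uncompressed length
             0, 0,         # base rev, link rev
             -1, -1,       # parent revs (nullrev)
             "\x12" * 20)  # binary nodeid
    packed = _pack(indexformatng, *entry)
    assert len(packed) == struct.calcsize(indexformatng) == 64
    assert _unpack(indexformatng, packed) == entry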
155 155
156 156 class revlogio(object):
157 157 def __init__(self):
158 158 self.size = struct.calcsize(indexformatng)
159 159
160 160 def parseindex(self, data, inline):
161 161 # call the C implementation to parse the index data
162 162 index, cache = parsers.parse_index2(data, inline)
163 163 return index, getattr(index, 'nodemap', None), cache
164 164
165 165 def packentry(self, entry, node, version, rev):
166 166 p = _pack(indexformatng, *entry)
167 167 if rev == 0:
168 168 p = _pack(versionformat, version) + p[4:]
169 169 return p
170 170
171 171 class revlog(object):
172 172 """
173 173 the underlying revision storage object
174 174
175 175 A revlog consists of two parts, an index and the revision data.
176 176
177 177 The index is a file with a fixed record size containing
178 178 information on each revision, including its nodeid (hash), the
179 179 nodeids of its parents, the position and offset of its data within
180 180 the data file, and the revision it's based on. Finally, each entry
181 181 contains a linkrev entry that can serve as a pointer to external
182 182 data.
183 183
184 184 The revision data itself is a linear collection of data chunks.
185 185 Each chunk represents a revision and is usually represented as a
186 186 delta against the previous chunk. To bound lookup time, runs of
187 187 deltas are limited to about 2 times the length of the original
188 188 version data. This makes retrieval of a version proportional to
189 189 its size, or O(1) relative to the number of revisions.
190 190
191 191 Both pieces of the revlog are written to in an append-only
192 192 fashion, which means we never need to rewrite a file to insert or
193 193 remove data, and can use some simple techniques to avoid the need
194 194 for locking while reading.
195 195 """
196 196 def __init__(self, opener, indexfile):
197 197 """
198 198 create a revlog object
199 199
200 200 opener is a function that abstracts the file opening operation
201 201 and can be used to implement COW semantics or the like.
202 202 """
203 203 self.indexfile = indexfile
204 204 self.datafile = indexfile[:-2] + ".d"
205 205 self.opener = opener
206 206 self._cache = None
207 207 self._basecache = None
208 208 self._chunkcache = (0, '')
209 209 self._chunkcachesize = 65536
210 210 self._maxchainlen = None
211 211 self.index = []
212 212 self._pcache = {}
213 213 self._nodecache = {nullid: nullrev}
214 214 self._nodepos = None
215 215
216 216 v = REVLOG_DEFAULT_VERSION
217 217 opts = getattr(opener, 'options', None)
218 218 if opts is not None:
219 219 if 'revlogv1' in opts:
220 220 if 'generaldelta' in opts:
221 221 v |= REVLOGGENERALDELTA
222 222 else:
223 223 v = 0
224 224 if 'chunkcachesize' in opts:
225 225 self._chunkcachesize = opts['chunkcachesize']
226 226 if 'maxchainlen' in opts:
227 227 self._maxchainlen = opts['maxchainlen']
228 228
229 229 if self._chunkcachesize <= 0:
230 230 raise RevlogError(_('revlog chunk cache size %r is not greater '
231 231 'than 0') % self._chunkcachesize)
232 232 elif self._chunkcachesize & (self._chunkcachesize - 1):
233 233 raise RevlogError(_('revlog chunk cache size %r is not a power '
234 234 'of 2') % self._chunkcachesize)
235 235
236 236 i = ''
237 237 self._initempty = True
238 238 try:
239 239 f = self.opener(self.indexfile)
240 240 i = f.read()
241 241 f.close()
242 242 if len(i) > 0:
243 243 v = struct.unpack(versionformat, i[:4])[0]
244 244 self._initempty = False
245 245 except IOError, inst:
246 246 if inst.errno != errno.ENOENT:
247 247 raise
248 248
249 249 self.version = v
250 250 self._inline = v & REVLOGNGINLINEDATA
251 251 self._generaldelta = v & REVLOGGENERALDELTA
252 252 flags = v & ~0xFFFF
253 253 fmt = v & 0xFFFF
254 254 if fmt == REVLOGV0 and flags:
255 255 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 256 % (self.indexfile, flags >> 16))
257 257 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 258 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 259 % (self.indexfile, flags >> 16))
260 260 elif fmt > REVLOGNG:
261 261 raise RevlogError(_("index %s unknown format %d")
262 262 % (self.indexfile, fmt))
263 263
264 264 self._io = revlogio()
265 265 if self.version == REVLOGV0:
266 266 self._io = revlogoldio()
267 267 try:
268 268 d = self._io.parseindex(i, self._inline)
269 269 except (ValueError, IndexError):
270 270 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 271 self.index, nodemap, self._chunkcache = d
272 272 if nodemap is not None:
273 273 self.nodemap = self._nodecache = nodemap
274 274 if not self._chunkcache:
275 275 self._chunkclear()
276 276 # revnum -> (chain-length, sum-delta-length)
277 277 self._chaininfocache = {}
278 278
279 279 def tip(self):
280 280 return self.node(len(self.index) - 2)
281 281 def __contains__(self, rev):
282 282 return 0 <= rev < len(self)
283 283 def __len__(self):
284 284 return len(self.index) - 1
285 285 def __iter__(self):
286 286 return iter(xrange(len(self)))
287 287 def revs(self, start=0, stop=None):
288 288 """iterate over all rev in this revlog (from start to stop)"""
289 289 step = 1
290 290 if stop is not None:
291 291 if start > stop:
292 292 step = -1
293 293 stop += step
294 294 else:
295 295 stop = len(self)
296 296 return xrange(start, stop, step)
297 297
298 298 @util.propertycache
299 299 def nodemap(self):
300 300 self.rev(self.node(0))
301 301 return self._nodecache
302 302
303 303 def hasnode(self, node):
304 304 try:
305 305 self.rev(node)
306 306 return True
307 307 except KeyError:
308 308 return False
309 309
310 310 def clearcaches(self):
311 311 try:
312 312 self._nodecache.clearcaches()
313 313 except AttributeError:
314 314 self._nodecache = {nullid: nullrev}
315 315 self._nodepos = None
316 316
317 317 def rev(self, node):
318 318 try:
319 319 return self._nodecache[node]
320 320 except TypeError:
321 321 raise
322 322 except RevlogError:
323 323 # parsers.c radix tree lookup failed
324 324 raise LookupError(node, self.indexfile, _('no node'))
325 325 except KeyError:
326 326 # pure python cache lookup failed
327 327 n = self._nodecache
328 328 i = self.index
329 329 p = self._nodepos
330 330 if p is None:
331 331 p = len(i) - 2
332 332 for r in xrange(p, -1, -1):
333 333 v = i[r][7]
334 334 n[v] = r
335 335 if v == node:
336 336 self._nodepos = r - 1
337 337 return r
338 338 raise LookupError(node, self.indexfile, _('no node'))
339 339
340 340 def node(self, rev):
341 341 return self.index[rev][7]
342 342 def linkrev(self, rev):
343 343 return self.index[rev][4]
344 344 def parents(self, node):
345 345 i = self.index
346 346 d = i[self.rev(node)]
347 347 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
348 348 def parentrevs(self, rev):
349 349 return self.index[rev][5:7]
350 350 def start(self, rev):
351 351 return int(self.index[rev][0] >> 16)
352 352 def end(self, rev):
353 353 return self.start(rev) + self.length(rev)
354 354 def length(self, rev):
355 355 return self.index[rev][1]
356 356 def chainbase(self, rev):
357 357 index = self.index
358 358 base = index[rev][3]
359 359 while base != rev:
360 360 rev = base
361 361 base = index[rev][3]
362 362 return base
363 363 def chainlen(self, rev):
364 364 return self._chaininfo(rev)[0]
365 365
366 366 def _chaininfo(self, rev):
367 367 chaininfocache = self._chaininfocache
368 368 if rev in chaininfocache:
369 369 return chaininfocache[rev]
370 370 index = self.index
371 371 generaldelta = self._generaldelta
372 372 iterrev = rev
373 373 e = index[iterrev]
374 374 clen = 0
375 375 compresseddeltalen = 0
376 376 while iterrev != e[3]:
377 377 clen += 1
378 378 compresseddeltalen += e[1]
379 379 if generaldelta:
380 380 iterrev = e[3]
381 381 else:
382 382 iterrev -= 1
383 383 if iterrev in chaininfocache:
384 384 t = chaininfocache[iterrev]
385 385 clen += t[0]
386 386 compresseddeltalen += t[1]
387 387 break
388 388 e = index[iterrev]
389 389 else:
390 390 # Add text length of base since decompressing that also takes
391 391 # work. For cache hits the length is already included.
392 392 compresseddeltalen += e[1]
393 393 r = (clen, compresseddeltalen)
394 394 chaininfocache[rev] = r
395 395 return r
396 396
397 397 def flags(self, rev):
398 398 return self.index[rev][0] & 0xFFFF
399 399 def rawsize(self, rev):
400 400 """return the length of the uncompressed text for a given revision"""
401 401 l = self.index[rev][2]
402 402 if l >= 0:
403 403 return l
404 404
405 405 t = self.revision(self.node(rev))
406 406 return len(t)
407 407 size = rawsize
408 408
409 409 def ancestors(self, revs, stoprev=0, inclusive=False):
410 410 """Generate the ancestors of 'revs' in reverse topological order.
411 411 Does not generate revs lower than stoprev.
412 412
413 413 See the documentation for ancestor.lazyancestors for more details."""
414 414
415 415 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
416 416 inclusive=inclusive)
417 417
418 418 def descendants(self, revs):
419 419 """Generate the descendants of 'revs' in revision order.
420 420
421 421 Yield a sequence of revision numbers starting with a child of
422 422 some rev in revs, i.e., each revision is *not* considered a
423 423 descendant of itself. Results are ordered by revision number (a
424 424 topological sort)."""
425 425 first = min(revs)
426 426 if first == nullrev:
427 427 for i in self:
428 428 yield i
429 429 return
430 430
431 431 seen = set(revs)
432 432 for i in self.revs(start=first + 1):
433 433 for x in self.parentrevs(i):
434 434 if x != nullrev and x in seen:
435 435 seen.add(i)
436 436 yield i
437 437 break
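The sweep above exploits the fact that revision numbers are a topological order: a rev can be marked as soon as one of its parents has been. A toy run on a hand-written parent table (hypothetical data, not the revlog API):

    parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    seen, out = set([1]), []
    for i in range(2, 4):                  # revs after min(revs)
        if any(p in seen for p in parents[i] if p != -1):
            seen.add(i)
            out.append(i)
    assert out == [3]                      # rev 3 merges 1 and 2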
438 438
439 439 def findcommonmissing(self, common=None, heads=None):
440 440 """Return a tuple of the ancestors of common and the ancestors of heads
441 441 that are not ancestors of common. In revset terminology, we return the
442 442 tuple:
443 443
444 444 ::common, (::heads) - (::common)
445 445
446 446 The list is sorted by revision number, meaning it is
447 447 topologically sorted.
448 448
449 449 'heads' and 'common' are both lists of node IDs. If heads is
450 450 not supplied, uses all of the revlog's heads. If common is not
451 451 supplied, uses nullid."""
452 452 if common is None:
453 453 common = [nullid]
454 454 if heads is None:
455 455 heads = self.heads()
456 456
457 457 common = [self.rev(n) for n in common]
458 458 heads = [self.rev(n) for n in heads]
459 459
460 460 # we want the ancestors, but inclusive
461 461 class lazyset(object):
462 462 def __init__(self, lazyvalues):
463 463 self.addedvalues = set()
464 464 self.lazyvalues = lazyvalues
465 465
466 466 def __contains__(self, value):
467 467 return value in self.addedvalues or value in self.lazyvalues
468 468
469 469 def __iter__(self):
470 470 added = self.addedvalues
471 471 for r in added:
472 472 yield r
473 473 for r in self.lazyvalues:
474 474 if r not in added:
475 475 yield r
476 476
477 477 def add(self, value):
478 478 self.addedvalues.add(value)
479 479
480 480 def update(self, values):
481 481 self.addedvalues.update(values)
482 482
483 483 has = lazyset(self.ancestors(common))
484 484 has.add(nullrev)
485 485 has.update(common)
486 486
487 487 # take all ancestors from heads that aren't in has
488 488 missing = set()
489 489 visit = collections.deque(r for r in heads if r not in has)
490 490 while visit:
491 491 r = visit.popleft()
492 492 if r in missing:
493 493 continue
494 494 else:
495 495 missing.add(r)
496 496 for p in self.parentrevs(r):
497 497 if p not in has:
498 498 visit.append(p)
499 499 missing = list(missing)
500 500 missing.sort()
501 501 return has, [self.node(r) for r in missing]
502 502
503 503 def incrementalmissingrevs(self, common=None):
504 504 """Return an object that can be used to incrementally compute the
505 505 revision numbers of the ancestors of arbitrary sets that are not
506 506 ancestors of common. This is an ancestor.incrementalmissingancestors
507 507 object.
508 508
509 509 'common' is a list of revision numbers. If common is not supplied, uses
510 510 nullrev.
511 511 """
512 512 if common is None:
513 513 common = [nullrev]
514 514
515 515 return ancestor.incrementalmissingancestors(self.parentrevs, common)
516 516
517 517 def findmissingrevs(self, common=None, heads=None):
518 518 """Return the revision numbers of the ancestors of heads that
519 519 are not ancestors of common.
520 520
521 521 More specifically, return a list of revision numbers corresponding to
522 522 nodes N such that every N satisfies the following constraints:
523 523
524 524 1. N is an ancestor of some node in 'heads'
525 525 2. N is not an ancestor of any node in 'common'
526 526
527 527 The list is sorted by revision number, meaning it is
528 528 topologically sorted.
529 529
530 530 'heads' and 'common' are both lists of revision numbers. If heads is
531 531 not supplied, uses all of the revlog's heads. If common is not
532 532 supplied, uses nullid."""
533 533 if common is None:
534 534 common = [nullrev]
535 535 if heads is None:
536 536 heads = self.headrevs()
537 537
538 538 inc = self.incrementalmissingrevs(common=common)
539 539 return inc.missingancestors(heads)
540 540
541 541 def findmissing(self, common=None, heads=None):
542 542 """Return the ancestors of heads that are not ancestors of common.
543 543
544 544 More specifically, return a list of nodes N such that every N
545 545 satisfies the following constraints:
546 546
547 547 1. N is an ancestor of some node in 'heads'
548 548 2. N is not an ancestor of any node in 'common'
549 549
550 550 The list is sorted by revision number, meaning it is
551 551 topologically sorted.
552 552
553 553 'heads' and 'common' are both lists of node IDs. If heads is
554 554 not supplied, uses all of the revlog's heads. If common is not
555 555 supplied, uses nullid."""
556 556 if common is None:
557 557 common = [nullid]
558 558 if heads is None:
559 559 heads = self.heads()
560 560
561 561 common = [self.rev(n) for n in common]
562 562 heads = [self.rev(n) for n in heads]
563 563
564 564 inc = self.incrementalmissingrevs(common=common)
565 565 return [self.node(r) for r in inc.missingancestors(heads)]
566 566
567 567 def nodesbetween(self, roots=None, heads=None):
568 568 """Return a topological path from 'roots' to 'heads'.
569 569
570 570 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
571 571 topologically sorted list of all nodes N that satisfy both of
572 572 these constraints:
573 573
574 574 1. N is a descendant of some node in 'roots'
575 575 2. N is an ancestor of some node in 'heads'
576 576
577 577 Every node is considered to be both a descendant and an ancestor
578 578 of itself, so every reachable node in 'roots' and 'heads' will be
579 579 included in 'nodes'.
580 580
581 581 'outroots' is the list of reachable nodes in 'roots', i.e., the
582 582 subset of 'roots' that is returned in 'nodes'. Likewise,
583 583 'outheads' is the subset of 'heads' that is also in 'nodes'.
584 584
585 585 'roots' and 'heads' are both lists of node IDs. If 'roots' is
586 586 unspecified, uses nullid as the only root. If 'heads' is
587 587 unspecified, uses list of all of the revlog's heads."""
588 588 nonodes = ([], [], [])
589 589 if roots is not None:
590 590 roots = list(roots)
591 591 if not roots:
592 592 return nonodes
593 593 lowestrev = min([self.rev(n) for n in roots])
594 594 else:
595 595 roots = [nullid] # Everybody's a descendant of nullid
596 596 lowestrev = nullrev
597 597 if (lowestrev == nullrev) and (heads is None):
598 598 # We want _all_ the nodes!
599 599 return ([self.node(r) for r in self], [nullid], list(self.heads()))
600 600 if heads is None:
601 601 # All nodes are ancestors, so the latest ancestor is the last
602 602 # node.
603 603 highestrev = len(self) - 1
604 604 # Set ancestors to None to signal that every node is an ancestor.
605 605 ancestors = None
606 606 # Set heads to an empty dictionary for later discovery of heads
607 607 heads = {}
608 608 else:
609 609 heads = list(heads)
610 610 if not heads:
611 611 return nonodes
612 612 ancestors = set()
613 613 # Turn heads into a dictionary so we can remove 'fake' heads.
614 614 # Also, later we will be using it to filter out the heads we can't
615 615 # find from roots.
616 616 heads = dict.fromkeys(heads, False)
617 617 # Start at the top and keep marking parents until we're done.
618 618 nodestotag = set(heads)
619 619 # Remember where the top was so we can use it as a limit later.
620 620 highestrev = max([self.rev(n) for n in nodestotag])
621 621 while nodestotag:
622 622 # grab a node to tag
623 623 n = nodestotag.pop()
624 624 # Never tag nullid
625 625 if n == nullid:
626 626 continue
627 627 # A node's revision number represents its place in a
628 628 # topologically sorted list of nodes.
629 629 r = self.rev(n)
630 630 if r >= lowestrev:
631 631 if n not in ancestors:
632 632 # If we are possibly a descendant of one of the roots
633 633 # and we haven't already been marked as an ancestor
634 634 ancestors.add(n) # Mark as ancestor
635 635 # Add non-nullid parents to list of nodes to tag.
636 636 nodestotag.update([p for p in self.parents(n) if
637 637 p != nullid])
638 638 elif n in heads: # We've seen it before, is it a fake head?
639 639 # So it is, real heads should not be the ancestors of
640 640 # any other heads.
641 641 heads.pop(n)
642 642 if not ancestors:
643 643 return nonodes
644 644 # Now that we have our set of ancestors, we want to remove any
645 645 # roots that are not ancestors.
646 646
647 647 # If one of the roots was nullid, everything is included anyway.
648 648 if lowestrev > nullrev:
649 649 # But, since we weren't, let's recompute the lowest rev to not
650 650 # include roots that aren't ancestors.
651 651
652 652 # Filter out roots that aren't ancestors of heads
653 653 roots = [n for n in roots if n in ancestors]
654 654 # Recompute the lowest revision
655 655 if roots:
656 656 lowestrev = min([self.rev(n) for n in roots])
657 657 else:
658 658 # No more roots? Return empty list
659 659 return nonodes
660 660 else:
661 661 # We are descending from nullid, and don't need to care about
662 662 # any other roots.
663 663 lowestrev = nullrev
664 664 roots = [nullid]
665 665 # Transform our roots list into a set.
666 666 descendants = set(roots)
667 667 # Also, keep the original roots so we can filter out roots that aren't
668 668 # 'real' roots (i.e. are descended from other roots).
669 669 roots = descendants.copy()
670 670 # Our topologically sorted list of output nodes.
671 671 orderedout = []
672 672 # Don't start at nullid since we don't want nullid in our output list,
673 673 # and if nullid shows up in descendants, empty parents will look like
674 674 # they're descendants.
675 675 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
676 676 n = self.node(r)
677 677 isdescendant = False
678 678 if lowestrev == nullrev: # Everybody is a descendant of nullid
679 679 isdescendant = True
680 680 elif n in descendants:
681 681 # n is already a descendant
682 682 isdescendant = True
683 683 # This check only needs to be done here because all the roots
684 684 # will start being marked as descendants before the loop.
685 685 if n in roots:
686 686 # If n was a root, check if it's a 'real' root.
687 687 p = tuple(self.parents(n))
688 688 # If any of its parents are descendants, it's not a root.
689 689 if (p[0] in descendants) or (p[1] in descendants):
690 690 roots.remove(n)
691 691 else:
692 692 p = tuple(self.parents(n))
693 693 # A node is a descendant if either of its parents are
694 694 # descendants. (We seeded the descendants set with the roots
695 695 # up there, remember?)
696 696 if (p[0] in descendants) or (p[1] in descendants):
697 697 descendants.add(n)
698 698 isdescendant = True
699 699 if isdescendant and ((ancestors is None) or (n in ancestors)):
700 700 # Only include nodes that are both descendants and ancestors.
701 701 orderedout.append(n)
702 702 if (ancestors is not None) and (n in heads):
703 703 # We're trying to figure out which heads are reachable
704 704 # from roots.
705 705 # Mark this head as having been reached
706 706 heads[n] = True
707 707 elif ancestors is None:
708 708 # Otherwise, we're trying to discover the heads.
709 709 # Assume this is a head because if it isn't, the next step
710 710 # will eventually remove it.
711 711 heads[n] = True
712 712 # But, obviously its parents aren't.
713 713 for p in self.parents(n):
714 714 heads.pop(p, None)
715 715 heads = [n for n, flag in heads.iteritems() if flag]
716 716 roots = list(roots)
717 717 assert orderedout
718 718 assert roots
719 719 assert heads
720 720 return (orderedout, roots, heads)
721 721
722 722 def headrevs(self):
723 723 try:
724 724 return self.index.headrevs()
725 725 except AttributeError:
726 726 return self._headrevs()
727 727
728 728 def computephases(self, roots):
729 return self.index.computephases(roots)
729 return self.index.computephasesmapsets(roots)
730 730
731 731 def _headrevs(self):
732 732 count = len(self)
733 733 if not count:
734 734 return [nullrev]
735 735 # we won't iterate over filtered revs so nobody is a head at start
736 736 ishead = [0] * (count + 1)
737 737 index = self.index
738 738 for r in self:
739 739 ishead[r] = 1 # I may be a head
740 740 e = index[r]
741 741 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
742 742 return [r for r, val in enumerate(ishead) if val]
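A head is simply a rev that no later rev names as a parent. The same pass in miniature, on a hand-written DAG (illustrative only):

    parents = [(-1, -1), (0, -1), (0, -1), (1, 2)]   # rev 3 merges 1 and 2
    ishead = [0] * (len(parents) + 1)                # extra slot absorbs nullrev
    for r, (p1, p2) in enumerate(parents):
        ishead[r] = 1                                # r may be a head
        ishead[p1] = ishead[p2] = 0                  # its parents are not
    assert [r for r in range(len(parents)) if ishead[r]] == [3]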
743 743
744 744 def heads(self, start=None, stop=None):
745 745 """return the list of all nodes that have no children
746 746
747 747 if start is specified, only heads that are descendants of
748 748 start will be returned
749 749 if stop is specified, it will consider all the revs from stop
750 750 as if they had no children
751 751 """
752 752 if start is None and stop is None:
753 753 if not len(self):
754 754 return [nullid]
755 755 return [self.node(r) for r in self.headrevs()]
756 756
757 757 if start is None:
758 758 start = nullid
759 759 if stop is None:
760 760 stop = []
761 761 stoprevs = set([self.rev(n) for n in stop])
762 762 startrev = self.rev(start)
763 763 reachable = set((startrev,))
764 764 heads = set((startrev,))
765 765
766 766 parentrevs = self.parentrevs
767 767 for r in self.revs(start=startrev + 1):
768 768 for p in parentrevs(r):
769 769 if p in reachable:
770 770 if r not in stoprevs:
771 771 reachable.add(r)
772 772 heads.add(r)
773 773 if p in heads and p not in stoprevs:
774 774 heads.remove(p)
775 775
776 776 return [self.node(r) for r in heads]
777 777
778 778 def children(self, node):
779 779 """find the children of a given node"""
780 780 c = []
781 781 p = self.rev(node)
782 782 for r in self.revs(start=p + 1):
783 783 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
784 784 if prevs:
785 785 for pr in prevs:
786 786 if pr == p:
787 787 c.append(self.node(r))
788 788 elif p == nullrev:
789 789 c.append(self.node(r))
790 790 return c
791 791
792 792 def descendant(self, start, end):
793 793 if start == nullrev:
794 794 return True
795 795 for i in self.descendants([start]):
796 796 if i == end:
797 797 return True
798 798 elif i > end:
799 799 break
800 800 return False
801 801
802 802 def commonancestorsheads(self, a, b):
803 803 """calculate all the heads of the common ancestors of nodes a and b"""
804 804 a, b = self.rev(a), self.rev(b)
805 805 try:
806 806 ancs = self.index.commonancestorsheads(a, b)
807 807 except (AttributeError, OverflowError): # C implementation failed
808 808 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
809 809 return map(self.node, ancs)
810 810
811 811 def isancestor(self, a, b):
812 812 """return True if node a is an ancestor of node b
813 813
814 814 The implementation of this is trivial but the use of
815 815 commonancestorsheads is not."""
816 816 return a in self.commonancestorsheads(a, b)
817 817
818 818 def ancestor(self, a, b):
819 819 """calculate the "best" common ancestor of nodes a and b"""
820 820
821 821 a, b = self.rev(a), self.rev(b)
822 822 try:
823 823 ancs = self.index.ancestors(a, b)
824 824 except (AttributeError, OverflowError):
825 825 ancs = ancestor.ancestors(self.parentrevs, a, b)
826 826 if ancs:
827 827 # choose a consistent winner when there's a tie
828 828 return min(map(self.node, ancs))
829 829 return nullid
830 830
831 831 def _match(self, id):
832 832 if isinstance(id, int):
833 833 # rev
834 834 return self.node(id)
835 835 if len(id) == 20:
836 836 # possibly a binary node
837 837 # odds of a binary node being all hex in ASCII are 1 in 10**25
838 838 try:
839 839 node = id
840 840 self.rev(node) # quick search the index
841 841 return node
842 842 except LookupError:
843 843 pass # may be partial hex id
844 844 try:
845 845 # str(rev)
846 846 rev = int(id)
847 847 if str(rev) != id:
848 848 raise ValueError
849 849 if rev < 0:
850 850 rev = len(self) + rev
851 851 if rev < 0 or rev >= len(self):
852 852 raise ValueError
853 853 return self.node(rev)
854 854 except (ValueError, OverflowError):
855 855 pass
856 856 if len(id) == 40:
857 857 try:
858 858 # a full hex nodeid?
859 859 node = bin(id)
860 860 self.rev(node)
861 861 return node
862 862 except (TypeError, LookupError):
863 863 pass
864 864
865 865 def _partialmatch(self, id):
866 866 try:
867 867 n = self.index.partialmatch(id)
868 868 if n and self.hasnode(n):
869 869 return n
870 870 return None
871 871 except RevlogError:
872 872 # parsers.c radix tree lookup gave multiple matches
873 873 # fall through to slow path that filters hidden revisions
874 874 pass
875 875 except (AttributeError, ValueError):
876 876 # we are pure python, or key was too short to search radix tree
877 877 pass
878 878
879 879 if id in self._pcache:
880 880 return self._pcache[id]
881 881
882 882 if len(id) < 40:
883 883 try:
884 884 # hex(node)[:...]
885 885 l = len(id) // 2 # grab an even number of digits
886 886 prefix = bin(id[:l * 2])
887 887 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
888 888 nl = [n for n in nl if hex(n).startswith(id) and
889 889 self.hasnode(n)]
890 890 if len(nl) > 0:
891 891 if len(nl) == 1:
892 892 self._pcache[id] = nl[0]
893 893 return nl[0]
894 894 raise LookupError(id, self.indexfile,
895 895 _('ambiguous identifier'))
896 896 return None
897 897 except TypeError:
898 898 pass
899 899
900 900 def lookup(self, id):
901 901 """locate a node based on:
902 902 - revision number or str(revision number)
903 903 - nodeid or subset of hex nodeid
904 904 """
905 905 n = self._match(id)
906 906 if n is not None:
907 907 return n
908 908 n = self._partialmatch(id)
909 909 if n:
910 910 return n
911 911
912 912 raise LookupError(id, self.indexfile, _('no match found'))
913 913
914 914 def cmp(self, node, text):
915 915 """compare text with a given file revision
916 916
917 917 returns True if text is different from what is stored.
918 918 """
919 919 p1, p2 = self.parents(node)
920 920 return hash(text, p1, p2) != node
921 921
922 922 def _addchunk(self, offset, data):
923 923 o, d = self._chunkcache
924 924 # try to add to existing cache
925 925 if o + len(d) == offset and len(d) + len(data) < _chunksize:
926 926 self._chunkcache = o, d + data
927 927 else:
928 928 self._chunkcache = offset, data
929 929
930 930 def _loadchunk(self, offset, length):
931 931 if self._inline:
932 932 df = self.opener(self.indexfile)
933 933 else:
934 934 df = self.opener(self.datafile)
935 935
936 936 # Cache data both forward and backward around the requested
937 937 # data, in a fixed size window. This helps speed up operations
938 938 # involving reading the revlog backwards.
939 939 cachesize = self._chunkcachesize
940 940 realoffset = offset & ~(cachesize - 1)
941 941 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
942 942 - realoffset)
943 943 df.seek(realoffset)
944 944 d = df.read(reallength)
945 945 df.close()
946 946 self._addchunk(realoffset, d)
947 947 if offset != realoffset or reallength != length:
948 948 return util.buffer(d, offset - realoffset, length)
949 949 return d
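The window arithmetic above rounds every read out to cache-size-aligned spans. With the default 64 KiB cache, a 100-byte read at offset 70000 is widened like this (hypothetical numbers):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert (realoffset, reallength) == (65536, 65536)  # span [65536, 131072)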
950 950
951 951 def _getchunk(self, offset, length):
952 952 o, d = self._chunkcache
953 953 l = len(d)
954 954
955 955 # is it in the cache?
956 956 cachestart = offset - o
957 957 cacheend = cachestart + length
958 958 if cachestart >= 0 and cacheend <= l:
959 959 if cachestart == 0 and cacheend == l:
960 960 return d # avoid a copy
961 961 return util.buffer(d, cachestart, cacheend - cachestart)
962 962
963 963 return self._loadchunk(offset, length)
964 964
965 965 def _chunkraw(self, startrev, endrev):
966 966 start = self.start(startrev)
967 967 end = self.end(endrev)
968 968 if self._inline:
969 969 start += (startrev + 1) * self._io.size
970 970 end += (endrev + 1) * self._io.size
971 971 length = end - start
972 972 return self._getchunk(start, length)
973 973
974 974 def _chunk(self, rev):
975 975 return decompress(self._chunkraw(rev, rev))
976 976
977 977 def _chunks(self, revs):
978 978 '''faster version of [self._chunk(rev) for rev in revs]
979 979
980 980 Assumes that revs is in ascending order.'''
981 981 if not revs:
982 982 return []
983 983 start = self.start
984 984 length = self.length
985 985 inline = self._inline
986 986 iosize = self._io.size
987 987 buffer = util.buffer
988 988
989 989 l = []
990 990 ladd = l.append
991 991
992 992 # preload the cache
993 993 try:
994 994 while True:
995 995 # ensure that the cache doesn't change out from under us
996 996 _cache = self._chunkcache
997 997 self._chunkraw(revs[0], revs[-1])
998 998 if _cache == self._chunkcache:
999 999 break
1000 1000 offset, data = _cache
1001 1001 except OverflowError:
1002 1002 # issue4215 - we can't cache a run of chunks greater than
1003 1003 # 2G on Windows
1004 1004 return [self._chunk(rev) for rev in revs]
1005 1005
1006 1006 for rev in revs:
1007 1007 chunkstart = start(rev)
1008 1008 if inline:
1009 1009 chunkstart += (rev + 1) * iosize
1010 1010 chunklength = length(rev)
1011 1011 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1012 1012
1013 1013 return l
1014 1014
1015 1015 def _chunkclear(self):
1016 1016 self._chunkcache = (0, '')
1017 1017
1018 1018 def deltaparent(self, rev):
1019 1019 """return deltaparent of the given revision"""
1020 1020 base = self.index[rev][3]
1021 1021 if base == rev:
1022 1022 return nullrev
1023 1023 elif self._generaldelta:
1024 1024 return base
1025 1025 else:
1026 1026 return rev - 1
1027 1027
1028 1028 def revdiff(self, rev1, rev2):
1029 1029 """return or calculate a delta between two revisions"""
1030 1030 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1031 1031 return str(self._chunk(rev2))
1032 1032
1033 1033 return mdiff.textdiff(self.revision(rev1),
1034 1034 self.revision(rev2))
1035 1035
1036 1036 def revision(self, nodeorrev):
1037 1037 """return an uncompressed revision of a given node or revision
1038 1038 number.
1039 1039 """
1040 1040 if isinstance(nodeorrev, int):
1041 1041 rev = nodeorrev
1042 1042 node = self.node(rev)
1043 1043 else:
1044 1044 node = nodeorrev
1045 1045 rev = None
1046 1046
1047 1047 _cache = self._cache # grab local copy of cache to avoid thread race
1048 1048 cachedrev = None
1049 1049 if node == nullid:
1050 1050 return ""
1051 1051 if _cache:
1052 1052 if _cache[0] == node:
1053 1053 return _cache[2]
1054 1054 cachedrev = _cache[1]
1055 1055
1056 1056 # look up what we need to read
1057 1057 text = None
1058 1058 if rev is None:
1059 1059 rev = self.rev(node)
1060 1060
1061 1061 # check rev flags
1062 1062 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1063 1063 raise RevlogError(_('incompatible revision flag %x') %
1064 1064 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1065 1065
1066 1066 # build delta chain
1067 1067 chain = []
1068 1068 index = self.index # for performance
1069 1069 generaldelta = self._generaldelta
1070 1070 iterrev = rev
1071 1071 e = index[iterrev]
1072 1072 while iterrev != e[3] and iterrev != cachedrev:
1073 1073 chain.append(iterrev)
1074 1074 if generaldelta:
1075 1075 iterrev = e[3]
1076 1076 else:
1077 1077 iterrev -= 1
1078 1078 e = index[iterrev]
1079 1079
1080 1080 if iterrev == cachedrev:
1081 1081 # cache hit
1082 1082 text = _cache[2]
1083 1083 else:
1084 1084 chain.append(iterrev)
1085 1085 chain.reverse()
1086 1086
1087 1087 # drop cache to save memory
1088 1088 self._cache = None
1089 1089
1090 1090 bins = self._chunks(chain)
1091 1091 if text is None:
1092 1092 text = str(bins[0])
1093 1093 bins = bins[1:]
1094 1094
1095 1095 text = mdiff.patches(text, bins)
1096 1096
1097 1097 text = self._checkhash(text, node, rev)
1098 1098
1099 1099 self._cache = (node, rev, text)
1100 1100 return text
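Reconstruction thus applies one full base text and then every delta in chain order. Each binary delta is a sequence of (start, end, length) hunks in the same format replacediffheader() builds further down; a minimal illustration, assuming mdiff's patch semantics:

    hunk = struct.pack(">lll", 0, 3, 3) + "xyz"  # replace bytes [0, 3) with "xyz"
    assert mdiff.patches("abcdef", [hunk]) == "xyzdef"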
1101 1101
1102 1102 def hash(self, text, p1, p2):
1103 1103 """Compute a node hash.
1104 1104
1105 1105 Available as a function so that subclasses can replace the hash
1106 1106 as needed.
1107 1107 """
1108 1108 return hash(text, p1, p2)
1109 1109
1110 1110 def _checkhash(self, text, node, rev):
1111 1111 p1, p2 = self.parents(node)
1112 1112 self.checkhash(text, p1, p2, node, rev)
1113 1113 return text
1114 1114
1115 1115 def checkhash(self, text, p1, p2, node, rev=None):
1116 1116 if node != self.hash(text, p1, p2):
1117 1117 revornode = rev
1118 1118 if revornode is None:
1119 1119 revornode = templatefilters.short(hex(node))
1120 1120 raise RevlogError(_("integrity check failed on %s:%s")
1121 1121 % (self.indexfile, revornode))
1122 1122
1123 1123 def checkinlinesize(self, tr, fp=None):
1124 1124 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1125 1125 return
1126 1126
1127 1127 trinfo = tr.find(self.indexfile)
1128 1128 if trinfo is None:
1129 1129 raise RevlogError(_("%s not found in the transaction")
1130 1130 % self.indexfile)
1131 1131
1132 1132 trindex = trinfo[2]
1133 1133 if trindex is not None:
1134 1134 dataoff = self.start(trindex)
1135 1135 else:
1136 1136 # revlog was stripped at start of transaction, use all leftover data
1137 1137 trindex = len(self) - 1
1138 1138 dataoff = self.end(-2)
1139 1139
1140 1140 tr.add(self.datafile, dataoff)
1141 1141
1142 1142 if fp:
1143 1143 fp.flush()
1144 1144 fp.close()
1145 1145
1146 1146 df = self.opener(self.datafile, 'w')
1147 1147 try:
1148 1148 for r in self:
1149 1149 df.write(self._chunkraw(r, r))
1150 1150 finally:
1151 1151 df.close()
1152 1152
1153 1153 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1154 1154 self.version &= ~(REVLOGNGINLINEDATA)
1155 1155 self._inline = False
1156 1156 for i in self:
1157 1157 e = self._io.packentry(self.index[i], self.node, self.version, i)
1158 1158 fp.write(e)
1159 1159
1160 1160 # if we don't call close, the temp file will never replace the
1161 1161 # real index
1162 1162 fp.close()
1163 1163
1164 1164 tr.replace(self.indexfile, trindex * self._io.size)
1165 1165 self._chunkclear()
1166 1166
1167 1167 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1168 1168 node=None):
1169 1169 """add a revision to the log
1170 1170
1171 1171 text - the revision data to add
1172 1172 transaction - the transaction object used for rollback
1173 1173 link - the linkrev data to add
1174 1174 p1, p2 - the parent nodeids of the revision
1175 1175 cachedelta - an optional precomputed delta
1176 1176 node - nodeid of revision; typically node is not specified, and it is
1177 1177 computed by default as hash(text, p1, p2), however subclasses might
1178 1178 use a different hashing method (and override checkhash() in that case)
1179 1179 """
1180 1180 if link == nullrev:
1181 1181 raise RevlogError(_("attempted to add linkrev -1 to %s")
1182 1182 % self.indexfile)
1183 1183 node = node or self.hash(text, p1, p2)
1184 1184 if node in self.nodemap:
1185 1185 return node
1186 1186
1187 1187 dfh = None
1188 1188 if not self._inline:
1189 1189 dfh = self.opener(self.datafile, "a")
1190 1190 ifh = self.opener(self.indexfile, "a+")
1191 1191 try:
1192 1192 return self._addrevision(node, text, transaction, link, p1, p2,
1193 1193 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1194 1194 finally:
1195 1195 if dfh:
1196 1196 dfh.close()
1197 1197 ifh.close()
1198 1198
1199 1199 def compress(self, text):
1200 1200 """ generate a possibly-compressed representation of text """
1201 1201 if not text:
1202 1202 return ("", text)
1203 1203 l = len(text)
1204 1204 bin = None
1205 1205 if l < 44:
1206 1206 pass
1207 1207 elif l > 1000000:
1208 1208 # zlib makes an internal copy, thus doubling memory usage for
1209 1209 # large files, so let's do this in pieces
1210 1210 z = zlib.compressobj()
1211 1211 p = []
1212 1212 pos = 0
1213 1213 while pos < l:
1214 1214 pos2 = pos + 2**20
1215 1215 p.append(z.compress(text[pos:pos2]))
1216 1216 pos = pos2
1217 1217 p.append(z.flush())
1218 1218 if sum(map(len, p)) < l:
1219 1219 bin = "".join(p)
1220 1220 else:
1221 1221 bin = _compress(text)
1222 1222 if bin is None or len(bin) > l:
1223 1223 if text[0] == '\0':
1224 1224 return ("", text)
1225 1225 return ('u', text)
1226 1226 return ("", bin)
1227 1227
1228 1228 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1229 1229 cachedelta, ifh, dfh):
1230 1230 """internal function to add revisions to the log
1231 1231
1232 1232 see addrevision for argument descriptions.
1233 1233 invariants:
1234 1234 - text is optional (can be None); if not set, cachedelta must be set.
1235 1235 if both are set, they must correspond to each other.
1236 1236 """
1237 1237 btext = [text]
1238 1238 def buildtext():
1239 1239 if btext[0] is not None:
1240 1240 return btext[0]
1241 1241 # flush any pending writes here so we can read it in revision
1242 1242 if dfh:
1243 1243 dfh.flush()
1244 1244 ifh.flush()
1245 1245 baserev = cachedelta[0]
1246 1246 delta = cachedelta[1]
1247 1247 # special case deltas which replace entire base; no need to decode
1248 1248 # base revision. this neatly avoids censored bases, which throw when
1249 1249 # they're decoded.
1250 1250 hlen = struct.calcsize(">lll")
1251 1251 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1252 1252 len(delta) - hlen):
1253 1253 btext[0] = delta[hlen:]
1254 1254 else:
1255 1255 basetext = self.revision(self.node(baserev))
1256 1256 btext[0] = mdiff.patch(basetext, delta)
1257 1257 try:
1258 1258 self.checkhash(btext[0], p1, p2, node)
1259 1259 if flags & REVIDX_ISCENSORED:
1260 1260 raise RevlogError(_('node %s is not censored') % node)
1261 1261 except CensoredNodeError:
1262 1262 # must pass the censored index flag to add censored revisions
1263 1263 if not flags & REVIDX_ISCENSORED:
1264 1264 raise
1265 1265 return btext[0]
1266 1266
1267 1267 def builddelta(rev):
1268 1268 # can we use the cached delta?
1269 1269 if cachedelta and cachedelta[0] == rev:
1270 1270 delta = cachedelta[1]
1271 1271 else:
1272 1272 t = buildtext()
1273 1273 if self.iscensored(rev):
1274 1274 # deltas based on a censored revision must replace the
1275 1275 # full content in one patch, so delta works everywhere
1276 1276 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1277 1277 delta = header + t
1278 1278 else:
1279 1279 ptext = self.revision(self.node(rev))
1280 1280 delta = mdiff.textdiff(ptext, t)
1281 1281 data = self.compress(delta)
1282 1282 l = len(data[1]) + len(data[0])
1283 1283 if basecache[0] == rev:
1284 1284 chainbase = basecache[1]
1285 1285 else:
1286 1286 chainbase = self.chainbase(rev)
1287 1287 dist = l + offset - self.start(chainbase)
1288 1288 if self._generaldelta:
1289 1289 base = rev
1290 1290 else:
1291 1291 base = chainbase
1292 1292 chainlen, compresseddeltalen = self._chaininfo(rev)
1293 1293 chainlen += 1
1294 1294 compresseddeltalen += l
1295 1295 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1296 1296
1297 1297 curr = len(self)
1298 1298 prev = curr - 1
1299 1299 base = chainbase = curr
1300 1300 chainlen = None
1301 1301 offset = self.end(prev)
1302 1302 d = None
1303 1303 if self._basecache is None:
1304 1304 self._basecache = (prev, self.chainbase(prev))
1305 1305 basecache = self._basecache
1306 1306 p1r, p2r = self.rev(p1), self.rev(p2)
1307 1307
1308 1308 # should we try to build a delta?
1309 1309 if prev != nullrev:
1310 1310 if self._generaldelta:
1311 1311 if p1r >= basecache[1]:
1312 1312 d = builddelta(p1r)
1313 1313 elif p2r >= basecache[1]:
1314 1314 d = builddelta(p2r)
1315 1315 else:
1316 1316 d = builddelta(prev)
1317 1317 else:
1318 1318 d = builddelta(prev)
1319 1319 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1320 1320
1321 1321 # full versions are inserted when the needed deltas
1322 1322 # become comparable to the uncompressed text
1323 1323 if text is None:
1324 1324 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1325 1325 cachedelta[1])
1326 1326 else:
1327 1327 textlen = len(text)
1328 1328
1329 1329 # - 'dist' is the distance from the base revision -- bounding it limits
1330 1330 # the amount of I/O we need to do.
1331 1331 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1332 1332 # to apply -- bounding it limits the amount of CPU we consume.
1333 1333 if (d is None or dist > textlen * 4 or l > textlen or
1334 1334 compresseddeltalen > textlen * 2 or
1335 1335 (self._maxchainlen and chainlen > self._maxchainlen)):
1336 1336 text = buildtext()
1337 1337 data = self.compress(text)
1338 1338 l = len(data[1]) + len(data[0])
1339 1339 base = chainbase = curr
1340 1340
1341 1341 e = (offset_type(offset, flags), l, textlen,
1342 1342 base, link, p1r, p2r, node)
1343 1343 self.index.insert(-1, e)
1344 1344 self.nodemap[node] = curr
1345 1345
1346 1346 entry = self._io.packentry(e, self.node, self.version, curr)
1347 1347 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1348 1348
1349 1349 if type(text) == str: # only accept immutable objects
1350 1350 self._cache = (node, curr, text)
1351 1351 self._basecache = (curr, chainbase)
1352 1352 return node
1353 1353
1354 1354 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1355 1355 curr = len(self) - 1
1356 1356 if not self._inline:
1357 1357 transaction.add(self.datafile, offset)
1358 1358 transaction.add(self.indexfile, curr * len(entry))
1359 1359 if data[0]:
1360 1360 dfh.write(data[0])
1361 1361 dfh.write(data[1])
1362 1362 dfh.flush()
1363 1363 ifh.write(entry)
1364 1364 else:
1365 1365 offset += curr * self._io.size
1366 1366 transaction.add(self.indexfile, offset, curr)
1367 1367 ifh.write(entry)
1368 1368 ifh.write(data[0])
1369 1369 ifh.write(data[1])
1370 1370 self.checkinlinesize(transaction, ifh)
1371 1371
1372 1372 def addgroup(self, bundle, linkmapper, transaction):
1373 1373 """
1374 1374 add a delta group
1375 1375
1376 1376 given a set of deltas, add them to the revision log. the
1377 1377 first delta is against its parent, which should be in our
1378 1378 log, the rest are against the previous delta.
1379 1379 """
1380 1380
1381 1381 # track the base of the current delta log
1382 1382 content = []
1383 1383 node = None
1384 1384
1385 1385 r = len(self)
1386 1386 end = 0
1387 1387 if r:
1388 1388 end = self.end(r - 1)
1389 1389 ifh = self.opener(self.indexfile, "a+")
1390 1390 isize = r * self._io.size
1391 1391 if self._inline:
1392 1392 transaction.add(self.indexfile, end + isize, r)
1393 1393 dfh = None
1394 1394 else:
1395 1395 transaction.add(self.indexfile, isize, r)
1396 1396 transaction.add(self.datafile, end)
1397 1397 dfh = self.opener(self.datafile, "a")
1398 1398 def flush():
1399 1399 if dfh:
1400 1400 dfh.flush()
1401 1401 ifh.flush()
1402 1402 try:
1403 1403 # loop through our set of deltas
1404 1404 chain = None
1405 1405 while True:
1406 1406 chunkdata = bundle.deltachunk(chain)
1407 1407 if not chunkdata:
1408 1408 break
1409 1409 node = chunkdata['node']
1410 1410 p1 = chunkdata['p1']
1411 1411 p2 = chunkdata['p2']
1412 1412 cs = chunkdata['cs']
1413 1413 deltabase = chunkdata['deltabase']
1414 1414 delta = chunkdata['delta']
1415 1415
1416 1416 content.append(node)
1417 1417
1418 1418 link = linkmapper(cs)
1419 1419 if node in self.nodemap:
1420 1420 # this can happen if two branches make the same change
1421 1421 chain = node
1422 1422 continue
1423 1423
1424 1424 for p in (p1, p2):
1425 1425 if p not in self.nodemap:
1426 1426 raise LookupError(p, self.indexfile,
1427 1427 _('unknown parent'))
1428 1428
1429 1429 if deltabase not in self.nodemap:
1430 1430 raise LookupError(deltabase, self.indexfile,
1431 1431 _('unknown delta base'))
1432 1432
1433 1433 baserev = self.rev(deltabase)
1434 1434
1435 1435 if baserev != nullrev and self.iscensored(baserev):
1436 1436 # if base is censored, delta must be full replacement in a
1437 1437 # single patch operation
1438 1438 hlen = struct.calcsize(">lll")
1439 1439 oldlen = self.rawsize(baserev)
1440 1440 newlen = len(delta) - hlen
1441 1441 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1442 1442 raise error.CensoredBaseError(self.indexfile,
1443 1443 self.node(baserev))
1444 1444
1445 1445 flags = REVIDX_DEFAULT_FLAGS
1446 1446 if self._peek_iscensored(baserev, delta, flush):
1447 1447 flags |= REVIDX_ISCENSORED
1448 1448
1449 1449 chain = self._addrevision(node, None, transaction, link,
1450 1450 p1, p2, flags, (baserev, delta),
1451 1451 ifh, dfh)
1452 1452 if not dfh and not self._inline:
1453 1453 # addrevision switched from inline to conventional
1454 1454 # reopen the index
1455 1455 ifh.close()
1456 1456 dfh = self.opener(self.datafile, "a")
1457 1457 ifh = self.opener(self.indexfile, "a")
1458 1458 finally:
1459 1459 if dfh:
1460 1460 dfh.close()
1461 1461 ifh.close()
1462 1462
1463 1463 return content
1464 1464
1465 1465 def iscensored(self, rev):
1466 1466 """Check if a file revision is censored."""
1467 1467 return False
1468 1468
1469 1469 def _peek_iscensored(self, baserev, delta, flush):
1470 1470 """Quickly check if a delta produces a censored revision."""
1471 1471 return False
1472 1472
1473 1473 def getstrippoint(self, minlink):
1474 1474 """find the minimum rev that must be stripped to strip the linkrev
1475 1475
1476 1476 Returns a tuple containing the minimum rev and a set of all revs that
1477 1477 have linkrevs that will be broken by this strip.
1478 1478 """
1479 1479 brokenrevs = set()
1480 1480 strippoint = len(self)
1481 1481
1482 1482 heads = {}
1483 1483 futurelargelinkrevs = set()
1484 1484 for head in self.headrevs():
1485 1485 headlinkrev = self.linkrev(head)
1486 1486 heads[head] = headlinkrev
1487 1487 if headlinkrev >= minlink:
1488 1488 futurelargelinkrevs.add(headlinkrev)
1489 1489
1490 1490 # This algorithm involves walking down the rev graph, starting at the
1491 1491 # heads. Since the revs are topologically sorted according to linkrev,
1492 1492 # once all head linkrevs are below the minlink, we know there are
1493 1493 # no more revs that could have a linkrev greater than minlink.
1494 1494 # So we can stop walking.
1495 1495 while futurelargelinkrevs:
1496 1496 strippoint -= 1
1497 1497 linkrev = heads.pop(strippoint)
1498 1498
1499 1499 if linkrev < minlink:
1500 1500 brokenrevs.add(strippoint)
1501 1501 else:
1502 1502 futurelargelinkrevs.remove(linkrev)
1503 1503
1504 1504 for p in self.parentrevs(strippoint):
1505 1505 if p != nullrev:
1506 1506 plinkrev = self.linkrev(p)
1507 1507 heads[p] = plinkrev
1508 1508 if plinkrev >= minlink:
1509 1509 futurelargelinkrevs.add(plinkrev)
1510 1510
1511 1511 return strippoint, brokenrevs
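In the simplest case of a linear revlog, where linkrevs increase with revs, the walk above degenerates to scanning back from the tip; a much-reduced sketch of the same invariant (hand-written linkrevs, illustrative):

    linkrevs = [0, 1, 2, 3]        # linear history, rev == linkrev
    minlink = 2
    strippoint = len(linkrevs)
    while strippoint and linkrevs[strippoint - 1] >= minlink:
        strippoint -= 1
    assert strippoint == 2         # revs 2 and 3 would be truncated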
1512 1512
1513 1513 def strip(self, minlink, transaction):
1514 1514 """truncate the revlog on the first revision with a linkrev >= minlink
1515 1515
1516 1516 This function is called when we're stripping revision minlink and
1517 1517 its descendants from the repository.
1518 1518
1519 1519 We have to remove all revisions with linkrev >= minlink, because
1520 1520 the equivalent changelog revisions will be renumbered after the
1521 1521 strip.
1522 1522
1523 1523 So we truncate the revlog on the first of these revisions, and
1524 1524 trust that the caller has saved the revisions that shouldn't be
1525 1525 removed and that it'll re-add them after this truncation.
1526 1526 """
1527 1527 if len(self) == 0:
1528 1528 return
1529 1529
1530 1530 rev, _ = self.getstrippoint(minlink)
1531 1531 if rev == len(self):
1532 1532 return
1533 1533
1534 1534 # first truncate the files on disk
1535 1535 end = self.start(rev)
1536 1536 if not self._inline:
1537 1537 transaction.add(self.datafile, end)
1538 1538 end = rev * self._io.size
1539 1539 else:
1540 1540 end += rev * self._io.size
1541 1541
1542 1542 transaction.add(self.indexfile, end)
1543 1543
1544 1544 # then reset internal state in memory to forget those revisions
1545 1545 self._cache = None
1546 1546 self._chaininfocache = {}
1547 1547 self._chunkclear()
1548 1548 for x in xrange(rev, len(self)):
1549 1549 del self.nodemap[self.node(x)]
1550 1550
1551 1551 del self.index[rev:-1]
1552 1552
1553 1553 def checksize(self):
1554 1554 expected = 0
1555 1555 if len(self):
1556 1556 expected = max(0, self.end(len(self) - 1))
1557 1557
1558 1558 try:
1559 1559 f = self.opener(self.datafile)
1560 1560 f.seek(0, 2)
1561 1561 actual = f.tell()
1562 1562 f.close()
1563 1563 dd = actual - expected
1564 1564 except IOError, inst:
1565 1565 if inst.errno != errno.ENOENT:
1566 1566 raise
1567 1567 dd = 0
1568 1568
1569 1569 try:
1570 1570 f = self.opener(self.indexfile)
1571 1571 f.seek(0, 2)
1572 1572 actual = f.tell()
1573 1573 f.close()
1574 1574 s = self._io.size
1575 1575 i = max(0, actual // s)
1576 1576 di = actual - (i * s)
1577 1577 if self._inline:
1578 1578 databytes = 0
1579 1579 for r in self:
1580 1580 databytes += max(0, self.length(r))
1581 1581 dd = 0
1582 1582 di = actual - len(self) * s - databytes
1583 1583 except IOError, inst:
1584 1584 if inst.errno != errno.ENOENT:
1585 1585 raise
1586 1586 di = 0
1587 1587
1588 1588 return (dd, di)
1589 1589
1590 1590 def files(self):
1591 1591 res = [self.indexfile]
1592 1592 if not self._inline:
1593 1593 res.append(self.datafile)
1594 1594 return res