revlog: move delta check to its own function...
Durham Goode
r26115:748347e0 default
@@ -1,1613 +1,1626 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
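For illustration, a minimal standalone sketch of the helpers above, which round-trip a file offset and a 16-bit type field through one integer:

    q = offset_type(12345, 0x8000)        # offset in the high bits, type in the low 16
    assert getoffset(q) == 12345
    assert gettype(q) == 0x8000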
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68         # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
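To make the parent ordering above concrete, a hedged sketch of the same digest computed with hashlib directly (assuming 20-byte binary nodeids, as elsewhere in this file):

    import hashlib
    p1, p2 = 'a' * 20, 'b' * 20           # illustrative parent nodeids
    s = hashlib.sha1(min(p1, p2))         # parents are sorted, low one first
    s.update(max(p1, p2))
    s.update('file contents')
    assert s.digest() == hash('file contents', p1, p2)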
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error as e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
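The one-byte tag dispatch can be exercised directly; for instance (zlib output begins with 'x', which is why that byte selects the zlib path):

    raw = 'some revision text'
    assert decompress('u' + raw) == raw             # stored uncompressed
    assert decompress(zlib.compress(raw)) == raw    # 'x' header -> zlib
    assert decompress('') == ''                     # empty passes through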
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107
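As a size check on the layout just described (four 4-byte fields plus three 20-byte nodeids):

    assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20   # 76 bytes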
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 versionformat = ">I"
153 153
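The RevlogNG entry is 64 bytes (offset and flags share the leading 'Q'; the trailing '12x' pads the 20-byte nodeid to 32 bytes), and packentry() below overlays the version header on entry 0's first four bytes:

    assert struct.calcsize(indexformatng) == 8 + 4 * 6 + 20 + 12   # 64
    assert struct.calcsize(versionformat) == 4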
154 154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 155 # signed integer)
156 156 _maxentrysize = 0x7fffffff
157 157
158 158 class revlogio(object):
159 159 def __init__(self):
160 160 self.size = struct.calcsize(indexformatng)
161 161
162 162 def parseindex(self, data, inline):
163 163 # call the C implementation to parse the index data
164 164 index, cache = parsers.parse_index2(data, inline)
165 165 return index, getattr(index, 'nodemap', None), cache
166 166
167 167 def packentry(self, entry, node, version, rev):
168 168 p = _pack(indexformatng, *entry)
169 169 if rev == 0:
170 170 p = _pack(versionformat, version) + p[4:]
171 171 return p
172 172
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
198 198 def __init__(self, opener, indexfile):
199 199 """
200 200 create a revlog object
201 201
202 202 opener is a function that abstracts the file opening operation
203 203 and can be used to implement COW semantics or the like.
204 204 """
205 205 self.indexfile = indexfile
206 206 self.datafile = indexfile[:-2] + ".d"
207 207 self.opener = opener
208 208 self._cache = None
209 209 self._basecache = None
210 210 self._chunkcache = (0, '')
211 211 self._chunkcachesize = 65536
212 212 self._maxchainlen = None
213 213 self.index = []
214 214 self._pcache = {}
215 215 self._nodecache = {nullid: nullrev}
216 216 self._nodepos = None
217 217
218 218 v = REVLOG_DEFAULT_VERSION
219 219 opts = getattr(opener, 'options', None)
220 220 if opts is not None:
221 221 if 'revlogv1' in opts:
222 222 if 'generaldelta' in opts:
223 223 v |= REVLOGGENERALDELTA
224 224 else:
225 225 v = 0
226 226 if 'chunkcachesize' in opts:
227 227 self._chunkcachesize = opts['chunkcachesize']
228 228 if 'maxchainlen' in opts:
229 229 self._maxchainlen = opts['maxchainlen']
230 230
231 231 if self._chunkcachesize <= 0:
232 232 raise RevlogError(_('revlog chunk cache size %r is not greater '
233 233 'than 0') % self._chunkcachesize)
234 234 elif self._chunkcachesize & (self._chunkcachesize - 1):
235 235 raise RevlogError(_('revlog chunk cache size %r is not a power '
236 236 'of 2') % self._chunkcachesize)
237 237
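        # (illustration) the power-of-two test above uses the usual bit
        # trick: for x > 0, x & (x - 1) clears the lowest set bit, so the
        # result is zero exactly when x is a power of two, e.g.
        # 65536 & 65535 == 0 while 65537 & 65536 != 0.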
238 238 i = ''
239 239 self._initempty = True
240 240 try:
241 241 f = self.opener(self.indexfile)
242 242 i = f.read()
243 243 f.close()
244 244 if len(i) > 0:
245 245 v = struct.unpack(versionformat, i[:4])[0]
246 246 self._initempty = False
247 247 except IOError as inst:
248 248 if inst.errno != errno.ENOENT:
249 249 raise
250 250
251 251 self.version = v
252 252 self._inline = v & REVLOGNGINLINEDATA
253 253 self._generaldelta = v & REVLOGGENERALDELTA
254 254 flags = v & ~0xFFFF
255 255 fmt = v & 0xFFFF
256 256 if fmt == REVLOGV0 and flags:
257 257 raise RevlogError(_("index %s unknown flags %#04x for format v0")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
260 260 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
261 261 % (self.indexfile, flags >> 16))
262 262 elif fmt > REVLOGNG:
263 263 raise RevlogError(_("index %s unknown format %d")
264 264 % (self.indexfile, fmt))
265 265
266 266 self._io = revlogio()
267 267 if self.version == REVLOGV0:
268 268 self._io = revlogoldio()
269 269 try:
270 270 d = self._io.parseindex(i, self._inline)
271 271 except (ValueError, IndexError):
272 272 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
273 273 self.index, nodemap, self._chunkcache = d
274 274 if nodemap is not None:
275 275 self.nodemap = self._nodecache = nodemap
276 276 if not self._chunkcache:
277 277 self._chunkclear()
278 278 # revnum -> (chain-length, sum-delta-length)
279 279 self._chaininfocache = {}
280 280
281 281 def tip(self):
282 282 return self.node(len(self.index) - 2)
283 283 def __contains__(self, rev):
284 284 return 0 <= rev < len(self)
285 285 def __len__(self):
286 286 return len(self.index) - 1
287 287 def __iter__(self):
288 288 return iter(xrange(len(self)))
289 289 def revs(self, start=0, stop=None):
290 290 """iterate over all rev in this revlog (from start to stop)"""
291 291 step = 1
292 292 if stop is not None:
293 293 if start > stop:
294 294 step = -1
295 295 stop += step
296 296 else:
297 297 stop = len(self)
298 298 return xrange(start, stop, step)
299 299
300 300 @util.propertycache
301 301 def nodemap(self):
302 302 self.rev(self.node(0))
303 303 return self._nodecache
304 304
305 305 def hasnode(self, node):
306 306 try:
307 307 self.rev(node)
308 308 return True
309 309 except KeyError:
310 310 return False
311 311
312 312 def clearcaches(self):
313 313 try:
314 314 self._nodecache.clearcaches()
315 315 except AttributeError:
316 316 self._nodecache = {nullid: nullrev}
317 317 self._nodepos = None
318 318
319 319 def rev(self, node):
320 320 try:
321 321 return self._nodecache[node]
322 322 except TypeError:
323 323 raise
324 324 except RevlogError:
325 325 # parsers.c radix tree lookup failed
326 326 raise LookupError(node, self.indexfile, _('no node'))
327 327 except KeyError:
328 328 # pure python cache lookup failed
329 329 n = self._nodecache
330 330 i = self.index
331 331 p = self._nodepos
332 332 if p is None:
333 333 p = len(i) - 2
334 334 for r in xrange(p, -1, -1):
335 335 v = i[r][7]
336 336 n[v] = r
337 337 if v == node:
338 338 self._nodepos = r - 1
339 339 return r
340 340 raise LookupError(node, self.indexfile, _('no node'))
341 341
342 342 def node(self, rev):
343 343 return self.index[rev][7]
344 344 def linkrev(self, rev):
345 345 return self.index[rev][4]
346 346 def parents(self, node):
347 347 i = self.index
348 348 d = i[self.rev(node)]
349 349 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
350 350 def parentrevs(self, rev):
351 351 return self.index[rev][5:7]
352 352 def start(self, rev):
353 353 return int(self.index[rev][0] >> 16)
354 354 def end(self, rev):
355 355 return self.start(rev) + self.length(rev)
356 356 def length(self, rev):
357 357 return self.index[rev][1]
358 358 def chainbase(self, rev):
359 359 index = self.index
360 360 base = index[rev][3]
361 361 while base != rev:
362 362 rev = base
363 363 base = index[rev][3]
364 364 return base
365 365 def chainlen(self, rev):
366 366 return self._chaininfo(rev)[0]
367 367
368 368 def _chaininfo(self, rev):
369 369 chaininfocache = self._chaininfocache
370 370 if rev in chaininfocache:
371 371 return chaininfocache[rev]
372 372 index = self.index
373 373 generaldelta = self._generaldelta
374 374 iterrev = rev
375 375 e = index[iterrev]
376 376 clen = 0
377 377 compresseddeltalen = 0
378 378 while iterrev != e[3]:
379 379 clen += 1
380 380 compresseddeltalen += e[1]
381 381 if generaldelta:
382 382 iterrev = e[3]
383 383 else:
384 384 iterrev -= 1
385 385 if iterrev in chaininfocache:
386 386 t = chaininfocache[iterrev]
387 387 clen += t[0]
388 388 compresseddeltalen += t[1]
389 389 break
390 390 e = index[iterrev]
391 391 else:
392 392 # Add text length of base since decompressing that also takes
393 393 # work. For cache hits the length is already included.
394 394 compresseddeltalen += e[1]
395 395 r = (clen, compresseddeltalen)
396 396 chaininfocache[rev] = r
397 397 return r
398 398
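    # (illustration) the pair returned above is (deltas applied, total
    # compressed bytes read, compressed base included): a chain of three
    # 20-byte deltas over a 100-byte base yields (3, 160).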
399 399 def flags(self, rev):
400 400 return self.index[rev][0] & 0xFFFF
401 401 def rawsize(self, rev):
402 402 """return the length of the uncompressed text for a given revision"""
403 403 l = self.index[rev][2]
404 404 if l >= 0:
405 405 return l
406 406
407 407 t = self.revision(self.node(rev))
408 408 return len(t)
409 409 size = rawsize
410 410
411 411 def ancestors(self, revs, stoprev=0, inclusive=False):
412 412 """Generate the ancestors of 'revs' in reverse topological order.
413 413 Does not generate revs lower than stoprev.
414 414
415 415 See the documentation for ancestor.lazyancestors for more details."""
416 416
417 417 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
418 418 inclusive=inclusive)
419 419
420 420 def descendants(self, revs):
421 421 """Generate the descendants of 'revs' in revision order.
422 422
423 423 Yield a sequence of revision numbers starting with a child of
424 424 some rev in revs, i.e., each revision is *not* considered a
425 425 descendant of itself. Results are ordered by revision number (a
426 426 topological sort)."""
427 427 first = min(revs)
428 428 if first == nullrev:
429 429 for i in self:
430 430 yield i
431 431 return
432 432
433 433 seen = set(revs)
434 434 for i in self.revs(start=first + 1):
435 435 for x in self.parentrevs(i):
436 436 if x != nullrev and x in seen:
437 437 seen.add(i)
438 438 yield i
439 439 break
440 440
441 441 def findcommonmissing(self, common=None, heads=None):
442 442 """Return a tuple of the ancestors of common and the ancestors of heads
443 443 that are not ancestors of common. In revset terminology, we return the
444 444 tuple:
445 445
446 446 ::common, (::heads) - (::common)
447 447
448 448 The list is sorted by revision number, meaning it is
449 449 topologically sorted.
450 450
451 451 'heads' and 'common' are both lists of node IDs. If heads is
452 452 not supplied, uses all of the revlog's heads. If common is not
453 453 supplied, uses nullid."""
454 454 if common is None:
455 455 common = [nullid]
456 456 if heads is None:
457 457 heads = self.heads()
458 458
459 459 common = [self.rev(n) for n in common]
460 460 heads = [self.rev(n) for n in heads]
461 461
462 462 # we want the ancestors, but inclusive
463 463 class lazyset(object):
464 464 def __init__(self, lazyvalues):
465 465 self.addedvalues = set()
466 466 self.lazyvalues = lazyvalues
467 467
468 468 def __contains__(self, value):
469 469 return value in self.addedvalues or value in self.lazyvalues
470 470
471 471 def __iter__(self):
472 472 added = self.addedvalues
473 473 for r in added:
474 474 yield r
475 475 for r in self.lazyvalues:
476 476 if not r in added:
477 477 yield r
478 478
479 479 def add(self, value):
480 480 self.addedvalues.add(value)
481 481
482 482 def update(self, values):
483 483 self.addedvalues.update(values)
484 484
485 485 has = lazyset(self.ancestors(common))
486 486 has.add(nullrev)
487 487 has.update(common)
488 488
489 489 # take all ancestors from heads that aren't in has
490 490 missing = set()
491 491 visit = collections.deque(r for r in heads if r not in has)
492 492 while visit:
493 493 r = visit.popleft()
494 494 if r in missing:
495 495 continue
496 496 else:
497 497 missing.add(r)
498 498 for p in self.parentrevs(r):
499 499 if p not in has:
500 500 visit.append(p)
501 501 missing = list(missing)
502 502 missing.sort()
503 503 return has, [self.node(r) for r in missing]
504 504
505 505 def incrementalmissingrevs(self, common=None):
506 506 """Return an object that can be used to incrementally compute the
507 507 revision numbers of the ancestors of arbitrary sets that are not
508 508 ancestors of common. This is an ancestor.incrementalmissingancestors
509 509 object.
510 510
511 511 'common' is a list of revision numbers. If common is not supplied, uses
512 512 nullrev.
513 513 """
514 514 if common is None:
515 515 common = [nullrev]
516 516
517 517 return ancestor.incrementalmissingancestors(self.parentrevs, common)
518 518
519 519 def findmissingrevs(self, common=None, heads=None):
520 520 """Return the revision numbers of the ancestors of heads that
521 521 are not ancestors of common.
522 522
523 523 More specifically, return a list of revision numbers corresponding to
524 524 nodes N such that every N satisfies the following constraints:
525 525
526 526 1. N is an ancestor of some node in 'heads'
527 527 2. N is not an ancestor of any node in 'common'
528 528
529 529 The list is sorted by revision number, meaning it is
530 530 topologically sorted.
531 531
532 532 'heads' and 'common' are both lists of revision numbers. If heads is
533 533 not supplied, uses all of the revlog's heads. If common is not
534 534 supplied, uses nullid."""
535 535 if common is None:
536 536 common = [nullrev]
537 537 if heads is None:
538 538 heads = self.headrevs()
539 539
540 540 inc = self.incrementalmissingrevs(common=common)
541 541 return inc.missingancestors(heads)
542 542
543 543 def findmissing(self, common=None, heads=None):
544 544 """Return the ancestors of heads that are not ancestors of common.
545 545
546 546 More specifically, return a list of nodes N such that every N
547 547 satisfies the following constraints:
548 548
549 549 1. N is an ancestor of some node in 'heads'
550 550 2. N is not an ancestor of any node in 'common'
551 551
552 552 The list is sorted by revision number, meaning it is
553 553 topologically sorted.
554 554
555 555 'heads' and 'common' are both lists of node IDs. If heads is
556 556 not supplied, uses all of the revlog's heads. If common is not
557 557 supplied, uses nullid."""
558 558 if common is None:
559 559 common = [nullid]
560 560 if heads is None:
561 561 heads = self.heads()
562 562
563 563 common = [self.rev(n) for n in common]
564 564 heads = [self.rev(n) for n in heads]
565 565
566 566 inc = self.incrementalmissingrevs(common=common)
567 567 return [self.node(r) for r in inc.missingancestors(heads)]
568 568
569 569 def nodesbetween(self, roots=None, heads=None):
570 570 """Return a topological path from 'roots' to 'heads'.
571 571
572 572 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
573 573 topologically sorted list of all nodes N that satisfy both of
574 574 these constraints:
575 575
576 576 1. N is a descendant of some node in 'roots'
577 577 2. N is an ancestor of some node in 'heads'
578 578
579 579 Every node is considered to be both a descendant and an ancestor
580 580 of itself, so every reachable node in 'roots' and 'heads' will be
581 581 included in 'nodes'.
582 582
583 583 'outroots' is the list of reachable nodes in 'roots', i.e., the
584 584 subset of 'roots' that is returned in 'nodes'. Likewise,
585 585 'outheads' is the subset of 'heads' that is also in 'nodes'.
586 586
587 587 'roots' and 'heads' are both lists of node IDs. If 'roots' is
588 588 unspecified, uses nullid as the only root. If 'heads' is
589 589 unspecified, uses list of all of the revlog's heads."""
590 590 nonodes = ([], [], [])
591 591 if roots is not None:
592 592 roots = list(roots)
593 593 if not roots:
594 594 return nonodes
595 595 lowestrev = min([self.rev(n) for n in roots])
596 596 else:
597 597 roots = [nullid] # Everybody's a descendant of nullid
598 598 lowestrev = nullrev
599 599 if (lowestrev == nullrev) and (heads is None):
600 600 # We want _all_ the nodes!
601 601 return ([self.node(r) for r in self], [nullid], list(self.heads()))
602 602 if heads is None:
603 603 # All nodes are ancestors, so the latest ancestor is the last
604 604 # node.
605 605 highestrev = len(self) - 1
606 606 # Set ancestors to None to signal that every node is an ancestor.
607 607 ancestors = None
608 608 # Set heads to an empty dictionary for later discovery of heads
609 609 heads = {}
610 610 else:
611 611 heads = list(heads)
612 612 if not heads:
613 613 return nonodes
614 614 ancestors = set()
615 615 # Turn heads into a dictionary so we can remove 'fake' heads.
616 616 # Also, later we will be using it to filter out the heads we can't
617 617 # find from roots.
618 618 heads = dict.fromkeys(heads, False)
619 619 # Start at the top and keep marking parents until we're done.
620 620 nodestotag = set(heads)
621 621 # Remember where the top was so we can use it as a limit later.
622 622 highestrev = max([self.rev(n) for n in nodestotag])
623 623 while nodestotag:
624 624 # grab a node to tag
625 625 n = nodestotag.pop()
626 626 # Never tag nullid
627 627 if n == nullid:
628 628 continue
629 629 # A node's revision number represents its place in a
630 630 # topologically sorted list of nodes.
631 631 r = self.rev(n)
632 632 if r >= lowestrev:
633 633 if n not in ancestors:
634 634 # If we are possibly a descendant of one of the roots
635 635 # and we haven't already been marked as an ancestor
636 636 ancestors.add(n) # Mark as ancestor
637 637 # Add non-nullid parents to list of nodes to tag.
638 638 nodestotag.update([p for p in self.parents(n) if
639 639 p != nullid])
640 640 elif n in heads: # We've seen it before, is it a fake head?
641 641 # So it is, real heads should not be the ancestors of
642 642 # any other heads.
643 643 heads.pop(n)
644 644 if not ancestors:
645 645 return nonodes
646 646 # Now that we have our set of ancestors, we want to remove any
647 647 # roots that are not ancestors.
648 648
649 649 # If one of the roots was nullid, everything is included anyway.
650 650 if lowestrev > nullrev:
651 651 # But, since we weren't, let's recompute the lowest rev to not
652 652 # include roots that aren't ancestors.
653 653
654 654 # Filter out roots that aren't ancestors of heads
655 655 roots = [n for n in roots if n in ancestors]
656 656 # Recompute the lowest revision
657 657 if roots:
658 658 lowestrev = min([self.rev(n) for n in roots])
659 659 else:
660 660 # No more roots? Return empty list
661 661 return nonodes
662 662 else:
663 663 # We are descending from nullid, and don't need to care about
664 664 # any other roots.
665 665 lowestrev = nullrev
666 666 roots = [nullid]
667 667 # Transform our roots list into a set.
668 668 descendants = set(roots)
669 669 # Also, keep the original roots so we can filter out roots that aren't
670 670 # 'real' roots (i.e. are descended from other roots).
671 671 roots = descendants.copy()
672 672 # Our topologically sorted list of output nodes.
673 673 orderedout = []
674 674 # Don't start at nullid since we don't want nullid in our output list,
675 675 # and if nullid shows up in descendants, empty parents will look like
676 676 # they're descendants.
677 677 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
678 678 n = self.node(r)
679 679 isdescendant = False
680 680 if lowestrev == nullrev: # Everybody is a descendant of nullid
681 681 isdescendant = True
682 682 elif n in descendants:
683 683 # n is already a descendant
684 684 isdescendant = True
685 685 # This check only needs to be done here because all the roots
686 686                 # will start being marked as descendants before the loop.
687 687 if n in roots:
688 688 # If n was a root, check if it's a 'real' root.
689 689 p = tuple(self.parents(n))
690 690 # If any of its parents are descendants, it's not a root.
691 691 if (p[0] in descendants) or (p[1] in descendants):
692 692 roots.remove(n)
693 693 else:
694 694 p = tuple(self.parents(n))
695 695 # A node is a descendant if either of its parents are
696 696 # descendants. (We seeded the dependents list with the roots
697 697 # up there, remember?)
698 698 if (p[0] in descendants) or (p[1] in descendants):
699 699 descendants.add(n)
700 700 isdescendant = True
701 701 if isdescendant and ((ancestors is None) or (n in ancestors)):
702 702 # Only include nodes that are both descendants and ancestors.
703 703 orderedout.append(n)
704 704 if (ancestors is not None) and (n in heads):
705 705 # We're trying to figure out which heads are reachable
706 706 # from roots.
707 707 # Mark this head as having been reached
708 708 heads[n] = True
709 709 elif ancestors is None:
710 710 # Otherwise, we're trying to discover the heads.
711 711 # Assume this is a head because if it isn't, the next step
712 712 # will eventually remove it.
713 713 heads[n] = True
714 714 # But, obviously its parents aren't.
715 715 for p in self.parents(n):
716 716 heads.pop(p, None)
717 717 heads = [n for n, flag in heads.iteritems() if flag]
718 718 roots = list(roots)
719 719 assert orderedout
720 720 assert roots
721 721 assert heads
722 722 return (orderedout, roots, heads)
723 723
724 724 def headrevs(self):
725 725 try:
726 726 return self.index.headrevs()
727 727 except AttributeError:
728 728 return self._headrevs()
729 729
730 730 def computephases(self, roots):
731 731 return self.index.computephasesmapsets(roots)
732 732
733 733 def _headrevs(self):
734 734 count = len(self)
735 735 if not count:
736 736 return [nullrev]
737 737 # we won't iter over filtered rev so nobody is a head at start
738 738 ishead = [0] * (count + 1)
739 739 index = self.index
740 740 for r in self:
741 741             ishead[r] = 1 # I may be a head
742 742             e = index[r]
743 743             ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
744 744 return [r for r, val in enumerate(ishead) if val]
745 745
746 746 def heads(self, start=None, stop=None):
747 747 """return the list of all nodes that have no children
748 748
749 749 if start is specified, only heads that are descendants of
750 750 start will be returned
751 751 if stop is specified, it will consider all the revs from stop
752 752 as if they had no children
753 753 """
754 754 if start is None and stop is None:
755 755 if not len(self):
756 756 return [nullid]
757 757 return [self.node(r) for r in self.headrevs()]
758 758
759 759 if start is None:
760 760 start = nullid
761 761 if stop is None:
762 762 stop = []
763 763 stoprevs = set([self.rev(n) for n in stop])
764 764 startrev = self.rev(start)
765 765 reachable = set((startrev,))
766 766 heads = set((startrev,))
767 767
768 768 parentrevs = self.parentrevs
769 769 for r in self.revs(start=startrev + 1):
770 770 for p in parentrevs(r):
771 771 if p in reachable:
772 772 if r not in stoprevs:
773 773 reachable.add(r)
774 774 heads.add(r)
775 775 if p in heads and p not in stoprevs:
776 776 heads.remove(p)
777 777
778 778 return [self.node(r) for r in heads]
779 779
780 780 def children(self, node):
781 781 """find the children of a given node"""
782 782 c = []
783 783 p = self.rev(node)
784 784 for r in self.revs(start=p + 1):
785 785 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
786 786 if prevs:
787 787 for pr in prevs:
788 788 if pr == p:
789 789 c.append(self.node(r))
790 790 elif p == nullrev:
791 791 c.append(self.node(r))
792 792 return c
793 793
794 794 def descendant(self, start, end):
795 795 if start == nullrev:
796 796 return True
797 797 for i in self.descendants([start]):
798 798 if i == end:
799 799 return True
800 800 elif i > end:
801 801 break
802 802 return False
803 803
804 804 def commonancestorsheads(self, a, b):
805 805 """calculate all the heads of the common ancestors of nodes a and b"""
806 806 a, b = self.rev(a), self.rev(b)
807 807 try:
808 808 ancs = self.index.commonancestorsheads(a, b)
809 809 except (AttributeError, OverflowError): # C implementation failed
810 810 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
811 811 return map(self.node, ancs)
812 812
813 813 def isancestor(self, a, b):
814 814 """return True if node a is an ancestor of node b
815 815
816 816 The implementation of this is trivial but the use of
817 817 commonancestorsheads is not."""
818 818 return a in self.commonancestorsheads(a, b)
819 819
820 820 def ancestor(self, a, b):
821 821 """calculate the "best" common ancestor of nodes a and b"""
822 822
823 823 a, b = self.rev(a), self.rev(b)
824 824 try:
825 825 ancs = self.index.ancestors(a, b)
826 826 except (AttributeError, OverflowError):
827 827 ancs = ancestor.ancestors(self.parentrevs, a, b)
828 828 if ancs:
829 829 # choose a consistent winner when there's a tie
830 830 return min(map(self.node, ancs))
831 831 return nullid
832 832
833 833 def _match(self, id):
834 834 if isinstance(id, int):
835 835 # rev
836 836 return self.node(id)
837 837 if len(id) == 20:
838 838 # possibly a binary node
839 839 # odds of a binary node being all hex in ASCII are 1 in 10**25
840 840 try:
841 841 node = id
842 842 self.rev(node) # quick search the index
843 843 return node
844 844 except LookupError:
845 845 pass # may be partial hex id
846 846 try:
847 847 # str(rev)
848 848 rev = int(id)
849 849 if str(rev) != id:
850 850 raise ValueError
851 851 if rev < 0:
852 852 rev = len(self) + rev
853 853 if rev < 0 or rev >= len(self):
854 854 raise ValueError
855 855 return self.node(rev)
856 856 except (ValueError, OverflowError):
857 857 pass
858 858 if len(id) == 40:
859 859 try:
860 860 # a full hex nodeid?
861 861 node = bin(id)
862 862 self.rev(node)
863 863 return node
864 864 except (TypeError, LookupError):
865 865 pass
866 866
867 867 def _partialmatch(self, id):
868 868 try:
869 869 n = self.index.partialmatch(id)
870 870 if n and self.hasnode(n):
871 871 return n
872 872 return None
873 873 except RevlogError:
874 874 # parsers.c radix tree lookup gave multiple matches
875 875 # fall through to slow path that filters hidden revisions
876 876 pass
877 877 except (AttributeError, ValueError):
878 878 # we are pure python, or key was too short to search radix tree
879 879 pass
880 880
881 881 if id in self._pcache:
882 882 return self._pcache[id]
883 883
884 884 if len(id) < 40:
885 885 try:
886 886 # hex(node)[:...]
887 887 l = len(id) // 2 # grab an even number of digits
888 888 prefix = bin(id[:l * 2])
889 889 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
890 890 nl = [n for n in nl if hex(n).startswith(id) and
891 891 self.hasnode(n)]
892 892 if len(nl) > 0:
893 893 if len(nl) == 1:
894 894 self._pcache[id] = nl[0]
895 895 return nl[0]
896 896 raise LookupError(id, self.indexfile,
897 897 _('ambiguous identifier'))
898 898 return None
899 899 except TypeError:
900 900 pass
901 901
902 902 def lookup(self, id):
903 903 """locate a node based on:
904 904 - revision number or str(revision number)
905 905 - nodeid or subset of hex nodeid
906 906 """
907 907 n = self._match(id)
908 908 if n is not None:
909 909 return n
910 910 n = self._partialmatch(id)
911 911 if n:
912 912 return n
913 913
914 914 raise LookupError(id, self.indexfile, _('no match found'))
915 915
916 916 def cmp(self, node, text):
917 917 """compare text with a given file revision
918 918
919 919 returns True if text is different than what is stored.
920 920 """
921 921 p1, p2 = self.parents(node)
922 922 return hash(text, p1, p2) != node
923 923
924 924 def _addchunk(self, offset, data):
925 925 o, d = self._chunkcache
926 926 # try to add to existing cache
927 927 if o + len(d) == offset and len(d) + len(data) < _chunksize:
928 928 self._chunkcache = o, d + data
929 929 else:
930 930 self._chunkcache = offset, data
931 931
932 932 def _loadchunk(self, offset, length):
933 933 if self._inline:
934 934 df = self.opener(self.indexfile)
935 935 else:
936 936 df = self.opener(self.datafile)
937 937
938 938 # Cache data both forward and backward around the requested
939 939 # data, in a fixed size window. This helps speed up operations
940 940 # involving reading the revlog backwards.
941 941 cachesize = self._chunkcachesize
942 942 realoffset = offset & ~(cachesize - 1)
943 943 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
944 944 - realoffset)
945 945 df.seek(realoffset)
946 946 d = df.read(reallength)
947 947 df.close()
948 948 self._addchunk(realoffset, d)
949 949 if offset != realoffset or reallength != length:
950 950 return util.buffer(d, offset - realoffset, length)
951 951 return d
952 952
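A worked instance of the alignment arithmetic above, assuming the default 64 KiB cache size (a power of two, which __init__ enforces):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert (realoffset, reallength) == (65536, 65536)
    assert realoffset <= offset and offset + length <= realoffset + reallength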
953 953 def _getchunk(self, offset, length):
954 954 o, d = self._chunkcache
955 955 l = len(d)
956 956
957 957 # is it in the cache?
958 958 cachestart = offset - o
959 959 cacheend = cachestart + length
960 960 if cachestart >= 0 and cacheend <= l:
961 961 if cachestart == 0 and cacheend == l:
962 962 return d # avoid a copy
963 963 return util.buffer(d, cachestart, cacheend - cachestart)
964 964
965 965 return self._loadchunk(offset, length)
966 966
967 967 def _chunkraw(self, startrev, endrev):
968 968 start = self.start(startrev)
969 969 end = self.end(endrev)
970 970 if self._inline:
971 971 start += (startrev + 1) * self._io.size
972 972 end += (endrev + 1) * self._io.size
973 973 length = end - start
974 974 return self._getchunk(start, length)
975 975
976 976 def _chunk(self, rev):
977 977 return decompress(self._chunkraw(rev, rev))
978 978
979 979 def _chunks(self, revs):
980 980 '''faster version of [self._chunk(rev) for rev in revs]
981 981
982 982 Assumes that revs is in ascending order.'''
983 983 if not revs:
984 984 return []
985 985 start = self.start
986 986 length = self.length
987 987 inline = self._inline
988 988 iosize = self._io.size
989 989 buffer = util.buffer
990 990
991 991 l = []
992 992 ladd = l.append
993 993
994 994 # preload the cache
995 995 try:
996 996 while True:
997 997 # ensure that the cache doesn't change out from under us
998 998 _cache = self._chunkcache
999 999 self._chunkraw(revs[0], revs[-1])
1000 1000 if _cache == self._chunkcache:
1001 1001 break
1002 1002 offset, data = _cache
1003 1003 except OverflowError:
1004 1004 # issue4215 - we can't cache a run of chunks greater than
1005 1005 # 2G on Windows
1006 1006 return [self._chunk(rev) for rev in revs]
1007 1007
1008 1008 for rev in revs:
1009 1009 chunkstart = start(rev)
1010 1010 if inline:
1011 1011 chunkstart += (rev + 1) * iosize
1012 1012 chunklength = length(rev)
1013 1013 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1014 1014
1015 1015 return l
1016 1016
1017 1017 def _chunkclear(self):
1018 1018 self._chunkcache = (0, '')
1019 1019
1020 1020 def deltaparent(self, rev):
1021 1021 """return deltaparent of the given revision"""
1022 1022 base = self.index[rev][3]
1023 1023 if base == rev:
1024 1024 return nullrev
1025 1025 elif self._generaldelta:
1026 1026 return base
1027 1027 else:
1028 1028 return rev - 1
1029 1029
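    # (illustration) without generaldelta the delta parent is always
    # rev - 1, so reconstructing rev 5 whose chain base is rev 2 applies
    # deltas 3, 4, 5 in order; with generaldelta, e[3] names the delta
    # parent directly, so rev 5 can delta straight against rev 2.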
1030 1030 def revdiff(self, rev1, rev2):
1031 1031 """return or calculate a delta between two revisions"""
1032 1032 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1033 1033 return str(self._chunk(rev2))
1034 1034
1035 1035 return mdiff.textdiff(self.revision(rev1),
1036 1036 self.revision(rev2))
1037 1037
1038 1038 def revision(self, nodeorrev):
1039 1039 """return an uncompressed revision of a given node or revision
1040 1040 number.
1041 1041 """
1042 1042 if isinstance(nodeorrev, int):
1043 1043 rev = nodeorrev
1044 1044 node = self.node(rev)
1045 1045 else:
1046 1046 node = nodeorrev
1047 1047 rev = None
1048 1048
1049 1049 _cache = self._cache # grab local copy of cache to avoid thread race
1050 1050 cachedrev = None
1051 1051 if node == nullid:
1052 1052 return ""
1053 1053 if _cache:
1054 1054 if _cache[0] == node:
1055 1055 return _cache[2]
1056 1056 cachedrev = _cache[1]
1057 1057
1058 1058 # look up what we need to read
1059 1059 text = None
1060 1060 if rev is None:
1061 1061 rev = self.rev(node)
1062 1062
1063 1063 # check rev flags
1064 1064 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1065 1065 raise RevlogError(_('incompatible revision flag %x') %
1066 1066 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1067 1067
1068 1068 # build delta chain
1069 1069 chain = []
1070 1070 index = self.index # for performance
1071 1071 generaldelta = self._generaldelta
1072 1072 iterrev = rev
1073 1073 e = index[iterrev]
1074 1074 while iterrev != e[3] and iterrev != cachedrev:
1075 1075 chain.append(iterrev)
1076 1076 if generaldelta:
1077 1077 iterrev = e[3]
1078 1078 else:
1079 1079 iterrev -= 1
1080 1080 e = index[iterrev]
1081 1081
1082 1082 if iterrev == cachedrev:
1083 1083 # cache hit
1084 1084 text = _cache[2]
1085 1085 else:
1086 1086 chain.append(iterrev)
1087 1087 chain.reverse()
1088 1088
1089 1089 # drop cache to save memory
1090 1090 self._cache = None
1091 1091
1092 1092 bins = self._chunks(chain)
1093 1093 if text is None:
1094 1094 text = str(bins[0])
1095 1095 bins = bins[1:]
1096 1096
1097 1097 text = mdiff.patches(text, bins)
1098 1098
1099 1099 text = self._checkhash(text, node, rev)
1100 1100
1101 1101 self._cache = (node, rev, text)
1102 1102 return text
1103 1103
1104 1104 def hash(self, text, p1, p2):
1105 1105 """Compute a node hash.
1106 1106
1107 1107 Available as a function so that subclasses can replace the hash
1108 1108 as needed.
1109 1109 """
1110 1110 return hash(text, p1, p2)
1111 1111
1112 1112 def _checkhash(self, text, node, rev):
1113 1113 p1, p2 = self.parents(node)
1114 1114 self.checkhash(text, p1, p2, node, rev)
1115 1115 return text
1116 1116
1117 1117 def checkhash(self, text, p1, p2, node, rev=None):
1118 1118 if node != self.hash(text, p1, p2):
1119 1119 revornode = rev
1120 1120 if revornode is None:
1121 1121 revornode = templatefilters.short(hex(node))
1122 1122 raise RevlogError(_("integrity check failed on %s:%s")
1123 1123 % (self.indexfile, revornode))
1124 1124
1125 1125 def checkinlinesize(self, tr, fp=None):
1126 1126 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1127 1127 return
1128 1128
1129 1129 trinfo = tr.find(self.indexfile)
1130 1130 if trinfo is None:
1131 1131 raise RevlogError(_("%s not found in the transaction")
1132 1132 % self.indexfile)
1133 1133
1134 1134 trindex = trinfo[2]
1135 1135 if trindex is not None:
1136 1136 dataoff = self.start(trindex)
1137 1137 else:
1138 1138 # revlog was stripped at start of transaction, use all leftover data
1139 1139 trindex = len(self) - 1
1140 1140 dataoff = self.end(-2)
1141 1141
1142 1142 tr.add(self.datafile, dataoff)
1143 1143
1144 1144 if fp:
1145 1145 fp.flush()
1146 1146 fp.close()
1147 1147
1148 1148 df = self.opener(self.datafile, 'w')
1149 1149 try:
1150 1150 for r in self:
1151 1151 df.write(self._chunkraw(r, r))
1152 1152 finally:
1153 1153 df.close()
1154 1154
1155 1155 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1156 1156 self.version &= ~(REVLOGNGINLINEDATA)
1157 1157 self._inline = False
1158 1158 for i in self:
1159 1159 e = self._io.packentry(self.index[i], self.node, self.version, i)
1160 1160 fp.write(e)
1161 1161
1162 1162 # if we don't call close, the temp file will never replace the
1163 1163 # real index
1164 1164 fp.close()
1165 1165
1166 1166 tr.replace(self.indexfile, trindex * self._io.size)
1167 1167 self._chunkclear()
1168 1168
1169 1169 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1170 1170 node=None):
1171 1171 """add a revision to the log
1172 1172
1173 1173 text - the revision data to add
1174 1174 transaction - the transaction object used for rollback
1175 1175 link - the linkrev data to add
1176 1176 p1, p2 - the parent nodeids of the revision
1177 1177 cachedelta - an optional precomputed delta
1178 1178 node - nodeid of revision; typically node is not specified, and it is
1179 1179 computed by default as hash(text, p1, p2), however subclasses might
1180 1180 use different hashing method (and override checkhash() in such case)
1181 1181 """
1182 1182 if link == nullrev:
1183 1183 raise RevlogError(_("attempted to add linkrev -1 to %s")
1184 1184 % self.indexfile)
1185 1185
1186 1186 if len(text) > _maxentrysize:
1187 1187 raise RevlogError(
1188 1188 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1189 1189 % (self.indexfile, len(text)))
1190 1190
1191 1191 node = node or self.hash(text, p1, p2)
1192 1192 if node in self.nodemap:
1193 1193 return node
1194 1194
1195 1195 dfh = None
1196 1196 if not self._inline:
1197 1197 dfh = self.opener(self.datafile, "a")
1198 1198 ifh = self.opener(self.indexfile, "a+")
1199 1199 try:
1200 1200 return self._addrevision(node, text, transaction, link, p1, p2,
1201 1201 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1202 1202 finally:
1203 1203 if dfh:
1204 1204 dfh.close()
1205 1205 ifh.close()
1206 1206
1207 1207 def compress(self, text):
1208 1208 """ generate a possibly-compressed representation of text """
1209 1209 if not text:
1210 1210 return ("", text)
1211 1211 l = len(text)
1212 1212 bin = None
1213 1213 if l < 44:
1214 1214 pass
1215 1215 elif l > 1000000:
1216 1216 # zlib makes an internal copy, thus doubling memory usage for
1217 1217             # large files, so let's do this in pieces
1218 1218 z = zlib.compressobj()
1219 1219 p = []
1220 1220 pos = 0
1221 1221 while pos < l:
1222 1222 pos2 = pos + 2**20
1223 1223 p.append(z.compress(text[pos:pos2]))
1224 1224 pos = pos2
1225 1225 p.append(z.flush())
1226 1226 if sum(map(len, p)) < l:
1227 1227 bin = "".join(p)
1228 1228 else:
1229 1229 bin = _compress(text)
1230 1230 if bin is None or len(bin) > l:
1231 1231 if text[0] == '\0':
1232 1232 return ("", text)
1233 1233 return ('u', text)
1234 1234 return ("", bin)
1235 1235
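A sketch of the one-byte header contract shared with decompress() above, assuming this module's namespace:

    text = 'y' * 2000                        # long and compressible: header is ''
    assert decompress(_compress(text)) == text
    assert decompress('u' + 'short text') == 'short text'   # 'u': stored raw
    assert decompress('\0binary-ish') == '\0binary-ish'     # '\0': left as is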
1236 def _isgooddelta(self, d, textlen):
1237 """Returns True if the given delta is good. Good means that it is within
1238 the disk span, disk size, and chain length bounds that we know to be
1239 performant."""
1240 if d is None:
1241 return False
1242
1243 # - 'dist' is the distance from the base revision -- bounding it limits
1244 # the amount of I/O we need to do.
1245 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1246 # to apply -- bounding it limits the amount of CPU we consume.
1247 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1248 if (dist > textlen * 4 or l > textlen or
1249 compresseddeltalen > textlen * 2 or
1250 (self._maxchainlen and chainlen > self._maxchainlen)):
1251 return False
1252
1253 return True
1254
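For illustration, a standalone copy of the new predicate's checks with hypothetical numbers (the 7-tuple mirrors what builddelta() returns below; _sketch_isgooddelta is not part of the change):

    def _sketch_isgooddelta(d, textlen, maxchainlen=None):
        if d is None:
            return False
        dist, l, data, base, chainbase, chainlen, csize = d
        return not (dist > textlen * 4 or l > textlen or
                    csize > textlen * 2 or
                    (maxchainlen and chainlen > maxchainlen))

    assert _sketch_isgooddelta((2000, 400, None, 0, 0, 3, 900), 1000)
    # rejected: reading 5000 bytes to reconstruct a 1000-byte text
    assert not _sketch_isgooddelta((5000, 400, None, 0, 0, 3, 900), 1000)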
1236 1255 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1237 1256 cachedelta, ifh, dfh):
1238 1257 """internal function to add revisions to the log
1239 1258
1240 1259 see addrevision for argument descriptions.
1241 1260 invariants:
1242 1261 - text is optional (can be None); if not set, cachedelta must be set.
1243 1262 if both are set, they must correspond to each other.
1244 1263 """
1245 1264 btext = [text]
1246 1265 def buildtext():
1247 1266 if btext[0] is not None:
1248 1267 return btext[0]
1249 1268 # flush any pending writes here so we can read it in revision
1250 1269 if dfh:
1251 1270 dfh.flush()
1252 1271 ifh.flush()
1253 1272 baserev = cachedelta[0]
1254 1273 delta = cachedelta[1]
1255 1274 # special case deltas which replace entire base; no need to decode
1256 1275 # base revision. this neatly avoids censored bases, which throw when
1257 1276 # they're decoded.
1258 1277 hlen = struct.calcsize(">lll")
1259 1278 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1260 1279 len(delta) - hlen):
1261 1280 btext[0] = delta[hlen:]
1262 1281 else:
1263 1282 basetext = self.revision(self.node(baserev))
1264 1283 btext[0] = mdiff.patch(basetext, delta)
1265 1284 try:
1266 1285 self.checkhash(btext[0], p1, p2, node)
1267 1286 if flags & REVIDX_ISCENSORED:
1268 1287 raise RevlogError(_('node %s is not censored') % node)
1269 1288 except CensoredNodeError:
1270 1289 # must pass the censored index flag to add censored revisions
1271 1290 if not flags & REVIDX_ISCENSORED:
1272 1291 raise
1273 1292 return btext[0]
1274 1293
1275 1294 def builddelta(rev):
1276 1295 # can we use the cached delta?
1277 1296 if cachedelta and cachedelta[0] == rev:
1278 1297 delta = cachedelta[1]
1279 1298 else:
1280 1299 t = buildtext()
1281 1300 if self.iscensored(rev):
1282 1301 # deltas based on a censored revision must replace the
1283 1302 # full content in one patch, so delta works everywhere
1284 1303 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1285 1304 delta = header + t
1286 1305 else:
1287 1306 ptext = self.revision(self.node(rev))
1288 1307 delta = mdiff.textdiff(ptext, t)
1289 1308 data = self.compress(delta)
1290 1309 l = len(data[1]) + len(data[0])
1291 1310 if basecache[0] == rev:
1292 1311 chainbase = basecache[1]
1293 1312 else:
1294 1313 chainbase = self.chainbase(rev)
1295 1314 dist = l + offset - self.start(chainbase)
1296 1315 if self._generaldelta:
1297 1316 base = rev
1298 1317 else:
1299 1318 base = chainbase
1300 1319 chainlen, compresseddeltalen = self._chaininfo(rev)
1301 1320 chainlen += 1
1302 1321 compresseddeltalen += l
1303 1322 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1304 1323
1305 1324 curr = len(self)
1306 1325 prev = curr - 1
1307 1326 base = chainbase = curr
1308 1327 chainlen = None
1309 1328 offset = self.end(prev)
1310 1329 d = None
1311 1330 if self._basecache is None:
1312 1331 self._basecache = (prev, self.chainbase(prev))
1313 1332 basecache = self._basecache
1314 1333 p1r, p2r = self.rev(p1), self.rev(p2)
1315 1334
1316 1335 # should we try to build a delta?
1317 1336 if prev != nullrev:
1318 1337 if self._generaldelta:
1319 1338 if p1r >= basecache[1]:
1320 1339 d = builddelta(p1r)
1321 1340 elif p2r >= basecache[1]:
1322 1341 d = builddelta(p2r)
1323 1342 else:
1324 1343 d = builddelta(prev)
1325 1344 else:
1326 1345 d = builddelta(prev)
1327 1346 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1328 1347
1329 1348 # full versions are inserted when the needed deltas
1330 1349 # become comparable to the uncompressed text
1331 1350 if text is None:
1332 1351 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1333 1352 cachedelta[1])
1334 1353 else:
1335 1354 textlen = len(text)
1336 1355
1337 # - 'dist' is the distance from the base revision -- bounding it limits
1338 # the amount of I/O we need to do.
1339 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1340 # to apply -- bounding it limits the amount of CPU we consume.
1341 if (d is None or dist > textlen * 4 or l > textlen or
1342 compresseddeltalen > textlen * 2 or
1343 (self._maxchainlen and chainlen > self._maxchainlen)):
1356 if not self._isgooddelta(d, textlen):
1344 1357 text = buildtext()
1345 1358 data = self.compress(text)
1346 1359 l = len(data[1]) + len(data[0])
1347 1360 base = chainbase = curr
1348 1361
1349 1362 e = (offset_type(offset, flags), l, textlen,
1350 1363 base, link, p1r, p2r, node)
1351 1364 self.index.insert(-1, e)
1352 1365 self.nodemap[node] = curr
1353 1366
1354 1367 entry = self._io.packentry(e, self.node, self.version, curr)
1355 1368 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1356 1369
1357 1370 if type(text) == str: # only accept immutable objects
1358 1371 self._cache = (node, curr, text)
1359 1372 self._basecache = (curr, chainbase)
1360 1373 return node
1361 1374
1362 1375 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1363 1376 curr = len(self) - 1
1364 1377 if not self._inline:
1365 1378 transaction.add(self.datafile, offset)
1366 1379 transaction.add(self.indexfile, curr * len(entry))
1367 1380 if data[0]:
1368 1381 dfh.write(data[0])
1369 1382 dfh.write(data[1])
1370 1383 dfh.flush()
1371 1384 ifh.write(entry)
1372 1385 else:
1373 1386 offset += curr * self._io.size
1374 1387 transaction.add(self.indexfile, offset, curr)
1375 1388 ifh.write(entry)
1376 1389 ifh.write(data[0])
1377 1390 ifh.write(data[1])
1378 1391 self.checkinlinesize(transaction, ifh)
1379 1392
1380 1393 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1381 1394 """
1382 1395 add a delta group
1383 1396
1384 1397 given a set of deltas, add them to the revision log. the
1385 1398 first delta is against its parent, which should be in our
1386 1399 log, the rest are against the previous delta.
1387 1400
1388 1401 If ``addrevisioncb`` is defined, it will be called with arguments of
1389 1402 this revlog and the node that was added.
1390 1403 """
1391 1404
1392 1405 # track the base of the current delta log
1393 1406 content = []
1394 1407 node = None
1395 1408
1396 1409 r = len(self)
1397 1410 end = 0
1398 1411 if r:
1399 1412 end = self.end(r - 1)
1400 1413 ifh = self.opener(self.indexfile, "a+")
1401 1414 isize = r * self._io.size
1402 1415 if self._inline:
1403 1416 transaction.add(self.indexfile, end + isize, r)
1404 1417 dfh = None
1405 1418 else:
1406 1419 transaction.add(self.indexfile, isize, r)
1407 1420 transaction.add(self.datafile, end)
1408 1421 dfh = self.opener(self.datafile, "a")
1409 1422 def flush():
1410 1423 if dfh:
1411 1424 dfh.flush()
1412 1425 ifh.flush()
1413 1426 try:
1414 1427 # loop through our set of deltas
1415 1428 chain = None
1416 1429 while True:
1417 1430 chunkdata = bundle.deltachunk(chain)
1418 1431 if not chunkdata:
1419 1432 break
1420 1433 node = chunkdata['node']
1421 1434 p1 = chunkdata['p1']
1422 1435 p2 = chunkdata['p2']
1423 1436 cs = chunkdata['cs']
1424 1437 deltabase = chunkdata['deltabase']
1425 1438 delta = chunkdata['delta']
1426 1439
1427 1440 content.append(node)
1428 1441
1429 1442 link = linkmapper(cs)
1430 1443 if node in self.nodemap:
1431 1444 # this can happen if two branches make the same change
1432 1445 chain = node
1433 1446 continue
1434 1447
1435 1448 for p in (p1, p2):
1436 1449 if p not in self.nodemap:
1437 1450 raise LookupError(p, self.indexfile,
1438 1451 _('unknown parent'))
1439 1452
1440 1453 if deltabase not in self.nodemap:
1441 1454 raise LookupError(deltabase, self.indexfile,
1442 1455 _('unknown delta base'))
1443 1456
1444 1457 baserev = self.rev(deltabase)
1445 1458
1446 1459 if baserev != nullrev and self.iscensored(baserev):
1447 1460 # if base is censored, delta must be full replacement in a
1448 1461 # single patch operation
1449 1462 hlen = struct.calcsize(">lll")
1450 1463 oldlen = self.rawsize(baserev)
1451 1464 newlen = len(delta) - hlen
1452 1465 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1453 1466 raise error.CensoredBaseError(self.indexfile,
1454 1467 self.node(baserev))
1455 1468
1456 1469 flags = REVIDX_DEFAULT_FLAGS
1457 1470 if self._peek_iscensored(baserev, delta, flush):
1458 1471 flags |= REVIDX_ISCENSORED
1459 1472
1460 1473 chain = self._addrevision(node, None, transaction, link,
1461 1474 p1, p2, flags, (baserev, delta),
1462 1475 ifh, dfh)
1463 1476
1464 1477 if addrevisioncb:
1465 1478 # Data for added revision can't be read unless flushed
1466 1479                     # because _loadchunk always opens a new file handle and
1467 1480 # there is no guarantee data was actually written yet.
1468 1481 flush()
1469 1482 addrevisioncb(self, chain)
1470 1483
1471 1484 if not dfh and not self._inline:
1472 1485 # addrevision switched from inline to conventional
1473 1486 # reopen the index
1474 1487 ifh.close()
1475 1488 dfh = self.opener(self.datafile, "a")
1476 1489 ifh = self.opener(self.indexfile, "a")
1477 1490 finally:
1478 1491 if dfh:
1479 1492 dfh.close()
1480 1493 ifh.close()
1481 1494
1482 1495 return content
1483 1496
1484 1497 def iscensored(self, rev):
1485 1498 """Check if a file revision is censored."""
1486 1499 return False
1487 1500
1488 1501 def _peek_iscensored(self, baserev, delta, flush):
1489 1502 """Quickly check if a delta produces a censored revision."""
1490 1503 return False
1491 1504
1492 1505 def getstrippoint(self, minlink):
1493 1506 """find the minimum rev that must be stripped to strip the linkrev
1494 1507
1495 1508 Returns a tuple containing the minimum rev and a set of all revs that
1496 1509 have linkrevs that will be broken by this strip.
1497 1510 """
1498 1511 brokenrevs = set()
1499 1512 strippoint = len(self)
1500 1513
1501 1514 heads = {}
1502 1515 futurelargelinkrevs = set()
1503 1516 for head in self.headrevs():
1504 1517 headlinkrev = self.linkrev(head)
1505 1518 heads[head] = headlinkrev
1506 1519 if headlinkrev >= minlink:
1507 1520 futurelargelinkrevs.add(headlinkrev)
1508 1521
1509 1522 # This algorithm involves walking down the rev graph, starting at the
1510 1523 # heads. Since the revs are topologically sorted according to linkrev,
1511 1524 # once all head linkrevs are below the minlink, we know there are
1512 1525 # no more revs that could have a linkrev greater than minlink.
1513 1526 # So we can stop walking.
1514 1527 while futurelargelinkrevs:
1515 1528 strippoint -= 1
1516 1529 linkrev = heads.pop(strippoint)
1517 1530
1518 1531 if linkrev < minlink:
1519 1532 brokenrevs.add(strippoint)
1520 1533 else:
1521 1534 futurelargelinkrevs.remove(linkrev)
1522 1535
1523 1536 for p in self.parentrevs(strippoint):
1524 1537 if p != nullrev:
1525 1538 plinkrev = self.linkrev(p)
1526 1539 heads[p] = plinkrev
1527 1540 if plinkrev >= minlink:
1528 1541 futurelargelinkrevs.add(plinkrev)
1529 1542
1530 1543 return strippoint, brokenrevs
1531 1544
1532 1545 def strip(self, minlink, transaction):
1533 1546 """truncate the revlog on the first revision with a linkrev >= minlink
1534 1547
1535 1548 This function is called when we're stripping revision minlink and
1536 1549 its descendants from the repository.
1537 1550
1538 1551 We have to remove all revisions with linkrev >= minlink, because
1539 1552 the equivalent changelog revisions will be renumbered after the
1540 1553 strip.
1541 1554
1542 1555 So we truncate the revlog on the first of these revisions, and
1543 1556 trust that the caller has saved the revisions that shouldn't be
1544 1557 removed and that it'll re-add them after this truncation.
1545 1558 """
1546 1559 if len(self) == 0:
1547 1560 return
1548 1561
1549 1562 rev, _ = self.getstrippoint(minlink)
1550 1563 if rev == len(self):
1551 1564 return
1552 1565
1553 1566 # first truncate the files on disk
1554 1567 end = self.start(rev)
1555 1568 if not self._inline:
1556 1569 transaction.add(self.datafile, end)
1557 1570 end = rev * self._io.size
1558 1571 else:
1559 1572 end += rev * self._io.size
1560 1573
1561 1574 transaction.add(self.indexfile, end)
1562 1575
1563 1576 # then reset internal state in memory to forget those revisions
1564 1577 self._cache = None
1565 1578 self._chaininfocache = {}
1566 1579 self._chunkclear()
1567 1580 for x in xrange(rev, len(self)):
1568 1581 del self.nodemap[self.node(x)]
1569 1582
1570 1583 del self.index[rev:-1]
1571 1584
1572 1585 def checksize(self):
1573 1586 expected = 0
1574 1587 if len(self):
1575 1588 expected = max(0, self.end(len(self) - 1))
1576 1589
1577 1590 try:
1578 1591 f = self.opener(self.datafile)
1579 1592 f.seek(0, 2)
1580 1593 actual = f.tell()
1581 1594 f.close()
1582 1595 dd = actual - expected
1583 1596 except IOError as inst:
1584 1597 if inst.errno != errno.ENOENT:
1585 1598 raise
1586 1599 dd = 0
1587 1600
1588 1601 try:
1589 1602 f = self.opener(self.indexfile)
1590 1603 f.seek(0, 2)
1591 1604 actual = f.tell()
1592 1605 f.close()
1593 1606 s = self._io.size
1594 1607 i = max(0, actual // s)
1595 1608 di = actual - (i * s)
1596 1609 if self._inline:
1597 1610 databytes = 0
1598 1611 for r in self:
1599 1612 databytes += max(0, self.length(r))
1600 1613 dd = 0
1601 1614 di = actual - len(self) * s - databytes
1602 1615 except IOError as inst:
1603 1616 if inst.errno != errno.ENOENT:
1604 1617 raise
1605 1618 di = 0
1606 1619
1607 1620 return (dd, di)
1608 1621
1609 1622 def files(self):
1610 1623 res = [self.indexfile]
1611 1624 if not self._inline:
1612 1625 res.append(self.datafile)
1613 1626 return res