revlog: add instance variable controlling delta chain use...
Gregory Szorc
r30154:5e72129d default
@@ -1,1794 +1,1796
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
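The three helpers above pack a 48-bit file offset and a 16-bit flag field into the single integer stored as the first element of an index entry. A quick round-trip sketch (illustrative values only):

    packed = offset_type(1234, 1)      # offset 1234, flag bits 0x1
    assert getoffset(packed) == 1234   # upper 48 bits hold the offset
    assert gettype(packed) == 1        # lower 16 bits hold the flags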
77 77 _nullhash = hashlib.sha1(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, p2 is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # neither of the parent nodes is nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = hashlib.sha1(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
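Two properties of hash() are worth noting: parent order does not matter, because the two nodeids are sorted before hashing, and the _nullhash fast path yields exactly the digest of a fresh sha1 seeded with nullid. A sketch with made-up 20-byte parents (hashlib and nullid come from the module imports above):

    p1, p2 = '\x11' * 20, '\x22' * 20
    assert hash('text', p1, p2) == hash('text', p2, p1)
    # the _nullhash copy equals a fresh sha1 seeded with nullid
    assert hash('text', p1, nullid) == \
        hashlib.sha1(nullid + p1 + 'text').digest()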
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
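The first byte of each stored chunk doubles as a compression marker: 'x' is the leading byte of a zlib stream, 'u' flags data stored uncompressed, and an empty or '\0'-prefixed chunk is returned untouched. A sketch:

    assert str(decompress(zlib.compress('some text'))) == 'some text'
    assert str(decompress('u' + 'some text')) == 'some text'
    assert decompress('') == ''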
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
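Each v0 record is therefore a fixed 76 bytes, which is what parseindex() below relies on when slicing the raw data. A quick check:

    # four 4-byte big-endian ints plus three 20-byte nodeids
    assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20   # 76 bytes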
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
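A RevlogNG record is a fixed 64 bytes: an 8-byte offset/flags word (built by offset_type() above), six 4-byte ints, a 20-byte nodeid, and 12 bytes of padding. A sketch with made-up entry values:

    assert struct.calcsize(indexformatng) == 8 + 6 * 4 + 20 + 12   # 64 bytes
    e = (offset_type(0, 0), 11, 25, 0, 0, -1, -1, '\x99' * 20)
    assert len(struct.pack(indexformatng, *e)) == 64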
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215
216 216 If checkambig, indexfile is opened with checkambig=True at
217 217 writing, to avoid file stat ambiguity.
218 218 """
219 219 def __init__(self, opener, indexfile, checkambig=False):
220 220 """
221 221 create a revlog object
222 222
223 223 opener is a function that abstracts the file opening operation
224 224 and can be used to implement COW semantics or the like.
225 225 """
226 226 self.indexfile = indexfile
227 227 self.datafile = indexfile[:-2] + ".d"
228 228 self.opener = opener
229 229 # When True, indexfile is opened with checkambig=True at writing, to
230 230 # avoid file stat ambiguity.
231 231 self._checkambig = checkambig
232 232 # 3-tuple of (node, rev, text) for a raw revision.
233 233 self._cache = None
234 234 # Maps rev to chain base rev.
235 235 self._chainbasecache = util.lrucachedict(100)
236 236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 237 self._chunkcache = (0, '')
238 238 # How much data to read and cache into the raw revlog data cache.
239 239 self._chunkcachesize = 65536
240 240 self._maxchainlen = None
241 241 self._aggressivemergedeltas = False
242 242 self.index = []
243 243 # Mapping of partial identifiers to full nodes.
244 244 self._pcache = {}
245 245 # Mapping of revision integer to full node.
246 246 self._nodecache = {nullid: nullrev}
247 247 self._nodepos = None
248 248
249 249 v = REVLOG_DEFAULT_VERSION
250 250 opts = getattr(opener, 'options', None)
251 251 if opts is not None:
252 252 if 'revlogv1' in opts:
253 253 if 'generaldelta' in opts:
254 254 v |= REVLOGGENERALDELTA
255 255 else:
256 256 v = 0
257 257 if 'chunkcachesize' in opts:
258 258 self._chunkcachesize = opts['chunkcachesize']
259 259 if 'maxchainlen' in opts:
260 260 self._maxchainlen = opts['maxchainlen']
261 261 if 'aggressivemergedeltas' in opts:
262 262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264 264
265 265 if self._chunkcachesize <= 0:
266 266 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 267 'than 0') % self._chunkcachesize)
268 268 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 269 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 270 'of 2') % self._chunkcachesize)
271 271
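The power-of-two requirement lets _loadchunk() align reads with simple bit masking; the check exploits the fact that clearing the lowest set bit of a power of two yields zero:

    for size in (1, 4096, 65536):
        assert size & (size - 1) == 0       # power of two: accepted
    assert 65535 & (65535 - 1) != 0         # not a power of two: rejected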
272 272 indexdata = ''
273 273 self._initempty = True
274 274 try:
275 275 f = self.opener(self.indexfile)
276 276 indexdata = f.read()
277 277 f.close()
278 278 if len(indexdata) > 0:
279 279 v = struct.unpack(versionformat, indexdata[:4])[0]
280 280 self._initempty = False
281 281 except IOError as inst:
282 282 if inst.errno != errno.ENOENT:
283 283 raise
284 284
285 285 self.version = v
286 286 self._inline = v & REVLOGNGINLINEDATA
287 287 self._generaldelta = v & REVLOGGENERALDELTA
288 288 flags = v & ~0xFFFF
289 289 fmt = v & 0xFFFF
290 290 if fmt == REVLOGV0 and flags:
291 291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 292 % (self.indexfile, flags >> 16))
293 293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 295 % (self.indexfile, flags >> 16))
296 296 elif fmt > REVLOGNG:
297 297 raise RevlogError(_("index %s unknown format %d")
298 298 % (self.indexfile, fmt))
299 299
300 self._storedeltachains = True
301
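The new _storedeltachains attribute is consulted in _addrevision() (see the changed condition at new line 1474 below): when it is False, no delta candidate is even attempted and every revision is stored as a full snapshot. Nothing in this change sets it to False; the hook is meant for derived classes. A hypothetical sketch of how a subclass might use it:

    class fulltextrevlog(revlog):
        # hypothetical subclass that opts out of delta chains entirely
        def __init__(self, opener, indexfile, checkambig=False):
            revlog.__init__(self, opener, indexfile, checkambig=checkambig)
            self._storedeltachains = False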
300 302 self._io = revlogio()
301 303 if self.version == REVLOGV0:
302 304 self._io = revlogoldio()
303 305 try:
304 306 d = self._io.parseindex(indexdata, self._inline)
305 307 except (ValueError, IndexError):
306 308 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
307 309 self.index, nodemap, self._chunkcache = d
308 310 if nodemap is not None:
309 311 self.nodemap = self._nodecache = nodemap
310 312 if not self._chunkcache:
311 313 self._chunkclear()
312 314 # revnum -> (chain-length, sum-delta-length)
313 315 self._chaininfocache = {}
314 316
315 317 def tip(self):
316 318 return self.node(len(self.index) - 2)
317 319 def __contains__(self, rev):
318 320 return 0 <= rev < len(self)
319 321 def __len__(self):
320 322 return len(self.index) - 1
321 323 def __iter__(self):
322 324 return iter(xrange(len(self)))
323 325 def revs(self, start=0, stop=None):
324 326 """iterate over all rev in this revlog (from start to stop)"""
325 327 step = 1
326 328 if stop is not None:
327 329 if start > stop:
328 330 step = -1
329 331 stop += step
330 332 else:
331 333 stop = len(self)
332 334 return xrange(start, stop, step)
333 335
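Note the inclusive-stop arithmetic above: when start > stop the step flips to -1 and stop is shifted one past the end, so the bound itself is still yielded. Checked in isolation:

    assert list(xrange(2, 0 - 1, -1)) == [2, 1, 0]   # revs(2, 0)
    assert list(xrange(0, 2 + 1, 1)) == [0, 1, 2]    # revs(0, 2)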
334 336 @util.propertycache
335 337 def nodemap(self):
336 338 self.rev(self.node(0))
337 339 return self._nodecache
338 340
339 341 def hasnode(self, node):
340 342 try:
341 343 self.rev(node)
342 344 return True
343 345 except KeyError:
344 346 return False
345 347
346 348 def clearcaches(self):
347 349 self._cache = None
348 350 self._chainbasecache.clear()
349 351 self._chunkcache = (0, '')
350 352 self._pcache = {}
351 353
352 354 try:
353 355 self._nodecache.clearcaches()
354 356 except AttributeError:
355 357 self._nodecache = {nullid: nullrev}
356 358 self._nodepos = None
357 359
358 360 def rev(self, node):
359 361 try:
360 362 return self._nodecache[node]
361 363 except TypeError:
362 364 raise
363 365 except RevlogError:
364 366 # parsers.c radix tree lookup failed
365 367 raise LookupError(node, self.indexfile, _('no node'))
366 368 except KeyError:
367 369 # pure python cache lookup failed
368 370 n = self._nodecache
369 371 i = self.index
370 372 p = self._nodepos
371 373 if p is None:
372 374 p = len(i) - 2
373 375 for r in xrange(p, -1, -1):
374 376 v = i[r][7]
375 377 n[v] = r
376 378 if v == node:
377 379 self._nodepos = r - 1
378 380 return r
379 381 raise LookupError(node, self.indexfile, _('no node'))
380 382
381 383 def node(self, rev):
382 384 return self.index[rev][7]
383 385 def linkrev(self, rev):
384 386 return self.index[rev][4]
385 387 def parents(self, node):
386 388 i = self.index
387 389 d = i[self.rev(node)]
388 390 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
389 391 def parentrevs(self, rev):
390 392 return self.index[rev][5:7]
391 393 def start(self, rev):
392 394 return int(self.index[rev][0] >> 16)
393 395 def end(self, rev):
394 396 return self.start(rev) + self.length(rev)
395 397 def length(self, rev):
396 398 return self.index[rev][1]
397 399 def chainbase(self, rev):
398 400 base = self._chainbasecache.get(rev)
399 401 if base is not None:
400 402 return base
401 403
402 404 index = self.index
403 405 base = index[rev][3]
404 406 while base != rev:
405 407 rev = base
406 408 base = index[rev][3]
407 409
408 410 self._chainbasecache[rev] = base
409 411 return base
410 412 def chainlen(self, rev):
411 413 return self._chaininfo(rev)[0]
412 414
413 415 def _chaininfo(self, rev):
414 416 chaininfocache = self._chaininfocache
415 417 if rev in chaininfocache:
416 418 return chaininfocache[rev]
417 419 index = self.index
418 420 generaldelta = self._generaldelta
419 421 iterrev = rev
420 422 e = index[iterrev]
421 423 clen = 0
422 424 compresseddeltalen = 0
423 425 while iterrev != e[3]:
424 426 clen += 1
425 427 compresseddeltalen += e[1]
426 428 if generaldelta:
427 429 iterrev = e[3]
428 430 else:
429 431 iterrev -= 1
430 432 if iterrev in chaininfocache:
431 433 t = chaininfocache[iterrev]
432 434 clen += t[0]
433 435 compresseddeltalen += t[1]
434 436 break
435 437 e = index[iterrev]
436 438 else:
437 439 # Add text length of base since decompressing that also takes
438 440 # work. For cache hits the length is already included.
439 441 compresseddeltalen += e[1]
440 442 r = (clen, compresseddeltalen)
441 443 chaininfocache[rev] = r
442 444 return r
443 445
444 446 def _deltachain(self, rev, stoprev=None):
445 447 """Obtain the delta chain for a revision.
446 448
447 449 ``stoprev`` specifies a revision to stop at. If not specified, we
448 450 stop at the base of the chain.
449 451
450 452 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
451 453 revs in ascending order and ``stopped`` is a bool indicating whether
452 454 ``stoprev`` was hit.
453 455 """
454 456 chain = []
455 457
456 458 # Alias to prevent attribute lookup in tight loop.
457 459 index = self.index
458 460 generaldelta = self._generaldelta
459 461
460 462 iterrev = rev
461 463 e = index[iterrev]
462 464 while iterrev != e[3] and iterrev != stoprev:
463 465 chain.append(iterrev)
464 466 if generaldelta:
465 467 iterrev = e[3]
466 468 else:
467 469 iterrev -= 1
468 470 e = index[iterrev]
469 471
470 472 if iterrev == stoprev:
471 473 stopped = True
472 474 else:
473 475 chain.append(iterrev)
474 476 stopped = False
475 477
476 478 chain.reverse()
477 479 return chain, stopped
478 480
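The walk above only ever follows entry[3], the delta base stored in the index: with generaldelta the base can be any earlier revision, otherwise it is implicitly rev - 1. A toy sketch of the generaldelta case, with made-up base pointers:

    bases = {0: 0, 1: 0, 2: 1, 3: 1}   # rev -> delta base; 0 is its own base

    def toychain(rev):
        chain = []
        while rev != bases[rev]:
            chain.append(rev)
            rev = bases[rev]
        chain.append(rev)
        chain.reverse()
        return chain

    assert toychain(3) == [0, 1, 3]    # rev 2 is skipped entirely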
479 481 def flags(self, rev):
480 482 return self.index[rev][0] & 0xFFFF
481 483 def rawsize(self, rev):
482 484 """return the length of the uncompressed text for a given revision"""
483 485 l = self.index[rev][2]
484 486 if l >= 0:
485 487 return l
486 488
487 489 t = self.revision(self.node(rev))
488 490 return len(t)
489 491 size = rawsize
490 492
491 493 def ancestors(self, revs, stoprev=0, inclusive=False):
492 494 """Generate the ancestors of 'revs' in reverse topological order.
493 495 Does not generate revs lower than stoprev.
494 496
495 497 See the documentation for ancestor.lazyancestors for more details."""
496 498
497 499 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
498 500 inclusive=inclusive)
499 501
500 502 def descendants(self, revs):
501 503 """Generate the descendants of 'revs' in revision order.
502 504
503 505 Yield a sequence of revision numbers starting with a child of
504 506 some rev in revs, i.e., each revision is *not* considered a
505 507 descendant of itself. Results are ordered by revision number (a
506 508 topological sort)."""
507 509 first = min(revs)
508 510 if first == nullrev:
509 511 for i in self:
510 512 yield i
511 513 return
512 514
513 515 seen = set(revs)
514 516 for i in self.revs(start=first + 1):
515 517 for x in self.parentrevs(i):
516 518 if x != nullrev and x in seen:
517 519 seen.add(i)
518 520 yield i
519 521 break
520 522
521 523 def findcommonmissing(self, common=None, heads=None):
522 524 """Return a tuple of the ancestors of common and the ancestors of heads
523 525 that are not ancestors of common. In revset terminology, we return the
524 526 tuple:
525 527
526 528 ::common, (::heads) - (::common)
527 529
528 530 The list is sorted by revision number, meaning it is
529 531 topologically sorted.
530 532
531 533 'heads' and 'common' are both lists of node IDs. If heads is
532 534 not supplied, uses all of the revlog's heads. If common is not
533 535 supplied, uses nullid."""
534 536 if common is None:
535 537 common = [nullid]
536 538 if heads is None:
537 539 heads = self.heads()
538 540
539 541 common = [self.rev(n) for n in common]
540 542 heads = [self.rev(n) for n in heads]
541 543
542 544 # we want the ancestors, but inclusive
543 545 class lazyset(object):
544 546 def __init__(self, lazyvalues):
545 547 self.addedvalues = set()
546 548 self.lazyvalues = lazyvalues
547 549
548 550 def __contains__(self, value):
549 551 return value in self.addedvalues or value in self.lazyvalues
550 552
551 553 def __iter__(self):
552 554 added = self.addedvalues
553 555 for r in added:
554 556 yield r
555 557 for r in self.lazyvalues:
556 558 if not r in added:
557 559 yield r
558 560
559 561 def add(self, value):
560 562 self.addedvalues.add(value)
561 563
562 564 def update(self, values):
563 565 self.addedvalues.update(values)
564 566
565 567 has = lazyset(self.ancestors(common))
566 568 has.add(nullrev)
567 569 has.update(common)
568 570
569 571 # take all ancestors from heads that aren't in has
570 572 missing = set()
571 573 visit = collections.deque(r for r in heads if r not in has)
572 574 while visit:
573 575 r = visit.popleft()
574 576 if r in missing:
575 577 continue
576 578 else:
577 579 missing.add(r)
578 580 for p in self.parentrevs(r):
579 581 if p not in has:
580 582 visit.append(p)
581 583 missing = list(missing)
582 584 missing.sort()
583 585 return has, [self.node(r) for r in missing]
584 586
585 587 def incrementalmissingrevs(self, common=None):
586 588 """Return an object that can be used to incrementally compute the
587 589 revision numbers of the ancestors of arbitrary sets that are not
588 590 ancestors of common. This is an ancestor.incrementalmissingancestors
589 591 object.
590 592
591 593 'common' is a list of revision numbers. If common is not supplied, uses
592 594 nullrev.
593 595 """
594 596 if common is None:
595 597 common = [nullrev]
596 598
597 599 return ancestor.incrementalmissingancestors(self.parentrevs, common)
598 600
599 601 def findmissingrevs(self, common=None, heads=None):
600 602 """Return the revision numbers of the ancestors of heads that
601 603 are not ancestors of common.
602 604
603 605 More specifically, return a list of revision numbers corresponding to
604 606 nodes N such that every N satisfies the following constraints:
605 607
606 608 1. N is an ancestor of some node in 'heads'
607 609 2. N is not an ancestor of any node in 'common'
608 610
609 611 The list is sorted by revision number, meaning it is
610 612 topologically sorted.
611 613
612 614 'heads' and 'common' are both lists of revision numbers. If heads is
613 615 not supplied, uses all of the revlog's heads. If common is not
614 616 supplied, uses nullid."""
615 617 if common is None:
616 618 common = [nullrev]
617 619 if heads is None:
618 620 heads = self.headrevs()
619 621
620 622 inc = self.incrementalmissingrevs(common=common)
621 623 return inc.missingancestors(heads)
622 624
623 625 def findmissing(self, common=None, heads=None):
624 626 """Return the ancestors of heads that are not ancestors of common.
625 627
626 628 More specifically, return a list of nodes N such that every N
627 629 satisfies the following constraints:
628 630
629 631 1. N is an ancestor of some node in 'heads'
630 632 2. N is not an ancestor of any node in 'common'
631 633
632 634 The list is sorted by revision number, meaning it is
633 635 topologically sorted.
634 636
635 637 'heads' and 'common' are both lists of node IDs. If heads is
636 638 not supplied, uses all of the revlog's heads. If common is not
637 639 supplied, uses nullid."""
638 640 if common is None:
639 641 common = [nullid]
640 642 if heads is None:
641 643 heads = self.heads()
642 644
643 645 common = [self.rev(n) for n in common]
644 646 heads = [self.rev(n) for n in heads]
645 647
646 648 inc = self.incrementalmissingrevs(common=common)
647 649 return [self.node(r) for r in inc.missingancestors(heads)]
648 650
649 651 def nodesbetween(self, roots=None, heads=None):
650 652 """Return a topological path from 'roots' to 'heads'.
651 653
652 654 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
653 655 topologically sorted list of all nodes N that satisfy both of
654 656 these constraints:
655 657
656 658 1. N is a descendant of some node in 'roots'
657 659 2. N is an ancestor of some node in 'heads'
658 660
659 661 Every node is considered to be both a descendant and an ancestor
660 662 of itself, so every reachable node in 'roots' and 'heads' will be
661 663 included in 'nodes'.
662 664
663 665 'outroots' is the list of reachable nodes in 'roots', i.e., the
664 666 subset of 'roots' that is returned in 'nodes'. Likewise,
665 667 'outheads' is the subset of 'heads' that is also in 'nodes'.
666 668
667 669 'roots' and 'heads' are both lists of node IDs. If 'roots' is
668 670 unspecified, uses nullid as the only root. If 'heads' is
669 671 unspecified, uses list of all of the revlog's heads."""
670 672 nonodes = ([], [], [])
671 673 if roots is not None:
672 674 roots = list(roots)
673 675 if not roots:
674 676 return nonodes
675 677 lowestrev = min([self.rev(n) for n in roots])
676 678 else:
677 679 roots = [nullid] # Everybody's a descendant of nullid
678 680 lowestrev = nullrev
679 681 if (lowestrev == nullrev) and (heads is None):
680 682 # We want _all_ the nodes!
681 683 return ([self.node(r) for r in self], [nullid], list(self.heads()))
682 684 if heads is None:
683 685 # All nodes are ancestors, so the latest ancestor is the last
684 686 # node.
685 687 highestrev = len(self) - 1
686 688 # Set ancestors to None to signal that every node is an ancestor.
687 689 ancestors = None
688 690 # Set heads to an empty dictionary for later discovery of heads
689 691 heads = {}
690 692 else:
691 693 heads = list(heads)
692 694 if not heads:
693 695 return nonodes
694 696 ancestors = set()
695 697 # Turn heads into a dictionary so we can remove 'fake' heads.
696 698 # Also, later we will be using it to filter out the heads we can't
697 699 # find from roots.
698 700 heads = dict.fromkeys(heads, False)
699 701 # Start at the top and keep marking parents until we're done.
700 702 nodestotag = set(heads)
701 703 # Remember where the top was so we can use it as a limit later.
702 704 highestrev = max([self.rev(n) for n in nodestotag])
703 705 while nodestotag:
704 706 # grab a node to tag
705 707 n = nodestotag.pop()
706 708 # Never tag nullid
707 709 if n == nullid:
708 710 continue
709 711 # A node's revision number represents its place in a
710 712 # topologically sorted list of nodes.
711 713 r = self.rev(n)
712 714 if r >= lowestrev:
713 715 if n not in ancestors:
714 716 # If we are possibly a descendant of one of the roots
715 717 # and we haven't already been marked as an ancestor
716 718 ancestors.add(n) # Mark as ancestor
717 719 # Add non-nullid parents to list of nodes to tag.
718 720 nodestotag.update([p for p in self.parents(n) if
719 721 p != nullid])
720 722 elif n in heads: # We've seen it before, is it a fake head?
721 723 # So it is, real heads should not be the ancestors of
722 724 # any other heads.
723 725 heads.pop(n)
724 726 if not ancestors:
725 727 return nonodes
726 728 # Now that we have our set of ancestors, we want to remove any
727 729 # roots that are not ancestors.
728 730
729 731 # If one of the roots was nullid, everything is included anyway.
730 732 if lowestrev > nullrev:
731 733 # But, since we weren't, let's recompute the lowest rev to not
732 734 # include roots that aren't ancestors.
733 735
734 736 # Filter out roots that aren't ancestors of heads
735 737 roots = [n for n in roots if n in ancestors]
736 738 # Recompute the lowest revision
737 739 if roots:
738 740 lowestrev = min([self.rev(n) for n in roots])
739 741 else:
740 742 # No more roots? Return empty list
741 743 return nonodes
742 744 else:
743 745 # We are descending from nullid, and don't need to care about
744 746 # any other roots.
745 747 lowestrev = nullrev
746 748 roots = [nullid]
747 749 # Transform our roots list into a set.
748 750 descendants = set(roots)
749 751 # Also, keep the original roots so we can filter out roots that aren't
750 752 # 'real' roots (i.e. are descended from other roots).
751 753 roots = descendants.copy()
752 754 # Our topologically sorted list of output nodes.
753 755 orderedout = []
754 756 # Don't start at nullid since we don't want nullid in our output list,
755 757 # and if nullid shows up in descendants, empty parents will look like
756 758 # they're descendants.
757 759 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
758 760 n = self.node(r)
759 761 isdescendant = False
760 762 if lowestrev == nullrev: # Everybody is a descendant of nullid
761 763 isdescendant = True
762 764 elif n in descendants:
763 765 # n is already a descendant
764 766 isdescendant = True
765 767 # This check only needs to be done here because all the roots
766 768 # will start being marked as descendants before the loop.
767 769 if n in roots:
768 770 # If n was a root, check if it's a 'real' root.
769 771 p = tuple(self.parents(n))
770 772 # If any of its parents are descendants, it's not a root.
771 773 if (p[0] in descendants) or (p[1] in descendants):
772 774 roots.remove(n)
773 775 else:
774 776 p = tuple(self.parents(n))
775 777 # A node is a descendant if either of its parents are
776 778 # descendants. (We seeded the descendants set with the roots
777 779 # up there, remember?)
778 780 if (p[0] in descendants) or (p[1] in descendants):
779 781 descendants.add(n)
780 782 isdescendant = True
781 783 if isdescendant and ((ancestors is None) or (n in ancestors)):
782 784 # Only include nodes that are both descendants and ancestors.
783 785 orderedout.append(n)
784 786 if (ancestors is not None) and (n in heads):
785 787 # We're trying to figure out which heads are reachable
786 788 # from roots.
787 789 # Mark this head as having been reached
788 790 heads[n] = True
789 791 elif ancestors is None:
790 792 # Otherwise, we're trying to discover the heads.
791 793 # Assume this is a head because if it isn't, the next step
792 794 # will eventually remove it.
793 795 heads[n] = True
794 796 # But, obviously its parents aren't.
795 797 for p in self.parents(n):
796 798 heads.pop(p, None)
797 799 heads = [n for n, flag in heads.iteritems() if flag]
798 800 roots = list(roots)
799 801 assert orderedout
800 802 assert roots
801 803 assert heads
802 804 return (orderedout, roots, heads)
803 805
804 806 def headrevs(self):
805 807 try:
806 808 return self.index.headrevs()
807 809 except AttributeError:
808 810 return self._headrevs()
809 811
810 812 def computephases(self, roots):
811 813 return self.index.computephasesmapsets(roots)
812 814
813 815 def _headrevs(self):
814 816 count = len(self)
815 817 if not count:
816 818 return [nullrev]
817 819 # we won't iterate over filtered revs, so nobody is a head at start
818 820 ishead = [0] * (count + 1)
819 821 index = self.index
820 822 for r in self:
821 823 ishead[r] = 1 # I may be a head
822 824 e = index[r]
823 825 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
824 826 return [r for r, val in enumerate(ishead) if val]
825 827
826 828 def heads(self, start=None, stop=None):
827 829 """return the list of all nodes that have no children
828 830
829 831 if start is specified, only heads that are descendants of
830 832 start will be returned
831 833 if stop is specified, it will consider all the revs from stop
832 834 as if they had no children
833 835 """
834 836 if start is None and stop is None:
835 837 if not len(self):
836 838 return [nullid]
837 839 return [self.node(r) for r in self.headrevs()]
838 840
839 841 if start is None:
840 842 start = nullid
841 843 if stop is None:
842 844 stop = []
843 845 stoprevs = set([self.rev(n) for n in stop])
844 846 startrev = self.rev(start)
845 847 reachable = set((startrev,))
846 848 heads = set((startrev,))
847 849
848 850 parentrevs = self.parentrevs
849 851 for r in self.revs(start=startrev + 1):
850 852 for p in parentrevs(r):
851 853 if p in reachable:
852 854 if r not in stoprevs:
853 855 reachable.add(r)
854 856 heads.add(r)
855 857 if p in heads and p not in stoprevs:
856 858 heads.remove(p)
857 859
858 860 return [self.node(r) for r in heads]
859 861
860 862 def children(self, node):
861 863 """find the children of a given node"""
862 864 c = []
863 865 p = self.rev(node)
864 866 for r in self.revs(start=p + 1):
865 867 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
866 868 if prevs:
867 869 for pr in prevs:
868 870 if pr == p:
869 871 c.append(self.node(r))
870 872 elif p == nullrev:
871 873 c.append(self.node(r))
872 874 return c
873 875
874 876 def descendant(self, start, end):
875 877 if start == nullrev:
876 878 return True
877 879 for i in self.descendants([start]):
878 880 if i == end:
879 881 return True
880 882 elif i > end:
881 883 break
882 884 return False
883 885
884 886 def commonancestorsheads(self, a, b):
885 887 """calculate all the heads of the common ancestors of nodes a and b"""
886 888 a, b = self.rev(a), self.rev(b)
887 889 try:
888 890 ancs = self.index.commonancestorsheads(a, b)
889 891 except (AttributeError, OverflowError): # C implementation failed
890 892 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
891 893 return map(self.node, ancs)
892 894
893 895 def isancestor(self, a, b):
894 896 """return True if node a is an ancestor of node b
895 897
896 898 The implementation of this is trivial but the use of
897 899 commonancestorsheads is not."""
898 900 return a in self.commonancestorsheads(a, b)
899 901
900 902 def ancestor(self, a, b):
901 903 """calculate the "best" common ancestor of nodes a and b"""
902 904
903 905 a, b = self.rev(a), self.rev(b)
904 906 try:
905 907 ancs = self.index.ancestors(a, b)
906 908 except (AttributeError, OverflowError):
907 909 ancs = ancestor.ancestors(self.parentrevs, a, b)
908 910 if ancs:
909 911 # choose a consistent winner when there's a tie
910 912 return min(map(self.node, ancs))
911 913 return nullid
912 914
913 915 def _match(self, id):
914 916 if isinstance(id, int):
915 917 # rev
916 918 return self.node(id)
917 919 if len(id) == 20:
918 920 # possibly a binary node
919 921 # odds of a binary node being all hex in ASCII are 1 in 10**25
920 922 try:
921 923 node = id
922 924 self.rev(node) # quick search the index
923 925 return node
924 926 except LookupError:
925 927 pass # may be partial hex id
926 928 try:
927 929 # str(rev)
928 930 rev = int(id)
929 931 if str(rev) != id:
930 932 raise ValueError
931 933 if rev < 0:
932 934 rev = len(self) + rev
933 935 if rev < 0 or rev >= len(self):
934 936 raise ValueError
935 937 return self.node(rev)
936 938 except (ValueError, OverflowError):
937 939 pass
938 940 if len(id) == 40:
939 941 try:
940 942 # a full hex nodeid?
941 943 node = bin(id)
942 944 self.rev(node)
943 945 return node
944 946 except (TypeError, LookupError):
945 947 pass
946 948
947 949 def _partialmatch(self, id):
948 950 try:
949 951 n = self.index.partialmatch(id)
950 952 if n and self.hasnode(n):
951 953 return n
952 954 return None
953 955 except RevlogError:
954 956 # parsers.c radix tree lookup gave multiple matches
955 957 # fast path: for unfiltered changelog, radix tree is accurate
956 958 if not getattr(self, 'filteredrevs', None):
957 959 raise LookupError(id, self.indexfile,
958 960 _('ambiguous identifier'))
959 961 # fall through to slow path that filters hidden revisions
960 962 except (AttributeError, ValueError):
961 963 # we are pure python, or key was too short to search radix tree
962 964 pass
963 965
964 966 if id in self._pcache:
965 967 return self._pcache[id]
966 968
967 969 if len(id) < 40:
968 970 try:
969 971 # hex(node)[:...]
970 972 l = len(id) // 2 # grab an even number of digits
971 973 prefix = bin(id[:l * 2])
972 974 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
973 975 nl = [n for n in nl if hex(n).startswith(id) and
974 976 self.hasnode(n)]
975 977 if len(nl) > 0:
976 978 if len(nl) == 1:
977 979 self._pcache[id] = nl[0]
978 980 return nl[0]
979 981 raise LookupError(id, self.indexfile,
980 982 _('ambiguous identifier'))
981 983 return None
982 984 except TypeError:
983 985 pass
984 986
985 987 def lookup(self, id):
986 988 """locate a node based on:
987 989 - revision number or str(revision number)
988 990 - nodeid or subset of hex nodeid
989 991 """
990 992 n = self._match(id)
991 993 if n is not None:
992 994 return n
993 995 n = self._partialmatch(id)
994 996 if n:
995 997 return n
996 998
997 999 raise LookupError(id, self.indexfile, _('no match found'))
998 1000
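Resolution is attempted cheapest-first: revision number, binary nodeid, str(revision number), full hex nodeid, and finally a unique hex prefix via _partialmatch(). A hypothetical usage sketch, assuming rl is some populated revlog instance:

    node = rl.lookup(0)                       # revision number
    assert rl.lookup('0') == node             # str(revision number)
    assert rl.lookup(node) == node            # 20-byte binary nodeid
    assert rl.lookup(hex(node)) == node       # full 40-char hex nodeid
    assert rl.lookup(hex(node)[:12]) == node  # unambiguous hex prefix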
999 1001 def cmp(self, node, text):
1000 1002 """compare text with a given file revision
1001 1003
1002 1004 returns True if text is different from what is stored.
1003 1005 """
1004 1006 p1, p2 = self.parents(node)
1005 1007 return hash(text, p1, p2) != node
1006 1008
1007 1009 def _addchunk(self, offset, data):
1008 1010 """Add a segment to the revlog cache.
1009 1011
1010 1012 Accepts an absolute offset and the data that is at that location.
1011 1013 """
1012 1014 o, d = self._chunkcache
1013 1015 # try to add to existing cache
1014 1016 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1015 1017 self._chunkcache = o, d + data
1016 1018 else:
1017 1019 self._chunkcache = offset, data
1018 1020
1019 1021 def _loadchunk(self, offset, length, df=None):
1020 1022 """Load a segment of raw data from the revlog.
1021 1023
1022 1024 Accepts an absolute offset, length to read, and an optional existing
1023 1025 file handle to read from.
1024 1026
1025 1027 If an existing file handle is passed, it will be seeked and the
1026 1028 original seek position will NOT be restored.
1027 1029
1028 1030 Returns a str or buffer of raw byte data.
1029 1031 """
1030 1032 if df is not None:
1031 1033 closehandle = False
1032 1034 else:
1033 1035 if self._inline:
1034 1036 df = self.opener(self.indexfile)
1035 1037 else:
1036 1038 df = self.opener(self.datafile)
1037 1039 closehandle = True
1038 1040
1039 1041 # Cache data both forward and backward around the requested
1040 1042 # data, in a fixed size window. This helps speed up operations
1041 1043 # involving reading the revlog backwards.
1042 1044 cachesize = self._chunkcachesize
1043 1045 realoffset = offset & ~(cachesize - 1)
1044 1046 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1045 1047 - realoffset)
1046 1048 df.seek(realoffset)
1047 1049 d = df.read(reallength)
1048 1050 if closehandle:
1049 1051 df.close()
1050 1052 self._addchunk(realoffset, d)
1051 1053 if offset != realoffset or reallength != length:
1052 1054 return util.buffer(d, offset - realoffset, length)
1053 1055 return d
1054 1056
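The masking above widens every read to cache-size-aligned windows (the cache size is a power of two, enforced in __init__). Worked numbers for the default 64k window:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert realoffset == 65536     # rounded down to a window boundary
    assert reallength == 65536     # one window covers [70000, 70100)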
1055 1057 def _getchunk(self, offset, length, df=None):
1056 1058 """Obtain a segment of raw data from the revlog.
1057 1059
1058 1060 Accepts an absolute offset, length of bytes to obtain, and an
1059 1061 optional file handle to the already-opened revlog. If the file
1060 1062 handle is used, its original seek position will not be preserved.
1061 1063
1062 1064 Requests for data may be returned from a cache.
1063 1065
1064 1066 Returns a str or a buffer instance of raw byte data.
1065 1067 """
1066 1068 o, d = self._chunkcache
1067 1069 l = len(d)
1068 1070
1069 1071 # is it in the cache?
1070 1072 cachestart = offset - o
1071 1073 cacheend = cachestart + length
1072 1074 if cachestart >= 0 and cacheend <= l:
1073 1075 if cachestart == 0 and cacheend == l:
1074 1076 return d # avoid a copy
1075 1077 return util.buffer(d, cachestart, cacheend - cachestart)
1076 1078
1077 1079 return self._loadchunk(offset, length, df=df)
1078 1080
1079 1081 def _chunkraw(self, startrev, endrev, df=None):
1080 1082 """Obtain a segment of raw data corresponding to a range of revisions.
1081 1083
1082 1084 Accepts the start and end revisions and an optional already-open
1083 1085 file handle to be used for reading. If the file handle is read, its
1084 1086 seek position will not be preserved.
1085 1087
1086 1088 Requests for data may be satisfied by a cache.
1087 1089
1088 1090 Returns a 2-tuple of (offset, data) for the requested range of
1089 1091 revisions. Offset is the integer offset from the beginning of the
1090 1092 revlog and data is a str or buffer of the raw byte data.
1091 1093
1092 1094 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1093 1095 to determine where each revision's data begins and ends.
1094 1096 """
1095 1097 start = self.start(startrev)
1096 1098 end = self.end(endrev)
1097 1099 if self._inline:
1098 1100 start += (startrev + 1) * self._io.size
1099 1101 end += (endrev + 1) * self._io.size
1100 1102 length = end - start
1101 1103
1102 1104 return start, self._getchunk(start, length, df=df)
1103 1105
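For inline revlogs, data chunks are interleaved on disk with the fixed-size index records, so a data offset must be shifted past the (rev + 1) entries that precede it, its own entry included. A worked sketch for revision 0 of a RevlogNG inline file:

    iosize = 64                  # revlogio().size for RevlogNG
    start = 0                    # self.start(0): offset ignoring entries
    assert start + (0 + 1) * iosize == 64   # rev 0 data follows its entry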
1104 1106 def _chunk(self, rev, df=None):
1105 1107 """Obtain a single decompressed chunk for a revision.
1106 1108
1107 1109 Accepts an integer revision and an optional already-open file handle
1108 1110 to be used for reading. If used, the seek position of the file will not
1109 1111 be preserved.
1110 1112
1111 1113 Returns a str holding uncompressed data for the requested revision.
1112 1114 """
1113 1115 return decompress(self._chunkraw(rev, rev, df=df)[1])
1114 1116
1115 1117 def _chunks(self, revs, df=None):
1116 1118 """Obtain decompressed chunks for the specified revisions.
1117 1119
1118 1120 Accepts an iterable of numeric revisions that are assumed to be in
1119 1121 ascending order. Also accepts an optional already-open file handle
1120 1122 to be used for reading. If used, the seek position of the file will
1121 1123 not be preserved.
1122 1124
1123 1125 This function is similar to calling ``self._chunk()`` multiple times,
1124 1126 but is faster.
1125 1127
1126 1128 Returns a list with decompressed data for each requested revision.
1127 1129 """
1128 1130 if not revs:
1129 1131 return []
1130 1132 start = self.start
1131 1133 length = self.length
1132 1134 inline = self._inline
1133 1135 iosize = self._io.size
1134 1136 buffer = util.buffer
1135 1137
1136 1138 l = []
1137 1139 ladd = l.append
1138 1140
1139 1141 try:
1140 1142 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1141 1143 except OverflowError:
1142 1144 # issue4215 - we can't cache a run of chunks greater than
1143 1145 # 2G on Windows
1144 1146 return [self._chunk(rev, df=df) for rev in revs]
1145 1147
1146 1148 for rev in revs:
1147 1149 chunkstart = start(rev)
1148 1150 if inline:
1149 1151 chunkstart += (rev + 1) * iosize
1150 1152 chunklength = length(rev)
1151 1153 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1152 1154
1153 1155 return l
1154 1156
1155 1157 def _chunkclear(self):
1156 1158 """Clear the raw chunk cache."""
1157 1159 self._chunkcache = (0, '')
1158 1160
1159 1161 def deltaparent(self, rev):
1160 1162 """return deltaparent of the given revision"""
1161 1163 base = self.index[rev][3]
1162 1164 if base == rev:
1163 1165 return nullrev
1164 1166 elif self._generaldelta:
1165 1167 return base
1166 1168 else:
1167 1169 return rev - 1
1168 1170
1169 1171 def revdiff(self, rev1, rev2):
1170 1172 """return or calculate a delta between two revisions"""
1171 1173 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1172 1174 return str(self._chunk(rev2))
1173 1175
1174 1176 return mdiff.textdiff(self.revision(rev1),
1175 1177 self.revision(rev2))
1176 1178
1177 1179 def revision(self, nodeorrev, _df=None):
1178 1180 """return an uncompressed revision of a given node or revision
1179 1181 number.
1180 1182
1181 1183 _df is an existing file handle to read from. It is meant to only be
1182 1184 used internally.
1183 1185 """
1184 1186 if isinstance(nodeorrev, int):
1185 1187 rev = nodeorrev
1186 1188 node = self.node(rev)
1187 1189 else:
1188 1190 node = nodeorrev
1189 1191 rev = None
1190 1192
1191 1193 cachedrev = None
1192 1194 if node == nullid:
1193 1195 return ""
1194 1196 if self._cache:
1195 1197 if self._cache[0] == node:
1196 1198 return self._cache[2]
1197 1199 cachedrev = self._cache[1]
1198 1200
1199 1201 # look up what we need to read
1200 1202 text = None
1201 1203 if rev is None:
1202 1204 rev = self.rev(node)
1203 1205
1204 1206 # check rev flags
1205 1207 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1206 1208 raise RevlogError(_('incompatible revision flag %x') %
1207 1209 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1208 1210
1209 1211 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1210 1212 if stopped:
1211 1213 text = self._cache[2]
1212 1214
1213 1215 # drop cache to save memory
1214 1216 self._cache = None
1215 1217
1216 1218 bins = self._chunks(chain, df=_df)
1217 1219 if text is None:
1218 1220 text = str(bins[0])
1219 1221 bins = bins[1:]
1220 1222
1221 1223 text = mdiff.patches(text, bins)
1222 1224
1223 1225 text = self._checkhash(text, node, rev)
1224 1226
1225 1227 self._cache = (node, rev, text)
1226 1228 return text
1227 1229
1228 1230 def hash(self, text, p1, p2):
1229 1231 """Compute a node hash.
1230 1232
1231 1233 Available as a function so that subclasses can replace the hash
1232 1234 as needed.
1233 1235 """
1234 1236 return hash(text, p1, p2)
1235 1237
1236 1238 def _checkhash(self, text, node, rev):
1237 1239 p1, p2 = self.parents(node)
1238 1240 self.checkhash(text, p1, p2, node, rev)
1239 1241 return text
1240 1242
1241 1243 def checkhash(self, text, p1, p2, node, rev=None):
1242 1244 if node != self.hash(text, p1, p2):
1243 1245 revornode = rev
1244 1246 if revornode is None:
1245 1247 revornode = templatefilters.short(hex(node))
1246 1248 raise RevlogError(_("integrity check failed on %s:%s")
1247 1249 % (self.indexfile, revornode))
1248 1250
1249 1251 def checkinlinesize(self, tr, fp=None):
1250 1252 """Check if the revlog is too big for inline and convert if so.
1251 1253
1252 1254 This should be called after revisions are added to the revlog. If the
1253 1255 revlog has grown too large to be an inline revlog, it will convert it
1254 1256 to use multiple index and data files.
1255 1257 """
1256 1258 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1257 1259 return
1258 1260
1259 1261 trinfo = tr.find(self.indexfile)
1260 1262 if trinfo is None:
1261 1263 raise RevlogError(_("%s not found in the transaction")
1262 1264 % self.indexfile)
1263 1265
1264 1266 trindex = trinfo[2]
1265 1267 if trindex is not None:
1266 1268 dataoff = self.start(trindex)
1267 1269 else:
1268 1270 # revlog was stripped at start of transaction, use all leftover data
1269 1271 trindex = len(self) - 1
1270 1272 dataoff = self.end(-2)
1271 1273
1272 1274 tr.add(self.datafile, dataoff)
1273 1275
1274 1276 if fp:
1275 1277 fp.flush()
1276 1278 fp.close()
1277 1279
1278 1280 df = self.opener(self.datafile, 'w')
1279 1281 try:
1280 1282 for r in self:
1281 1283 df.write(self._chunkraw(r, r)[1])
1282 1284 finally:
1283 1285 df.close()
1284 1286
1285 1287 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1286 1288 checkambig=self._checkambig)
1287 1289 self.version &= ~(REVLOGNGINLINEDATA)
1288 1290 self._inline = False
1289 1291 for i in self:
1290 1292 e = self._io.packentry(self.index[i], self.node, self.version, i)
1291 1293 fp.write(e)
1292 1294
1293 1295 # if we don't call close, the temp file will never replace the
1294 1296 # real index
1295 1297 fp.close()
1296 1298
1297 1299 tr.replace(self.indexfile, trindex * self._io.size)
1298 1300 self._chunkclear()
1299 1301
1300 1302 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1301 1303 node=None):
1302 1304 """add a revision to the log
1303 1305
1304 1306 text - the revision data to add
1305 1307 transaction - the transaction object used for rollback
1306 1308 link - the linkrev data to add
1307 1309 p1, p2 - the parent nodeids of the revision
1308 1310 cachedelta - an optional precomputed delta
1309 1311 node - nodeid of revision; typically node is not specified, and it is
1310 1312 computed by default as hash(text, p1, p2), however subclasses might
1311 1313 use different hashing method (and override checkhash() in such case)
1312 1314 """
1313 1315 if link == nullrev:
1314 1316 raise RevlogError(_("attempted to add linkrev -1 to %s")
1315 1317 % self.indexfile)
1316 1318
1317 1319 if len(text) > _maxentrysize:
1318 1320 raise RevlogError(
1319 1321 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1320 1322 % (self.indexfile, len(text)))
1321 1323
1322 1324 node = node or self.hash(text, p1, p2)
1323 1325 if node in self.nodemap:
1324 1326 return node
1325 1327
1326 1328 dfh = None
1327 1329 if not self._inline:
1328 1330 dfh = self.opener(self.datafile, "a+")
1329 1331 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1330 1332 try:
1331 1333 return self._addrevision(node, text, transaction, link, p1, p2,
1332 1334 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1333 1335 finally:
1334 1336 if dfh:
1335 1337 dfh.close()
1336 1338 ifh.close()
1337 1339
1338 1340 def compress(self, text):
1339 1341 """ generate a possibly-compressed representation of text """
1340 1342 if not text:
1341 1343 return ("", text)
1342 1344 l = len(text)
1343 1345 bin = None
1344 1346 if l < 44:
1345 1347 pass
1346 1348 elif l > 1000000:
1347 1349 # zlib makes an internal copy, thus doubling memory usage for
1348 1350 # large files, so let's do this in pieces
1349 1351 z = zlib.compressobj()
1350 1352 p = []
1351 1353 pos = 0
1352 1354 while pos < l:
1353 1355 pos2 = pos + 2**20
1354 1356 p.append(z.compress(text[pos:pos2]))
1355 1357 pos = pos2
1356 1358 p.append(z.flush())
1357 1359 if sum(map(len, p)) < l:
1358 1360 bin = "".join(p)
1359 1361 else:
1360 1362 bin = _compress(text)
1361 1363 if bin is None or len(bin) > l:
1362 1364 if text[0] == '\0':
1363 1365 return ("", text)
1364 1366 return ('u', text)
1365 1367 return ("", bin)
1366 1368
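compress() and decompress() agree on the one-byte marker described earlier. A sketch of the three outcomes, assuming rl is some revlog instance (the method does not depend on instance state):

    h, data = rl.compress('na' * 100)        # compressible: zlib stream
    assert h == '' and data.startswith('x')
    h, data = rl.compress('abc')             # short text: stored verbatim
    assert (h, data) == ('u', 'abc')
    h, data = rl.compress('\0abc')           # '\0' prefix needs no marker
    assert (h, data) == ('', '\0abc')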
1367 1369 def _isgooddelta(self, d, textlen):
1368 1370 """Returns True if the given delta is good. Good means that it is within
1369 1371 the disk span, disk size, and chain length bounds that we know to be
1370 1372 performant."""
1371 1373 if d is None:
1372 1374 return False
1373 1375
1374 1376 # - 'dist' is the distance from the base revision -- bounding it limits
1375 1377 # the amount of I/O we need to do.
1376 1378 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1377 1379 # to apply -- bounding it limits the amount of CPU we consume.
1378 1380 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1379 1381 if (dist > textlen * 4 or l > textlen or
1380 1382 compresseddeltalen > textlen * 2 or
1381 1383 (self._maxchainlen and chainlen > self._maxchainlen)):
1382 1384 return False
1383 1385
1384 1386 return True
1385 1387
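Worked numbers for the three bounds, for a 1000-byte revision text:

    textlen = 1000
    # accepted: read span <= 4x text, delta <= text, chain total <= 2x text
    dist, l, compresseddeltalen = 3000, 200, 1500
    assert not (dist > textlen * 4 or l > textlen or
                compresseddeltalen > textlen * 2)
    # rejected: reading 5000 bytes to reconstruct 1000 is too much I/O
    assert 5000 > textlen * 4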
1386 1388 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1387 1389 cachedelta, ifh, dfh, alwayscache=False):
1388 1390 """internal function to add revisions to the log
1389 1391
1390 1392 see addrevision for argument descriptions.
1391 1393 invariants:
1392 1394 - text is optional (can be None); if not set, cachedelta must be set.
1393 1395 if both are set, they must correspond to each other.
1394 1396 """
1395 1397 btext = [text]
1396 1398 def buildtext():
1397 1399 if btext[0] is not None:
1398 1400 return btext[0]
1399 1401 baserev = cachedelta[0]
1400 1402 delta = cachedelta[1]
1401 1403 # special case deltas which replace entire base; no need to decode
1402 1404 # base revision. this neatly avoids censored bases, which throw when
1403 1405 # they're decoded.
1404 1406 hlen = struct.calcsize(">lll")
1405 1407 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1406 1408 len(delta) - hlen):
1407 1409 btext[0] = delta[hlen:]
1408 1410 else:
1409 1411 if self._inline:
1410 1412 fh = ifh
1411 1413 else:
1412 1414 fh = dfh
1413 1415 basetext = self.revision(self.node(baserev), _df=fh)
1414 1416 btext[0] = mdiff.patch(basetext, delta)
1415 1417 try:
1416 1418 self.checkhash(btext[0], p1, p2, node)
1417 1419 if flags & REVIDX_ISCENSORED:
1418 1420 raise RevlogError(_('node %s is not censored') % node)
1419 1421 except CensoredNodeError:
1420 1422 # must pass the censored index flag to add censored revisions
1421 1423 if not flags & REVIDX_ISCENSORED:
1422 1424 raise
1423 1425 return btext[0]
1424 1426
1425 1427 def builddelta(rev):
1426 1428 # can we use the cached delta?
1427 1429 if cachedelta and cachedelta[0] == rev:
1428 1430 delta = cachedelta[1]
1429 1431 else:
1430 1432 t = buildtext()
1431 1433 if self.iscensored(rev):
1432 1434 # deltas based on a censored revision must replace the
1433 1435 # full content in one patch, so delta works everywhere
1434 1436 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1435 1437 delta = header + t
1436 1438 else:
1437 1439 if self._inline:
1438 1440 fh = ifh
1439 1441 else:
1440 1442 fh = dfh
1441 1443 ptext = self.revision(self.node(rev), _df=fh)
1442 1444 delta = mdiff.textdiff(ptext, t)
1443 1445 header, data = self.compress(delta)
1444 1446 deltalen = len(header) + len(data)
1445 1447 chainbase = self.chainbase(rev)
1446 1448 dist = deltalen + offset - self.start(chainbase)
1447 1449 if self._generaldelta:
1448 1450 base = rev
1449 1451 else:
1450 1452 base = chainbase
1451 1453 chainlen, compresseddeltalen = self._chaininfo(rev)
1452 1454 chainlen += 1
1453 1455 compresseddeltalen += deltalen
1454 1456 return (dist, deltalen, (header, data), base,
1455 1457 chainbase, chainlen, compresseddeltalen)
1456 1458
1457 1459 curr = len(self)
1458 1460 prev = curr - 1
1459 1461 offset = self.end(prev)
1460 1462 delta = None
1461 1463 p1r, p2r = self.rev(p1), self.rev(p2)
1462 1464
1463 1465 # full versions are inserted when the needed deltas
1464 1466 # become comparable to the uncompressed text
1465 1467 if text is None:
1466 1468 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1467 1469 cachedelta[1])
1468 1470 else:
1469 1471 textlen = len(text)
1470 1472
1471 1473 # should we try to build a delta?
1472 if prev != nullrev:
1474 if prev != nullrev and self._storedeltachains:
1473 1475 tested = set()
1474 1476 # This condition is true most of the time when processing
1475 1477 # changegroup data into a generaldelta repo. The only time it
1476 1478 # isn't true is if this is the first revision in a delta chain
1477 1479 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1478 1480 if cachedelta and self._generaldelta and self._lazydeltabase:
1479 1481 # Assume what we received from the server is a good choice
1480 1482 # build delta will reuse the cache
1481 1483 candidatedelta = builddelta(cachedelta[0])
1482 1484 tested.add(cachedelta[0])
1483 1485 if self._isgooddelta(candidatedelta, textlen):
1484 1486 delta = candidatedelta
1485 1487 if delta is None and self._generaldelta:
1486 1488 # exclude already lazy tested base if any
1487 1489 parents = [p for p in (p1r, p2r)
1488 1490 if p != nullrev and p not in tested]
1489 1491 if parents and not self._aggressivemergedeltas:
1490 1492 # Pick whichever parent is closer to us (to minimize the
1491 1493 # chance of having to build a fulltext).
1492 1494 parents = [max(parents)]
1493 1495 tested.update(parents)
1494 1496 pdeltas = []
1495 1497 for p in parents:
1496 1498 pd = builddelta(p)
1497 1499 if self._isgooddelta(pd, textlen):
1498 1500 pdeltas.append(pd)
1499 1501 if pdeltas:
1500 1502 delta = min(pdeltas, key=lambda x: x[1])
1501 1503 if delta is None and prev not in tested:
1502 1504 # other approaches failed; try against prev to hopefully save us a
1503 1505 # fulltext.
1504 1506 candidatedelta = builddelta(prev)
1505 1507 if self._isgooddelta(candidatedelta, textlen):
1506 1508 delta = candidatedelta
1507 1509 if delta is not None:
1508 1510 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1509 1511 else:
1510 1512 text = buildtext()
1511 1513 data = self.compress(text)
1512 1514 l = len(data[1]) + len(data[0])
1513 1515 base = chainbase = curr
1514 1516
1515 1517 e = (offset_type(offset, flags), l, textlen,
1516 1518 base, link, p1r, p2r, node)
1517 1519 self.index.insert(-1, e)
1518 1520 self.nodemap[node] = curr
1519 1521
1520 1522 entry = self._io.packentry(e, self.node, self.version, curr)
1521 1523 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1522 1524
1523 1525 if alwayscache and text is None:
1524 1526 text = buildtext()
1525 1527
1526 1528 if type(text) == str: # only accept immutable objects
1527 1529 self._cache = (node, curr, text)
1528 1530 self._chainbasecache[curr] = chainbase
1529 1531 return node
1530 1532
1531 1533 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1532 1534 # Files opened in a+ mode have inconsistent behavior on various
1533 1535 # platforms. Windows requires that a file positioning call be made
1534 1536 # when the file handle transitions between reads and writes. See
1535 1537 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1536 1538 # platforms, Python or the platform itself can be buggy. Some versions
1537 1539 # of Solaris have been observed to not append at the end of the file
1538 1540 # if the file was seeked to before the end. See issue4943 for more.
1539 1541 #
1540 1542 # We work around this issue by inserting a seek() before writing.
1541 1543 # Note: This is likely not necessary on Python 3.
1542 1544 ifh.seek(0, os.SEEK_END)
1543 1545 if dfh:
1544 1546 dfh.seek(0, os.SEEK_END)
1545 1547
1546 1548 curr = len(self) - 1
1547 1549 if not self._inline:
1548 1550 transaction.add(self.datafile, offset)
1549 1551 transaction.add(self.indexfile, curr * len(entry))
1550 1552 if data[0]:
1551 1553 dfh.write(data[0])
1552 1554 dfh.write(data[1])
1553 1555 ifh.write(entry)
1554 1556 else:
1555 1557 offset += curr * self._io.size
1556 1558 transaction.add(self.indexfile, offset, curr)
1557 1559 ifh.write(entry)
1558 1560 ifh.write(data[0])
1559 1561 ifh.write(data[1])
1560 1562 self.checkinlinesize(transaction, ifh)
1561 1563
1562 1564 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1563 1565 """
1564 1566 add a delta group
1565 1567
1566 1568 given a set of deltas, add them to the revision log. the
1567 1569 first delta is against its parent, which should be in our
1568 1570 log, the rest are against the previous delta.
1569 1571
1570 1572 If ``addrevisioncb`` is defined, it will be called with arguments of
1571 1573 this revlog and the node that was added.
1572 1574 """
1573 1575
1574 1576 # track the base of the current delta log
1575 1577 content = []
1576 1578 node = None
1577 1579
1578 1580 r = len(self)
1579 1581 end = 0
1580 1582 if r:
1581 1583 end = self.end(r - 1)
1582 1584 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1583 1585 isize = r * self._io.size
1584 1586 if self._inline:
1585 1587 transaction.add(self.indexfile, end + isize, r)
1586 1588 dfh = None
1587 1589 else:
1588 1590 transaction.add(self.indexfile, isize, r)
1589 1591 transaction.add(self.datafile, end)
1590 1592 dfh = self.opener(self.datafile, "a+")
1591 1593 def flush():
1592 1594 if dfh:
1593 1595 dfh.flush()
1594 1596 ifh.flush()
1595 1597 try:
1596 1598 # loop through our set of deltas
1597 1599 chain = None
1598 1600 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1599 1601 node = chunkdata['node']
1600 1602 p1 = chunkdata['p1']
1601 1603 p2 = chunkdata['p2']
1602 1604 cs = chunkdata['cs']
1603 1605 deltabase = chunkdata['deltabase']
1604 1606 delta = chunkdata['delta']
1605 1607 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1606 1608
1607 1609 content.append(node)
1608 1610
1609 1611 link = linkmapper(cs)
1610 1612 if node in self.nodemap:
1611 1613 # this can happen if two branches make the same change
1612 1614 chain = node
1613 1615 continue
1614 1616
1615 1617 for p in (p1, p2):
1616 1618 if p not in self.nodemap:
1617 1619 raise LookupError(p, self.indexfile,
1618 1620 _('unknown parent'))
1619 1621
1620 1622 if deltabase not in self.nodemap:
1621 1623 raise LookupError(deltabase, self.indexfile,
1622 1624 _('unknown delta base'))
1623 1625
1624 1626 baserev = self.rev(deltabase)
1625 1627
1626 1628 if baserev != nullrev and self.iscensored(baserev):
1627 1629 # if base is censored, delta must be full replacement in a
1628 1630 # single patch operation
1629 1631 hlen = struct.calcsize(">lll")
1630 1632 oldlen = self.rawsize(baserev)
1631 1633 newlen = len(delta) - hlen
1632 1634 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1633 1635 raise error.CensoredBaseError(self.indexfile,
1634 1636 self.node(baserev))
1635 1637
1636 1638 if not flags and self._peek_iscensored(baserev, delta, flush):
1637 1639 flags |= REVIDX_ISCENSORED
1638 1640
1639 1641 # We assume consumers of addrevisioncb will want to retrieve
1640 1642 # the added revision, which will require a call to
1641 1643 # revision(). revision() will fast path if there is a cache
1642 1644 # hit. So, we tell _addrevision() to always cache in this case.
1643 1645 chain = self._addrevision(node, None, transaction, link,
1644 1646 p1, p2, flags, (baserev, delta),
1645 1647 ifh, dfh,
1646 1648 alwayscache=bool(addrevisioncb))
1647 1649
1648 1650 if addrevisioncb:
1649 1651 addrevisioncb(self, chain)
1650 1652
1651 1653 if not dfh and not self._inline:
1652 1654 # addrevision switched from inline to conventional
1653 1655 # reopen the index
1654 1656 ifh.close()
1655 1657 dfh = self.opener(self.datafile, "a+")
1656 1658 ifh = self.opener(self.indexfile, "a+",
1657 1659 checkambig=self._checkambig)
1658 1660 finally:
1659 1661 if dfh:
1660 1662 dfh.close()
1661 1663 ifh.close()
1662 1664
1663 1665 return content
1664 1666
1665 1667 def iscensored(self, rev):
1666 1668 """Check if a file revision is censored."""
1667 1669 return False
1668 1670
1669 1671 def _peek_iscensored(self, baserev, delta, flush):
1670 1672 """Quickly check if a delta produces a censored revision."""
1671 1673 return False
1672 1674
1673 1675 def getstrippoint(self, minlink):
1674 1676 """find the minimum rev that must be stripped to strip the linkrev
1675 1677
1676 1678 Returns a tuple containing the minimum rev and a set of all revs that
1677 1679 have linkrevs that will be broken by this strip.
1678 1680 """
1679 1681 brokenrevs = set()
1680 1682 strippoint = len(self)
1681 1683
1682 1684 heads = {}
1683 1685 futurelargelinkrevs = set()
1684 1686 for head in self.headrevs():
1685 1687 headlinkrev = self.linkrev(head)
1686 1688 heads[head] = headlinkrev
1687 1689 if headlinkrev >= minlink:
1688 1690 futurelargelinkrevs.add(headlinkrev)
1689 1691
1690 1692 # This algorithm involves walking down the rev graph, starting at the
1691 1693 # heads. Since the revs are topologically sorted according to linkrev,
1692 1694 # once all head linkrevs are below the minlink, we know there are
1693 1695 # no more revs that could have a linkrev greater than minlink.
1694 1696 # So we can stop walking.
1695 1697 while futurelargelinkrevs:
1696 1698 strippoint -= 1
1697 1699 linkrev = heads.pop(strippoint)
1698 1700
1699 1701 if linkrev < minlink:
1700 1702 brokenrevs.add(strippoint)
1701 1703 else:
1702 1704 futurelargelinkrevs.remove(linkrev)
1703 1705
1704 1706 for p in self.parentrevs(strippoint):
1705 1707 if p != nullrev:
1706 1708 plinkrev = self.linkrev(p)
1707 1709 heads[p] = plinkrev
1708 1710 if plinkrev >= minlink:
1709 1711 futurelargelinkrevs.add(plinkrev)
1710 1712
1711 1713 return strippoint, brokenrevs
1712 1714
1713 1715 def strip(self, minlink, transaction):
1714 1716 """truncate the revlog on the first revision with a linkrev >= minlink
1715 1717
1716 1718 This function is called when we're stripping revision minlink and
1717 1719 its descendants from the repository.
1718 1720
1719 1721 We have to remove all revisions with linkrev >= minlink, because
1720 1722 the equivalent changelog revisions will be renumbered after the
1721 1723 strip.
1722 1724
1723 1725 So we truncate the revlog on the first of these revisions, and
1724 1726 trust that the caller has saved the revisions that shouldn't be
1725 1727 removed and that it'll re-add them after this truncation.
1726 1728 """
1727 1729 if len(self) == 0:
1728 1730 return
1729 1731
1730 1732 rev, _ = self.getstrippoint(minlink)
1731 1733 if rev == len(self):
1732 1734 return
1733 1735
1734 1736 # first truncate the files on disk
1735 1737 end = self.start(rev)
1736 1738 if not self._inline:
1737 1739 transaction.add(self.datafile, end)
1738 1740 end = rev * self._io.size
1739 1741 else:
1740 1742 end += rev * self._io.size
1741 1743
1742 1744 transaction.add(self.indexfile, end)
1743 1745
1744 1746 # then reset internal state in memory to forget those revisions
1745 1747 self._cache = None
1746 1748 self._chaininfocache = {}
1747 1749 self._chunkclear()
1748 1750 for x in xrange(rev, len(self)):
1749 1751 del self.nodemap[self.node(x)]
1750 1752
1751 1753 del self.index[rev:-1]
1752 1754
1753 1755 def checksize(self):
1754 1756 expected = 0
1755 1757 if len(self):
1756 1758 expected = max(0, self.end(len(self) - 1))
1757 1759
1758 1760 try:
1759 1761 f = self.opener(self.datafile)
1760 1762 f.seek(0, 2)
1761 1763 actual = f.tell()
1762 1764 f.close()
1763 1765 dd = actual - expected
1764 1766 except IOError as inst:
1765 1767 if inst.errno != errno.ENOENT:
1766 1768 raise
1767 1769 dd = 0
1768 1770
1769 1771 try:
1770 1772 f = self.opener(self.indexfile)
1771 1773 f.seek(0, 2)
1772 1774 actual = f.tell()
1773 1775 f.close()
1774 1776 s = self._io.size
1775 1777 i = max(0, actual // s)
1776 1778 di = actual - (i * s)
1777 1779 if self._inline:
1778 1780 databytes = 0
1779 1781 for r in self:
1780 1782 databytes += max(0, self.length(r))
1781 1783 dd = 0
1782 1784 di = actual - len(self) * s - databytes
1783 1785 except IOError as inst:
1784 1786 if inst.errno != errno.ENOENT:
1785 1787 raise
1786 1788 di = 0
1787 1789
1788 1790 return (dd, di)
1789 1791
1790 1792 def files(self):
1791 1793 res = [self.indexfile]
1792 1794 if not self._inline:
1793 1795 res.append(self.datafile)
1794 1796 return res