revlog: use an LRU cache for delta chain bases...
Gregory Szorc
r29830:92ac2baa default
@@ -1,1782 +1,1781
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
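# A sketch of the packing these helpers invert (illustrative values): the
# first field of an index entry keeps the data offset in the high 48 bits
# and the type flags in the low 16 bits.
#
#   packed = offset_type(4096, 1)     # (4096 << 16) | 1
#   assert getoffset(packed) == 4096
#   assert gettype(packed) == 1
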
77 77 _nullhash = hashlib.sha1(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, p2 is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # none of the parent nodes are nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = hashlib.sha1(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
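# A usage sketch for hash() (hypothetical nodes a and b): a revision with a
# single parent passes nullid as p2 and hits the precomputed _nullhash fast
# path; a merge sorts its parents first, so argument order does not matter.
#
#   node = hash('file content', p1node, nullid)   # linear history
#   assert hash('x', a, b) == hash('x', b, a)     # parents are sorted
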
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
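# The first byte of each stored chunk tags its encoding: '\0' means the text
# is stored literally (it happens to begin with NUL), 'x' marks a zlib stream,
# and 'u' prefixes data stored uncompressed. A round-trip sketch, using the
# compress() method defined further down on a hypothetical revlog rl:
#
#   header, body = rl.compress(sometext)            # header is '' or 'u'
#   assert str(decompress(header + body)) == sometext
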
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
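# A sanity check on the v0 layout (illustrative): four 4-byte big-endian
# integers plus three 20-byte nodeids give a fixed 76-byte record, matching
# what revlogoldio derives below with struct.calcsize.
#
#   import struct
#   assert struct.calcsize(">4l20s20s20s") == 4 * 4 + 3 * 20   # 76 bytes
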
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
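# The RevlogNG record is likewise fixed-size (a sketch): an 8-byte combined
# offset/flags field, six 4-byte integers, a 20-byte nodeid, and 12 bytes of
# padding make every index entry exactly 64 bytes.
#
#   import struct
#   assert struct.calcsize(">Qiiiiii20s12x") == 8 + 6 * 4 + 20 + 12   # 64
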
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215 """
216 216 def __init__(self, opener, indexfile):
217 217 """
218 218 create a revlog object
219 219
220 220 opener is a function that abstracts the file opening operation
221 221 and can be used to implement COW semantics or the like.
222 222 """
223 223 self.indexfile = indexfile
224 224 self.datafile = indexfile[:-2] + ".d"
225 225 self.opener = opener
226 226 # 3-tuple of (node, rev, text) for a raw revision.
227 227 self._cache = None
228 # 2-tuple of (rev, baserev) defining the base revision the delta chain
229 # begins at for a revision.
230 self._basecache = None
228 # Maps rev to chain base rev.
229 self._chainbasecache = util.lrucachedict(100)
231 230 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
232 231 self._chunkcache = (0, '')
233 232 # How much data to read and cache into the raw revlog data cache.
234 233 self._chunkcachesize = 65536
235 234 self._maxchainlen = None
236 235 self._aggressivemergedeltas = False
237 236 self.index = []
238 237 # Mapping of partial identifiers to full nodes.
239 238 self._pcache = {}
240 239 # Mapping of revision integer to full node.
241 240 self._nodecache = {nullid: nullrev}
242 241 self._nodepos = None
243 242
244 243 v = REVLOG_DEFAULT_VERSION
245 244 opts = getattr(opener, 'options', None)
246 245 if opts is not None:
247 246 if 'revlogv1' in opts:
248 247 if 'generaldelta' in opts:
249 248 v |= REVLOGGENERALDELTA
250 249 else:
251 250 v = 0
252 251 if 'chunkcachesize' in opts:
253 252 self._chunkcachesize = opts['chunkcachesize']
254 253 if 'maxchainlen' in opts:
255 254 self._maxchainlen = opts['maxchainlen']
256 255 if 'aggressivemergedeltas' in opts:
257 256 self._aggressivemergedeltas = opts['aggressivemergedeltas']
258 257 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
259 258
260 259 if self._chunkcachesize <= 0:
261 260 raise RevlogError(_('revlog chunk cache size %r is not greater '
262 261 'than 0') % self._chunkcachesize)
263 262 elif self._chunkcachesize & (self._chunkcachesize - 1):
264 263 raise RevlogError(_('revlog chunk cache size %r is not a power '
265 264 'of 2') % self._chunkcachesize)
266 265
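        # A note on the check above: for a power of two, x & (x - 1) == 0
        # (65536 & 65535 == 0), while any other positive size keeps a bit set
        # (65537 & 65536 != 0), so both bad values are rejected before the
        # cache-window math in _loadchunk comes to rely on the alignment.
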
267 266 indexdata = ''
268 267 self._initempty = True
269 268 try:
270 269 f = self.opener(self.indexfile)
271 270 indexdata = f.read()
272 271 f.close()
273 272 if len(indexdata) > 0:
274 273 v = struct.unpack(versionformat, indexdata[:4])[0]
275 274 self._initempty = False
276 275 except IOError as inst:
277 276 if inst.errno != errno.ENOENT:
278 277 raise
279 278
280 279 self.version = v
281 280 self._inline = v & REVLOGNGINLINEDATA
282 281 self._generaldelta = v & REVLOGGENERALDELTA
283 282 flags = v & ~0xFFFF
284 283 fmt = v & 0xFFFF
285 284 if fmt == REVLOGV0 and flags:
286 285 raise RevlogError(_("index %s unknown flags %#04x for format v0")
287 286 % (self.indexfile, flags >> 16))
288 287 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
289 288 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
290 289 % (self.indexfile, flags >> 16))
291 290 elif fmt > REVLOGNG:
292 291 raise RevlogError(_("index %s unknown format %d")
293 292 % (self.indexfile, fmt))
294 293
295 294 self._io = revlogio()
296 295 if self.version == REVLOGV0:
297 296 self._io = revlogoldio()
298 297 try:
299 298 d = self._io.parseindex(indexdata, self._inline)
300 299 except (ValueError, IndexError):
301 300 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
302 301 self.index, nodemap, self._chunkcache = d
303 302 if nodemap is not None:
304 303 self.nodemap = self._nodecache = nodemap
305 304 if not self._chunkcache:
306 305 self._chunkclear()
307 306 # revnum -> (chain-length, sum-delta-length)
308 307 self._chaininfocache = {}
309 308
310 309 def tip(self):
311 310 return self.node(len(self.index) - 2)
312 311 def __contains__(self, rev):
313 312 return 0 <= rev < len(self)
314 313 def __len__(self):
315 314 return len(self.index) - 1
316 315 def __iter__(self):
317 316 return iter(xrange(len(self)))
318 317 def revs(self, start=0, stop=None):
319 318 """iterate over all revs in this revlog (from start to stop)"""
320 319 step = 1
321 320 if stop is not None:
322 321 if start > stop:
323 322 step = -1
324 323 stop += step
325 324 else:
326 325 stop = len(self)
327 326 return xrange(start, stop, step)
328 327
329 328 @util.propertycache
330 329 def nodemap(self):
331 330 self.rev(self.node(0))
332 331 return self._nodecache
333 332
334 333 def hasnode(self, node):
335 334 try:
336 335 self.rev(node)
337 336 return True
338 337 except KeyError:
339 338 return False
340 339
341 340 def clearcaches(self):
342 341 self._cache = None
343 self._basecache = None
342 self._chainbasecache.clear()
344 343 self._chunkcache = (0, '')
345 344 self._pcache = {}
346 345
347 346 try:
348 347 self._nodecache.clearcaches()
349 348 except AttributeError:
350 349 self._nodecache = {nullid: nullrev}
351 350 self._nodepos = None
352 351
353 352 def rev(self, node):
354 353 try:
355 354 return self._nodecache[node]
356 355 except TypeError:
357 356 raise
358 357 except RevlogError:
359 358 # parsers.c radix tree lookup failed
360 359 raise LookupError(node, self.indexfile, _('no node'))
361 360 except KeyError:
362 361 # pure python cache lookup failed
363 362 n = self._nodecache
364 363 i = self.index
365 364 p = self._nodepos
366 365 if p is None:
367 366 p = len(i) - 2
368 367 for r in xrange(p, -1, -1):
369 368 v = i[r][7]
370 369 n[v] = r
371 370 if v == node:
372 371 self._nodepos = r - 1
373 372 return r
374 373 raise LookupError(node, self.indexfile, _('no node'))
375 374
376 375 def node(self, rev):
377 376 return self.index[rev][7]
378 377 def linkrev(self, rev):
379 378 return self.index[rev][4]
380 379 def parents(self, node):
381 380 i = self.index
382 381 d = i[self.rev(node)]
383 382 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
384 383 def parentrevs(self, rev):
385 384 return self.index[rev][5:7]
386 385 def start(self, rev):
387 386 return int(self.index[rev][0] >> 16)
388 387 def end(self, rev):
389 388 return self.start(rev) + self.length(rev)
390 389 def length(self, rev):
391 390 return self.index[rev][1]
392 391 def chainbase(self, rev):
392 base = self._chainbasecache.get(rev)
393 if base is not None:
394 return base
395
393 396 index = self.index
394 397 base = index[rev][3]
395 398 while base != rev:
396 399 rev = base
397 400 base = index[rev][3]
401
402 self._chainbasecache[rev] = base
398 403 return base
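    # A sketch of the caching introduced by this commit (hypothetical revlog
    # rl): the first call walks index entries until base == rev and stores
    # the result, so repeated lookups skip the walk entirely.
    #
    #   rl.chainbase(5)   # walks the delta chain, caches {5: base}
    #   rl.chainbase(5)   # served from util.lrucachedict, no index access
    #
    # lrucachedict(100) bounds memory by evicting the least recently used
    # entry once more than 100 distinct revs have been cached.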
399 404 def chainlen(self, rev):
400 405 return self._chaininfo(rev)[0]
401 406
402 407 def _chaininfo(self, rev):
403 408 chaininfocache = self._chaininfocache
404 409 if rev in chaininfocache:
405 410 return chaininfocache[rev]
406 411 index = self.index
407 412 generaldelta = self._generaldelta
408 413 iterrev = rev
409 414 e = index[iterrev]
410 415 clen = 0
411 416 compresseddeltalen = 0
412 417 while iterrev != e[3]:
413 418 clen += 1
414 419 compresseddeltalen += e[1]
415 420 if generaldelta:
416 421 iterrev = e[3]
417 422 else:
418 423 iterrev -= 1
419 424 if iterrev in chaininfocache:
420 425 t = chaininfocache[iterrev]
421 426 clen += t[0]
422 427 compresseddeltalen += t[1]
423 428 break
424 429 e = index[iterrev]
425 430 else:
426 431 # Add text length of base since decompressing that also takes
427 432 # work. For cache hits the length is already included.
428 433 compresseddeltalen += e[1]
429 434 r = (clen, compresseddeltalen)
430 435 chaininfocache[rev] = r
431 436 return r
432 437
433 438 def _deltachain(self, rev, stoprev=None):
434 439 """Obtain the delta chain for a revision.
435 440
436 441 ``stoprev`` specifies a revision to stop at. If not specified, we
437 442 stop at the base of the chain.
438 443
439 444 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
440 445 revs in ascending order and ``stopped`` is a bool indicating whether
441 446 ``stoprev`` was hit.
442 447 """
443 448 chain = []
444 449
445 450 # Alias to prevent attribute lookup in tight loop.
446 451 index = self.index
447 452 generaldelta = self._generaldelta
448 453
449 454 iterrev = rev
450 455 e = index[iterrev]
451 456 while iterrev != e[3] and iterrev != stoprev:
452 457 chain.append(iterrev)
453 458 if generaldelta:
454 459 iterrev = e[3]
455 460 else:
456 461 iterrev -= 1
457 462 e = index[iterrev]
458 463
459 464 if iterrev == stoprev:
460 465 stopped = True
461 466 else:
462 467 chain.append(iterrev)
463 468 stopped = False
464 469
465 470 chain.reverse()
466 471 return chain, stopped
467 472
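    # An illustration of the two chain shapes (hypothetical index): with
    # generaldelta each entry names its delta parent explicitly, so the walk
    # follows e[3]; without it, every delta is against the previous rev, so
    # the chain is a contiguous run down to the base.
    #
    #   chain, stopped = rl._deltachain(7)             # e.g. ([2, 5, 7], False)
    #   chain, stopped = rl._deltachain(7, stoprev=5)  # ([7], True); 5 excluded
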
468 473 def flags(self, rev):
469 474 return self.index[rev][0] & 0xFFFF
470 475 def rawsize(self, rev):
471 476 """return the length of the uncompressed text for a given revision"""
472 477 l = self.index[rev][2]
473 478 if l >= 0:
474 479 return l
475 480
476 481 t = self.revision(self.node(rev))
477 482 return len(t)
478 483 size = rawsize
479 484
480 485 def ancestors(self, revs, stoprev=0, inclusive=False):
481 486 """Generate the ancestors of 'revs' in reverse topological order.
482 487 Does not generate revs lower than stoprev.
483 488
484 489 See the documentation for ancestor.lazyancestors for more details."""
485 490
486 491 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
487 492 inclusive=inclusive)
488 493
489 494 def descendants(self, revs):
490 495 """Generate the descendants of 'revs' in revision order.
491 496
492 497 Yield a sequence of revision numbers starting with a child of
493 498 some rev in revs, i.e., each revision is *not* considered a
494 499 descendant of itself. Results are ordered by revision number (a
495 500 topological sort)."""
496 501 first = min(revs)
497 502 if first == nullrev:
498 503 for i in self:
499 504 yield i
500 505 return
501 506
502 507 seen = set(revs)
503 508 for i in self.revs(start=first + 1):
504 509 for x in self.parentrevs(i):
505 510 if x != nullrev and x in seen:
506 511 seen.add(i)
507 512 yield i
508 513 break
509 514
510 515 def findcommonmissing(self, common=None, heads=None):
511 516 """Return a tuple of the ancestors of common and the ancestors of heads
512 517 that are not ancestors of common. In revset terminology, we return the
513 518 tuple:
514 519
515 520 ::common, (::heads) - (::common)
516 521
517 522 The list is sorted by revision number, meaning it is
518 523 topologically sorted.
519 524
520 525 'heads' and 'common' are both lists of node IDs. If heads is
521 526 not supplied, uses all of the revlog's heads. If common is not
522 527 supplied, uses nullid."""
523 528 if common is None:
524 529 common = [nullid]
525 530 if heads is None:
526 531 heads = self.heads()
527 532
528 533 common = [self.rev(n) for n in common]
529 534 heads = [self.rev(n) for n in heads]
530 535
531 536 # we want the ancestors, but inclusive
532 537 class lazyset(object):
533 538 def __init__(self, lazyvalues):
534 539 self.addedvalues = set()
535 540 self.lazyvalues = lazyvalues
536 541
537 542 def __contains__(self, value):
538 543 return value in self.addedvalues or value in self.lazyvalues
539 544
540 545 def __iter__(self):
541 546 added = self.addedvalues
542 547 for r in added:
543 548 yield r
544 549 for r in self.lazyvalues:
545 550 if not r in added:
546 551 yield r
547 552
548 553 def add(self, value):
549 554 self.addedvalues.add(value)
550 555
551 556 def update(self, values):
552 557 self.addedvalues.update(values)
553 558
554 559 has = lazyset(self.ancestors(common))
555 560 has.add(nullrev)
556 561 has.update(common)
557 562
558 563 # take all ancestors from heads that aren't in has
559 564 missing = set()
560 565 visit = collections.deque(r for r in heads if r not in has)
561 566 while visit:
562 567 r = visit.popleft()
563 568 if r in missing:
564 569 continue
565 570 else:
566 571 missing.add(r)
567 572 for p in self.parentrevs(r):
568 573 if p not in has:
569 574 visit.append(p)
570 575 missing = list(missing)
571 576 missing.sort()
572 577 return has, [self.node(r) for r in missing]
573 578
574 579 def incrementalmissingrevs(self, common=None):
575 580 """Return an object that can be used to incrementally compute the
576 581 revision numbers of the ancestors of arbitrary sets that are not
577 582 ancestors of common. This is an ancestor.incrementalmissingancestors
578 583 object.
579 584
580 585 'common' is a list of revision numbers. If common is not supplied, uses
581 586 nullrev.
582 587 """
583 588 if common is None:
584 589 common = [nullrev]
585 590
586 591 return ancestor.incrementalmissingancestors(self.parentrevs, common)
587 592
588 593 def findmissingrevs(self, common=None, heads=None):
589 594 """Return the revision numbers of the ancestors of heads that
590 595 are not ancestors of common.
591 596
592 597 More specifically, return a list of revision numbers corresponding to
593 598 nodes N such that every N satisfies the following constraints:
594 599
595 600 1. N is an ancestor of some node in 'heads'
596 601 2. N is not an ancestor of any node in 'common'
597 602
598 603 The list is sorted by revision number, meaning it is
599 604 topologically sorted.
600 605
601 606 'heads' and 'common' are both lists of revision numbers. If heads is
602 607 not supplied, uses all of the revlog's heads. If common is not
603 608 supplied, uses nullid."""
604 609 if common is None:
605 610 common = [nullrev]
606 611 if heads is None:
607 612 heads = self.headrevs()
608 613
609 614 inc = self.incrementalmissingrevs(common=common)
610 615 return inc.missingancestors(heads)
611 616
612 617 def findmissing(self, common=None, heads=None):
613 618 """Return the ancestors of heads that are not ancestors of common.
614 619
615 620 More specifically, return a list of nodes N such that every N
616 621 satisfies the following constraints:
617 622
618 623 1. N is an ancestor of some node in 'heads'
619 624 2. N is not an ancestor of any node in 'common'
620 625
621 626 The list is sorted by revision number, meaning it is
622 627 topologically sorted.
623 628
624 629 'heads' and 'common' are both lists of node IDs. If heads is
625 630 not supplied, uses all of the revlog's heads. If common is not
626 631 supplied, uses nullid."""
627 632 if common is None:
628 633 common = [nullid]
629 634 if heads is None:
630 635 heads = self.heads()
631 636
632 637 common = [self.rev(n) for n in common]
633 638 heads = [self.rev(n) for n in heads]
634 639
635 640 inc = self.incrementalmissingrevs(common=common)
636 641 return [self.node(r) for r in inc.missingancestors(heads)]
637 642
638 643 def nodesbetween(self, roots=None, heads=None):
639 644 """Return a topological path from 'roots' to 'heads'.
640 645
641 646 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
642 647 topologically sorted list of all nodes N that satisfy both of
643 648 these constraints:
644 649
645 650 1. N is a descendant of some node in 'roots'
646 651 2. N is an ancestor of some node in 'heads'
647 652
648 653 Every node is considered to be both a descendant and an ancestor
649 654 of itself, so every reachable node in 'roots' and 'heads' will be
650 655 included in 'nodes'.
651 656
652 657 'outroots' is the list of reachable nodes in 'roots', i.e., the
653 658 subset of 'roots' that is returned in 'nodes'. Likewise,
654 659 'outheads' is the subset of 'heads' that is also in 'nodes'.
655 660
656 661 'roots' and 'heads' are both lists of node IDs. If 'roots' is
657 662 unspecified, uses nullid as the only root. If 'heads' is
658 663 unspecified, uses list of all of the revlog's heads."""
659 664 nonodes = ([], [], [])
660 665 if roots is not None:
661 666 roots = list(roots)
662 667 if not roots:
663 668 return nonodes
664 669 lowestrev = min([self.rev(n) for n in roots])
665 670 else:
666 671 roots = [nullid] # Everybody's a descendant of nullid
667 672 lowestrev = nullrev
668 673 if (lowestrev == nullrev) and (heads is None):
669 674 # We want _all_ the nodes!
670 675 return ([self.node(r) for r in self], [nullid], list(self.heads()))
671 676 if heads is None:
672 677 # All nodes are ancestors, so the latest ancestor is the last
673 678 # node.
674 679 highestrev = len(self) - 1
675 680 # Set ancestors to None to signal that every node is an ancestor.
676 681 ancestors = None
677 682 # Set heads to an empty dictionary for later discovery of heads
678 683 heads = {}
679 684 else:
680 685 heads = list(heads)
681 686 if not heads:
682 687 return nonodes
683 688 ancestors = set()
684 689 # Turn heads into a dictionary so we can remove 'fake' heads.
685 690 # Also, later we will be using it to filter out the heads we can't
686 691 # find from roots.
687 692 heads = dict.fromkeys(heads, False)
688 693 # Start at the top and keep marking parents until we're done.
689 694 nodestotag = set(heads)
690 695 # Remember where the top was so we can use it as a limit later.
691 696 highestrev = max([self.rev(n) for n in nodestotag])
692 697 while nodestotag:
693 698 # grab a node to tag
694 699 n = nodestotag.pop()
695 700 # Never tag nullid
696 701 if n == nullid:
697 702 continue
698 703 # A node's revision number represents its place in a
699 704 # topologically sorted list of nodes.
700 705 r = self.rev(n)
701 706 if r >= lowestrev:
702 707 if n not in ancestors:
703 708 # If we are possibly a descendant of one of the roots
704 709 # and we haven't already been marked as an ancestor
705 710 ancestors.add(n) # Mark as ancestor
706 711 # Add non-nullid parents to list of nodes to tag.
707 712 nodestotag.update([p for p in self.parents(n) if
708 713 p != nullid])
709 714 elif n in heads: # We've seen it before, is it a fake head?
710 715 # So it is, real heads should not be the ancestors of
711 716 # any other heads.
712 717 heads.pop(n)
713 718 if not ancestors:
714 719 return nonodes
715 720 # Now that we have our set of ancestors, we want to remove any
716 721 # roots that are not ancestors.
717 722
718 723 # If one of the roots was nullid, everything is included anyway.
719 724 if lowestrev > nullrev:
720 725 # But, since we weren't, let's recompute the lowest rev to not
721 726 # include roots that aren't ancestors.
722 727
723 728 # Filter out roots that aren't ancestors of heads
724 729 roots = [n for n in roots if n in ancestors]
725 730 # Recompute the lowest revision
726 731 if roots:
727 732 lowestrev = min([self.rev(n) for n in roots])
728 733 else:
729 734 # No more roots? Return empty list
730 735 return nonodes
731 736 else:
732 737 # We are descending from nullid, and don't need to care about
733 738 # any other roots.
734 739 lowestrev = nullrev
735 740 roots = [nullid]
736 741 # Transform our roots list into a set.
737 742 descendants = set(roots)
738 743 # Also, keep the original roots so we can filter out roots that aren't
739 744 # 'real' roots (i.e. are descended from other roots).
740 745 roots = descendants.copy()
741 746 # Our topologically sorted list of output nodes.
742 747 orderedout = []
743 748 # Don't start at nullid since we don't want nullid in our output list,
744 749 # and if nullid shows up in descendants, empty parents will look like
745 750 # they're descendants.
746 751 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
747 752 n = self.node(r)
748 753 isdescendant = False
749 754 if lowestrev == nullrev: # Everybody is a descendant of nullid
750 755 isdescendant = True
751 756 elif n in descendants:
752 757 # n is already a descendant
753 758 isdescendant = True
754 759 # This check only needs to be done here because all the roots
755 760 # will start being marked as descendants before the loop.
756 761 if n in roots:
757 762 # If n was a root, check if it's a 'real' root.
758 763 p = tuple(self.parents(n))
759 764 # If any of its parents are descendants, it's not a root.
760 765 if (p[0] in descendants) or (p[1] in descendants):
761 766 roots.remove(n)
762 767 else:
763 768 p = tuple(self.parents(n))
764 769 # A node is a descendant if either of its parents are
765 770 # descendants. (We seeded the descendants set with the roots
766 771 # up there, remember?)
767 772 if (p[0] in descendants) or (p[1] in descendants):
768 773 descendants.add(n)
769 774 isdescendant = True
770 775 if isdescendant and ((ancestors is None) or (n in ancestors)):
771 776 # Only include nodes that are both descendants and ancestors.
772 777 orderedout.append(n)
773 778 if (ancestors is not None) and (n in heads):
774 779 # We're trying to figure out which heads are reachable
775 780 # from roots.
776 781 # Mark this head as having been reached
777 782 heads[n] = True
778 783 elif ancestors is None:
779 784 # Otherwise, we're trying to discover the heads.
780 785 # Assume this is a head because if it isn't, the next step
781 786 # will eventually remove it.
782 787 heads[n] = True
783 788 # But, obviously its parents aren't.
784 789 for p in self.parents(n):
785 790 heads.pop(p, None)
786 791 heads = [n for n, flag in heads.iteritems() if flag]
787 792 roots = list(roots)
788 793 assert orderedout
789 794 assert roots
790 795 assert heads
791 796 return (orderedout, roots, heads)
792 797
793 798 def headrevs(self):
794 799 try:
795 800 return self.index.headrevs()
796 801 except AttributeError:
797 802 return self._headrevs()
798 803
799 804 def computephases(self, roots):
800 805 return self.index.computephasesmapsets(roots)
801 806
802 807 def _headrevs(self):
803 808 count = len(self)
804 809 if not count:
805 810 return [nullrev]
806 811 # we won't iter over filtered rev so nobody is a head at start
807 812 ishead = [0] * (count + 1)
808 813 index = self.index
809 814 for r in self:
810 815 ishead[r] = 1 # I may be a head
811 816 e = index[r]
812 817 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
813 818 return [r for r, val in enumerate(ishead) if val]
814 819
815 820 def heads(self, start=None, stop=None):
816 821 """return the list of all nodes that have no children
817 822
818 823 if start is specified, only heads that are descendants of
819 824 start will be returned
820 825 if stop is specified, it will consider all the revs from stop
821 826 as if they had no children
822 827 """
823 828 if start is None and stop is None:
824 829 if not len(self):
825 830 return [nullid]
826 831 return [self.node(r) for r in self.headrevs()]
827 832
828 833 if start is None:
829 834 start = nullid
830 835 if stop is None:
831 836 stop = []
832 837 stoprevs = set([self.rev(n) for n in stop])
833 838 startrev = self.rev(start)
834 839 reachable = set((startrev,))
835 840 heads = set((startrev,))
836 841
837 842 parentrevs = self.parentrevs
838 843 for r in self.revs(start=startrev + 1):
839 844 for p in parentrevs(r):
840 845 if p in reachable:
841 846 if r not in stoprevs:
842 847 reachable.add(r)
843 848 heads.add(r)
844 849 if p in heads and p not in stoprevs:
845 850 heads.remove(p)
846 851
847 852 return [self.node(r) for r in heads]
848 853
849 854 def children(self, node):
850 855 """find the children of a given node"""
851 856 c = []
852 857 p = self.rev(node)
853 858 for r in self.revs(start=p + 1):
854 859 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
855 860 if prevs:
856 861 for pr in prevs:
857 862 if pr == p:
858 863 c.append(self.node(r))
859 864 elif p == nullrev:
860 865 c.append(self.node(r))
861 866 return c
862 867
863 868 def descendant(self, start, end):
864 869 if start == nullrev:
865 870 return True
866 871 for i in self.descendants([start]):
867 872 if i == end:
868 873 return True
869 874 elif i > end:
870 875 break
871 876 return False
872 877
873 878 def commonancestorsheads(self, a, b):
874 879 """calculate all the heads of the common ancestors of nodes a and b"""
875 880 a, b = self.rev(a), self.rev(b)
876 881 try:
877 882 ancs = self.index.commonancestorsheads(a, b)
878 883 except (AttributeError, OverflowError): # C implementation failed
879 884 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
880 885 return map(self.node, ancs)
881 886
882 887 def isancestor(self, a, b):
883 888 """return True if node a is an ancestor of node b
884 889
885 890 The implementation of this is trivial but the use of
886 891 commonancestorsheads is not."""
887 892 return a in self.commonancestorsheads(a, b)
888 893
889 894 def ancestor(self, a, b):
890 895 """calculate the "best" common ancestor of nodes a and b"""
891 896
892 897 a, b = self.rev(a), self.rev(b)
893 898 try:
894 899 ancs = self.index.ancestors(a, b)
895 900 except (AttributeError, OverflowError):
896 901 ancs = ancestor.ancestors(self.parentrevs, a, b)
897 902 if ancs:
898 903 # choose a consistent winner when there's a tie
899 904 return min(map(self.node, ancs))
900 905 return nullid
901 906
902 907 def _match(self, id):
903 908 if isinstance(id, int):
904 909 # rev
905 910 return self.node(id)
906 911 if len(id) == 20:
907 912 # possibly a binary node
908 913 # odds of a binary node being all hex in ASCII are 1 in 10**25
909 914 try:
910 915 node = id
911 916 self.rev(node) # quick search the index
912 917 return node
913 918 except LookupError:
914 919 pass # may be partial hex id
915 920 try:
916 921 # str(rev)
917 922 rev = int(id)
918 923 if str(rev) != id:
919 924 raise ValueError
920 925 if rev < 0:
921 926 rev = len(self) + rev
922 927 if rev < 0 or rev >= len(self):
923 928 raise ValueError
924 929 return self.node(rev)
925 930 except (ValueError, OverflowError):
926 931 pass
927 932 if len(id) == 40:
928 933 try:
929 934 # a full hex nodeid?
930 935 node = bin(id)
931 936 self.rev(node)
932 937 return node
933 938 except (TypeError, LookupError):
934 939 pass
935 940
936 941 def _partialmatch(self, id):
937 942 try:
938 943 n = self.index.partialmatch(id)
939 944 if n and self.hasnode(n):
940 945 return n
941 946 return None
942 947 except RevlogError:
943 948 # parsers.c radix tree lookup gave multiple matches
944 949 # fast path: for unfiltered changelog, radix tree is accurate
945 950 if not getattr(self, 'filteredrevs', None):
946 951 raise LookupError(id, self.indexfile,
947 952 _('ambiguous identifier'))
948 953 # fall through to slow path that filters hidden revisions
949 954 except (AttributeError, ValueError):
950 955 # we are pure python, or key was too short to search radix tree
951 956 pass
952 957
953 958 if id in self._pcache:
954 959 return self._pcache[id]
955 960
956 961 if len(id) < 40:
957 962 try:
958 963 # hex(node)[:...]
959 964 l = len(id) // 2 # grab an even number of digits
960 965 prefix = bin(id[:l * 2])
961 966 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
962 967 nl = [n for n in nl if hex(n).startswith(id) and
963 968 self.hasnode(n)]
964 969 if len(nl) > 0:
965 970 if len(nl) == 1:
966 971 self._pcache[id] = nl[0]
967 972 return nl[0]
968 973 raise LookupError(id, self.indexfile,
969 974 _('ambiguous identifier'))
970 975 return None
971 976 except TypeError:
972 977 pass
973 978
974 979 def lookup(self, id):
975 980 """locate a node based on:
976 981 - revision number or str(revision number)
977 982 - nodeid or subset of hex nodeid
978 983 """
979 984 n = self._match(id)
980 985 if n is not None:
981 986 return n
982 987 n = self._partialmatch(id)
983 988 if n:
984 989 return n
985 990
986 991 raise LookupError(id, self.indexfile, _('no match found'))
987 992
988 993 def cmp(self, node, text):
989 994 """compare text with a given file revision
990 995
991 996 returns True if text is different than what is stored.
992 997 """
993 998 p1, p2 = self.parents(node)
994 999 return hash(text, p1, p2) != node
995 1000
996 1001 def _addchunk(self, offset, data):
997 1002 """Add a segment to the revlog cache.
998 1003
999 1004 Accepts an absolute offset and the data that is at that location.
1000 1005 """
1001 1006 o, d = self._chunkcache
1002 1007 # try to add to existing cache
1003 1008 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1004 1009 self._chunkcache = o, d + data
1005 1010 else:
1006 1011 self._chunkcache = offset, data
1007 1012
1008 1013 def _loadchunk(self, offset, length, df=None):
1009 1014 """Load a segment of raw data from the revlog.
1010 1015
1011 1016 Accepts an absolute offset, length to read, and an optional existing
1012 1017 file handle to read from.
1013 1018
1014 1019 If an existing file handle is passed, it will be seeked and the
1015 1020 original seek position will NOT be restored.
1016 1021
1017 1022 Returns a str or buffer of raw byte data.
1018 1023 """
1019 1024 if df is not None:
1020 1025 closehandle = False
1021 1026 else:
1022 1027 if self._inline:
1023 1028 df = self.opener(self.indexfile)
1024 1029 else:
1025 1030 df = self.opener(self.datafile)
1026 1031 closehandle = True
1027 1032
1028 1033 # Cache data both forward and backward around the requested
1029 1034 # data, in a fixed size window. This helps speed up operations
1030 1035 # involving reading the revlog backwards.
1031 1036 cachesize = self._chunkcachesize
1032 1037 realoffset = offset & ~(cachesize - 1)
1033 1038 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1034 1039 - realoffset)
1035 1040 df.seek(realoffset)
1036 1041 d = df.read(reallength)
1037 1042 if closehandle:
1038 1043 df.close()
1039 1044 self._addchunk(realoffset, d)
1040 1045 if offset != realoffset or reallength != length:
1041 1046 return util.buffer(d, offset - realoffset, length)
1042 1047 return d
1043 1048
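    # A worked example of the windowing above, assuming the default 64k cache
    # size: a 100-byte read at offset 70000 is widened to the surrounding
    # aligned block before the requested slice is returned.
    #
    #   realoffset = 70000 & ~(65536 - 1)                        # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536    # 65536
    #
    # i.e. the whole 64k block containing the request is read and cached.
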
1044 1049 def _getchunk(self, offset, length, df=None):
1045 1050 """Obtain a segment of raw data from the revlog.
1046 1051
1047 1052 Accepts an absolute offset, length of bytes to obtain, and an
1048 1053 optional file handle to the already-opened revlog. If the file
1049 1054 handle is used, its original seek position will not be preserved.
1050 1055
1051 1056 Requests for data may be returned from a cache.
1052 1057
1053 1058 Returns a str or a buffer instance of raw byte data.
1054 1059 """
1055 1060 o, d = self._chunkcache
1056 1061 l = len(d)
1057 1062
1058 1063 # is it in the cache?
1059 1064 cachestart = offset - o
1060 1065 cacheend = cachestart + length
1061 1066 if cachestart >= 0 and cacheend <= l:
1062 1067 if cachestart == 0 and cacheend == l:
1063 1068 return d # avoid a copy
1064 1069 return util.buffer(d, cachestart, cacheend - cachestart)
1065 1070
1066 1071 return self._loadchunk(offset, length, df=df)
1067 1072
1068 1073 def _chunkraw(self, startrev, endrev, df=None):
1069 1074 """Obtain a segment of raw data corresponding to a range of revisions.
1070 1075
1071 1076 Accepts the start and end revisions and an optional already-open
1072 1077 file handle to be used for reading. If the file handle is read, its
1073 1078 seek position will not be preserved.
1074 1079
1075 1080 Requests for data may be satisfied by a cache.
1076 1081
1077 1082 Returns a 2-tuple of (offset, data) for the requested range of
1078 1083 revisions. Offset is the integer offset from the beginning of the
1079 1084 revlog and data is a str or buffer of the raw byte data.
1080 1085
1081 1086 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1082 1087 to determine where each revision's data begins and ends.
1083 1088 """
1084 1089 start = self.start(startrev)
1085 1090 end = self.end(endrev)
1086 1091 if self._inline:
1087 1092 start += (startrev + 1) * self._io.size
1088 1093 end += (endrev + 1) * self._io.size
1089 1094 length = end - start
1090 1095
1091 1096 return start, self._getchunk(start, length, df=df)
1092 1097
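    # A note on the inline adjustment above (illustrative): in an inline
    # revlog the fixed-size index records and the data chunks interleave in a
    # single file, so rev r's data actually begins at
    #
    #   self.start(r) + (r + 1) * self._io.size
    #
    # i.e. shifted past the r + 1 index entries written before it.
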
1093 1098 def _chunk(self, rev, df=None):
1094 1099 """Obtain a single decompressed chunk for a revision.
1095 1100
1096 1101 Accepts an integer revision and an optional already-open file handle
1097 1102 to be used for reading. If used, the seek position of the file will not
1098 1103 be preserved.
1099 1104
1100 1105 Returns a str holding uncompressed data for the requested revision.
1101 1106 """
1102 1107 return decompress(self._chunkraw(rev, rev, df=df)[1])
1103 1108
1104 1109 def _chunks(self, revs, df=None):
1105 1110 """Obtain decompressed chunks for the specified revisions.
1106 1111
1107 1112 Accepts an iterable of numeric revisions that are assumed to be in
1108 1113 ascending order. Also accepts an optional already-open file handle
1109 1114 to be used for reading. If used, the seek position of the file will
1110 1115 not be preserved.
1111 1116
1112 1117 This function is similar to calling ``self._chunk()`` multiple times,
1113 1118 but is faster.
1114 1119
1115 1120 Returns a list with decompressed data for each requested revision.
1116 1121 """
1117 1122 if not revs:
1118 1123 return []
1119 1124 start = self.start
1120 1125 length = self.length
1121 1126 inline = self._inline
1122 1127 iosize = self._io.size
1123 1128 buffer = util.buffer
1124 1129
1125 1130 l = []
1126 1131 ladd = l.append
1127 1132
1128 1133 try:
1129 1134 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1130 1135 except OverflowError:
1131 1136 # issue4215 - we can't cache a run of chunks greater than
1132 1137 # 2G on Windows
1133 1138 return [self._chunk(rev, df=df) for rev in revs]
1134 1139
1135 1140 for rev in revs:
1136 1141 chunkstart = start(rev)
1137 1142 if inline:
1138 1143 chunkstart += (rev + 1) * iosize
1139 1144 chunklength = length(rev)
1140 1145 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1141 1146
1142 1147 return l
1143 1148
1144 1149 def _chunkclear(self):
1145 1150 """Clear the raw chunk cache."""
1146 1151 self._chunkcache = (0, '')
1147 1152
1148 1153 def deltaparent(self, rev):
1149 1154 """return deltaparent of the given revision"""
1150 1155 base = self.index[rev][3]
1151 1156 if base == rev:
1152 1157 return nullrev
1153 1158 elif self._generaldelta:
1154 1159 return base
1155 1160 else:
1156 1161 return rev - 1
1157 1162
1158 1163 def revdiff(self, rev1, rev2):
1159 1164 """return or calculate a delta between two revisions"""
1160 1165 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1161 1166 return str(self._chunk(rev2))
1162 1167
1163 1168 return mdiff.textdiff(self.revision(rev1),
1164 1169 self.revision(rev2))
1165 1170
1166 1171 def revision(self, nodeorrev, _df=None):
1167 1172 """return an uncompressed revision of a given node or revision
1168 1173 number.
1169 1174
1170 1175 _df is an existing file handle to read from. It is meant to only be
1171 1176 used internally.
1172 1177 """
1173 1178 if isinstance(nodeorrev, int):
1174 1179 rev = nodeorrev
1175 1180 node = self.node(rev)
1176 1181 else:
1177 1182 node = nodeorrev
1178 1183 rev = None
1179 1184
1180 1185 cachedrev = None
1181 1186 if node == nullid:
1182 1187 return ""
1183 1188 if self._cache:
1184 1189 if self._cache[0] == node:
1185 1190 return self._cache[2]
1186 1191 cachedrev = self._cache[1]
1187 1192
1188 1193 # look up what we need to read
1189 1194 text = None
1190 1195 if rev is None:
1191 1196 rev = self.rev(node)
1192 1197
1193 1198 # check rev flags
1194 1199 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1195 1200 raise RevlogError(_('incompatible revision flag %x') %
1196 1201 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1197 1202
1198 1203 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1199 1204 if stopped:
1200 1205 text = self._cache[2]
1201 1206
1202 1207 # drop cache to save memory
1203 1208 self._cache = None
1204 1209
1205 1210 bins = self._chunks(chain, df=_df)
1206 1211 if text is None:
1207 1212 text = str(bins[0])
1208 1213 bins = bins[1:]
1209 1214
1210 1215 text = mdiff.patches(text, bins)
1211 1216
1212 1217 text = self._checkhash(text, node, rev)
1213 1218
1214 1219 self._cache = (node, rev, text)
1215 1220 return text
1216 1221
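    # The retrieval pipeline above, end to end (a sketch): resolve the node,
    # collect the delta chain (cut short when the cached text is usable),
    # fetch and decompress all chunks in one pass, then fold the deltas over
    # the base text.
    #
    #   chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    #   bins = self._chunks(chain)            # one decompressed chunk per rev
    #   text = mdiff.patches(str(bins[0]), bins[1:])
    #
    # so cost tracks the chain length, which _isgooddelta keeps bounded.
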
1217 1222 def hash(self, text, p1, p2):
1218 1223 """Compute a node hash.
1219 1224
1220 1225 Available as a function so that subclasses can replace the hash
1221 1226 as needed.
1222 1227 """
1223 1228 return hash(text, p1, p2)
1224 1229
1225 1230 def _checkhash(self, text, node, rev):
1226 1231 p1, p2 = self.parents(node)
1227 1232 self.checkhash(text, p1, p2, node, rev)
1228 1233 return text
1229 1234
1230 1235 def checkhash(self, text, p1, p2, node, rev=None):
1231 1236 if node != self.hash(text, p1, p2):
1232 1237 revornode = rev
1233 1238 if revornode is None:
1234 1239 revornode = templatefilters.short(hex(node))
1235 1240 raise RevlogError(_("integrity check failed on %s:%s")
1236 1241 % (self.indexfile, revornode))
1237 1242
1238 1243 def checkinlinesize(self, tr, fp=None):
1239 1244 """Check if the revlog is too big for inline and convert if so.
1240 1245
1241 1246 This should be called after revisions are added to the revlog. If the
1242 1247 revlog has grown too large to be an inline revlog, it will convert it
1243 1248 to use multiple index and data files.
1244 1249 """
1245 1250 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1246 1251 return
1247 1252
1248 1253 trinfo = tr.find(self.indexfile)
1249 1254 if trinfo is None:
1250 1255 raise RevlogError(_("%s not found in the transaction")
1251 1256 % self.indexfile)
1252 1257
1253 1258 trindex = trinfo[2]
1254 1259 if trindex is not None:
1255 1260 dataoff = self.start(trindex)
1256 1261 else:
1257 1262 # revlog was stripped at start of transaction, use all leftover data
1258 1263 trindex = len(self) - 1
1259 1264 dataoff = self.end(-2)
1260 1265
1261 1266 tr.add(self.datafile, dataoff)
1262 1267
1263 1268 if fp:
1264 1269 fp.flush()
1265 1270 fp.close()
1266 1271
1267 1272 df = self.opener(self.datafile, 'w')
1268 1273 try:
1269 1274 for r in self:
1270 1275 df.write(self._chunkraw(r, r)[1])
1271 1276 finally:
1272 1277 df.close()
1273 1278
1274 1279 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1275 1280 self.version &= ~(REVLOGNGINLINEDATA)
1276 1281 self._inline = False
1277 1282 for i in self:
1278 1283 e = self._io.packentry(self.index[i], self.node, self.version, i)
1279 1284 fp.write(e)
1280 1285
1281 1286 # if we don't call close, the temp file will never replace the
1282 1287 # real index
1283 1288 fp.close()
1284 1289
1285 1290 tr.replace(self.indexfile, trindex * self._io.size)
1286 1291 self._chunkclear()
1287 1292
1288 1293 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1289 1294 node=None):
1290 1295 """add a revision to the log
1291 1296
1292 1297 text - the revision data to add
1293 1298 transaction - the transaction object used for rollback
1294 1299 link - the linkrev data to add
1295 1300 p1, p2 - the parent nodeids of the revision
1296 1301 cachedelta - an optional precomputed delta
1297 1302 node - nodeid of revision; typically node is not specified, and it is
1298 1303 computed by default as hash(text, p1, p2), however subclasses might
1299 1304 use different hashing method (and override checkhash() in such case)
1300 1305 """
1301 1306 if link == nullrev:
1302 1307 raise RevlogError(_("attempted to add linkrev -1 to %s")
1303 1308 % self.indexfile)
1304 1309
1305 1310 if len(text) > _maxentrysize:
1306 1311 raise RevlogError(
1307 1312 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1308 1313 % (self.indexfile, len(text)))
1309 1314
1310 1315 node = node or self.hash(text, p1, p2)
1311 1316 if node in self.nodemap:
1312 1317 return node
1313 1318
1314 1319 dfh = None
1315 1320 if not self._inline:
1316 1321 dfh = self.opener(self.datafile, "a+")
1317 1322 ifh = self.opener(self.indexfile, "a+")
1318 1323 try:
1319 1324 return self._addrevision(node, text, transaction, link, p1, p2,
1320 1325 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1321 1326 finally:
1322 1327 if dfh:
1323 1328 dfh.close()
1324 1329 ifh.close()
1325 1330
1326 1331 def compress(self, text):
1327 1332 """ generate a possibly-compressed representation of text """
1328 1333 if not text:
1329 1334 return ("", text)
1330 1335 l = len(text)
1331 1336 bin = None
1332 1337 if l < 44:
1333 1338 pass
1334 1339 elif l > 1000000:
1335 1340 # zlib makes an internal copy, thus doubling memory usage for
1336 1341 # large files, so let's do this in pieces
1337 1342 z = zlib.compressobj()
1338 1343 p = []
1339 1344 pos = 0
1340 1345 while pos < l:
1341 1346 pos2 = pos + 2**20
1342 1347 p.append(z.compress(text[pos:pos2]))
1343 1348 pos = pos2
1344 1349 p.append(z.flush())
1345 1350 if sum(map(len, p)) < l:
1346 1351 bin = "".join(p)
1347 1352 else:
1348 1353 bin = _compress(text)
1349 1354 if bin is None or len(bin) > l:
1350 1355 if text[0] == '\0':
1351 1356 return ("", text)
1352 1357 return ('u', text)
1353 1358 return ("", bin)
1354 1359
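    # A sketch of why large texts go through compressobj in 1 MiB pieces:
    # zlib's one-shot compress() makes an internal copy of its input, doubling
    # peak memory, while streaming produces an equivalent stream without it.
    #
    #   z = zlib.compressobj()
    #   p = [z.compress(text[i:i + 2**20]) for i in xrange(0, len(text), 2**20)]
    #   p.append(z.flush())
    #   assert zlib.decompress("".join(p)) == text
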
1355 1360 def _isgooddelta(self, d, textlen):
1356 1361 """Returns True if the given delta is good. Good means that it is within
1357 1362 the disk span, disk size, and chain length bounds that we know to be
1358 1363 performant."""
1359 1364 if d is None:
1360 1365 return False
1361 1366
1362 1367 # - 'dist' is the distance from the base revision -- bounding it limits
1363 1368 # the amount of I/O we need to do.
1364 1369 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1365 1370 # to apply -- bounding it limits the amount of CPU we consume.
1366 1371 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1367 1372 if (dist > textlen * 4 or l > textlen or
1368 1373 compresseddeltalen > textlen * 2 or
1369 1374 (self._maxchainlen and chainlen > self._maxchainlen)):
1370 1375 return False
1371 1376
1372 1377 return True
1373 1378
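    # Worked numbers for the bounds above (hypothetical candidate): for a
    # 1000-byte fulltext, a delta is rejected if the span to read exceeds
    # 4000 bytes, if the delta itself tops 1000 bytes, if the chain's deltas
    # sum past 2000 bytes, or if the chain would exceed self._maxchainlen.
    #
    #   d = (3500, 200, data, base, chainbase, 4, 900)
    #   self._isgooddelta(d, 1000)   # True: every bound above is respected
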
1374 1379 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1375 1380 cachedelta, ifh, dfh, alwayscache=False):
1376 1381 """internal function to add revisions to the log
1377 1382
1378 1383 see addrevision for argument descriptions.
1379 1384 invariants:
1380 1385 - text is optional (can be None); if not set, cachedelta must be set.
1381 1386 if both are set, they must correspond to each other.
1382 1387 """
1383 1388 btext = [text]
1384 1389 def buildtext():
1385 1390 if btext[0] is not None:
1386 1391 return btext[0]
1387 1392 baserev = cachedelta[0]
1388 1393 delta = cachedelta[1]
1389 1394 # special case deltas which replace entire base; no need to decode
1390 1395 # base revision. this neatly avoids censored bases, which throw when
1391 1396 # they're decoded.
1392 1397 hlen = struct.calcsize(">lll")
1393 1398 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1394 1399 len(delta) - hlen):
1395 1400 btext[0] = delta[hlen:]
1396 1401 else:
1397 1402 if self._inline:
1398 1403 fh = ifh
1399 1404 else:
1400 1405 fh = dfh
1401 1406 basetext = self.revision(self.node(baserev), _df=fh)
1402 1407 btext[0] = mdiff.patch(basetext, delta)
1403 1408 try:
1404 1409 self.checkhash(btext[0], p1, p2, node)
1405 1410 if flags & REVIDX_ISCENSORED:
1406 1411 raise RevlogError(_('node %s is not censored') % node)
1407 1412 except CensoredNodeError:
1408 1413 # must pass the censored index flag to add censored revisions
1409 1414 if not flags & REVIDX_ISCENSORED:
1410 1415 raise
1411 1416 return btext[0]
1412 1417
1413 1418 def builddelta(rev):
1414 1419 # can we use the cached delta?
1415 1420 if cachedelta and cachedelta[0] == rev:
1416 1421 delta = cachedelta[1]
1417 1422 else:
1418 1423 t = buildtext()
1419 1424 if self.iscensored(rev):
1420 1425 # deltas based on a censored revision must replace the
1421 1426 # full content in one patch, so delta works everywhere
1422 1427 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1423 1428 delta = header + t
1424 1429 else:
1425 1430 if self._inline:
1426 1431 fh = ifh
1427 1432 else:
1428 1433 fh = dfh
1429 1434 ptext = self.revision(self.node(rev), _df=fh)
1430 1435 delta = mdiff.textdiff(ptext, t)
1431 1436 data = self.compress(delta)
1432 1437 l = len(data[1]) + len(data[0])
1433 if basecache[0] == rev:
1434 chainbase = basecache[1]
1435 else:
1436 chainbase = self.chainbase(rev)
1438 chainbase = self.chainbase(rev)
1437 1439 dist = l + offset - self.start(chainbase)
1438 1440 if self._generaldelta:
1439 1441 base = rev
1440 1442 else:
1441 1443 base = chainbase
1442 1444 chainlen, compresseddeltalen = self._chaininfo(rev)
1443 1445 chainlen += 1
1444 1446 compresseddeltalen += l
1445 1447 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1446 1448
1447 1449 curr = len(self)
1448 1450 prev = curr - 1
1449 1451 offset = self.end(prev)
1450 1452 delta = None
1451 if self._basecache is None:
1452 self._basecache = (prev, self.chainbase(prev))
1453 basecache = self._basecache
1454 1453 p1r, p2r = self.rev(p1), self.rev(p2)
1455 1454
1456 1455 # full versions are inserted when the needed deltas
1457 1456 # become comparable to the uncompressed text
1458 1457 if text is None:
1459 1458 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1460 1459 cachedelta[1])
1461 1460 else:
1462 1461 textlen = len(text)
1463 1462
1464 1463 # should we try to build a delta?
1465 1464 if prev != nullrev:
1466 1465 tested = set()
1467 1466 if cachedelta and self._generaldelta and self._lazydeltabase:
1468 1467 # Assume what we received from the server is a good choice
1469 1468 # build delta will reuse the cache
1470 1469 candidatedelta = builddelta(cachedelta[0])
1471 1470 tested.add(cachedelta[0])
1472 1471 if self._isgooddelta(candidatedelta, textlen):
1473 1472 delta = candidatedelta
1474 1473 if delta is None and self._generaldelta:
1475 1474 # exclude already lazy tested base if any
1476 1475 parents = [p for p in (p1r, p2r)
1477 1476 if p != nullrev and p not in tested]
1478 1477 if parents and not self._aggressivemergedeltas:
1479 1478 # Pick whichever parent is closer to us (to minimize the
1480 1479 # chance of having to build a fulltext).
1481 1480 parents = [max(parents)]
1482 1481 tested.update(parents)
1483 1482 pdeltas = []
1484 1483 for p in parents:
1485 1484 pd = builddelta(p)
1486 1485 if self._isgooddelta(pd, textlen):
1487 1486 pdeltas.append(pd)
1488 1487 if pdeltas:
1489 1488 delta = min(pdeltas, key=lambda x: x[1])
1490 1489 if delta is None and prev not in tested:
1491 1490 # other approaches failed; try against prev to hopefully save us a
1492 1491 # fulltext.
1493 1492 candidatedelta = builddelta(prev)
1494 1493 if self._isgooddelta(candidatedelta, textlen):
1495 1494 delta = candidatedelta
1496 1495 if delta is not None:
1497 1496 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1498 1497 else:
1499 1498 text = buildtext()
1500 1499 data = self.compress(text)
1501 1500 l = len(data[1]) + len(data[0])
1502 1501 base = chainbase = curr
1503 1502
1504 1503 e = (offset_type(offset, flags), l, textlen,
1505 1504 base, link, p1r, p2r, node)
1506 1505 self.index.insert(-1, e)
1507 1506 self.nodemap[node] = curr
1508 1507
1509 1508 entry = self._io.packentry(e, self.node, self.version, curr)
1510 1509 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1511 1510
1512 1511 if alwayscache and text is None:
1513 1512 text = buildtext()
1514 1513
1515 1514 if type(text) == str: # only accept immutable objects
1516 1515 self._cache = (node, curr, text)
1517 self._basecache = (curr, chainbase)
1516 self._chainbasecache[curr] = chainbase
1518 1517 return node
1519 1518
1520 1519 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1521 1520 # Files opened in a+ mode have inconsistent behavior on various
1522 1521 # platforms. Windows requires that a file positioning call be made
1523 1522 # when the file handle transitions between reads and writes. See
1524 1523 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1525 1524 # platforms, Python or the platform itself can be buggy. Some versions
1526 1525 # of Solaris have been observed to not append at the end of the file
1527 1526 # if the file was seeked to before the end. See issue4943 for more.
1528 1527 #
1529 1528 # We work around this issue by inserting a seek() before writing.
1530 1529 # Note: This is likely not necessary on Python 3.
1531 1530 ifh.seek(0, os.SEEK_END)
1532 1531 if dfh:
1533 1532 dfh.seek(0, os.SEEK_END)
1534 1533
1535 1534 curr = len(self) - 1
1536 1535 if not self._inline:
1537 1536 transaction.add(self.datafile, offset)
1538 1537 transaction.add(self.indexfile, curr * len(entry))
1539 1538 if data[0]:
1540 1539 dfh.write(data[0])
1541 1540 dfh.write(data[1])
1542 1541 ifh.write(entry)
1543 1542 else:
1544 1543 offset += curr * self._io.size
1545 1544 transaction.add(self.indexfile, offset, curr)
1546 1545 ifh.write(entry)
1547 1546 ifh.write(data[0])
1548 1547 ifh.write(data[1])
1549 1548 self.checkinlinesize(transaction, ifh)
1550 1549
1551 1550 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1552 1551 """
1553 1552 add a delta group
1554 1553
1555 1554 given a set of deltas, add them to the revision log. the
1556 1555 first delta is against its parent, which should be in our
1557 1556 log, the rest are against the previous delta.
1558 1557
1559 1558 If ``addrevisioncb`` is defined, it will be called with arguments of
1560 1559 this revlog and the node that was added.
1561 1560 """
1562 1561
1563 1562 # track the base of the current delta log
1564 1563 content = []
1565 1564 node = None
1566 1565
1567 1566 r = len(self)
1568 1567 end = 0
1569 1568 if r:
1570 1569 end = self.end(r - 1)
1571 1570 ifh = self.opener(self.indexfile, "a+")
1572 1571 isize = r * self._io.size
1573 1572 if self._inline:
1574 1573 transaction.add(self.indexfile, end + isize, r)
1575 1574 dfh = None
1576 1575 else:
1577 1576 transaction.add(self.indexfile, isize, r)
1578 1577 transaction.add(self.datafile, end)
1579 1578 dfh = self.opener(self.datafile, "a+")
1580 1579 def flush():
1581 1580 if dfh:
1582 1581 dfh.flush()
1583 1582 ifh.flush()
1584 1583 try:
1585 1584 # loop through our set of deltas
1586 1585 chain = None
1587 1586 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1588 1587 node = chunkdata['node']
1589 1588 p1 = chunkdata['p1']
1590 1589 p2 = chunkdata['p2']
1591 1590 cs = chunkdata['cs']
1592 1591 deltabase = chunkdata['deltabase']
1593 1592 delta = chunkdata['delta']
1594 1593 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1595 1594
1596 1595 content.append(node)
1597 1596
1598 1597 link = linkmapper(cs)
1599 1598 if node in self.nodemap:
1600 1599 # this can happen if two branches make the same change
1601 1600 chain = node
1602 1601 continue
1603 1602
1604 1603 for p in (p1, p2):
1605 1604 if p not in self.nodemap:
1606 1605 raise LookupError(p, self.indexfile,
1607 1606 _('unknown parent'))
1608 1607
1609 1608 if deltabase not in self.nodemap:
1610 1609 raise LookupError(deltabase, self.indexfile,
1611 1610 _('unknown delta base'))
1612 1611
1613 1612 baserev = self.rev(deltabase)
1614 1613
1615 1614 if baserev != nullrev and self.iscensored(baserev):
1616 1615 # if base is censored, delta must be full replacement in a
1617 1616 # single patch operation
1618 1617 hlen = struct.calcsize(">lll")
1619 1618 oldlen = self.rawsize(baserev)
1620 1619 newlen = len(delta) - hlen
1621 1620 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1622 1621 raise error.CensoredBaseError(self.indexfile,
1623 1622 self.node(baserev))
1624 1623
1625 1624 if not flags and self._peek_iscensored(baserev, delta, flush):
1626 1625 flags |= REVIDX_ISCENSORED
1627 1626
1628 1627 # We assume consumers of addrevisioncb will want to retrieve
1629 1628 # the added revision, which will require a call to
1630 1629 # revision(). revision() will fast path if there is a cache
1631 1630 # hit. So, we tell _addrevision() to always cache in this case.
1632 1631 chain = self._addrevision(node, None, transaction, link,
1633 1632 p1, p2, flags, (baserev, delta),
1634 1633 ifh, dfh,
1635 1634 alwayscache=bool(addrevisioncb))
1636 1635
1637 1636 if addrevisioncb:
1638 1637 addrevisioncb(self, chain)
1639 1638
1640 1639 if not dfh and not self._inline:
1641 1640 # addrevision switched from inline to conventional
1642 1641 # reopen the index
1643 1642 ifh.close()
1644 1643 dfh = self.opener(self.datafile, "a+")
1645 1644 ifh = self.opener(self.indexfile, "a+")
1646 1645 finally:
1647 1646 if dfh:
1648 1647 dfh.close()
1649 1648 ifh.close()
1650 1649
1651 1650 return content
1652 1651
1653 1652 def iscensored(self, rev):
1654 1653 """Check if a file revision is censored."""
1655 1654 return False
1656 1655
1657 1656 def _peek_iscensored(self, baserev, delta, flush):
1658 1657 """Quickly check if a delta produces a censored revision."""
1659 1658 return False
1660 1659
1661 1660 def getstrippoint(self, minlink):
1662 1661 """find the minimum rev that must be stripped to strip the linkrev
1663 1662
1664 1663 Returns a tuple containing the minimum rev and a set of all revs that
1665 1664 have linkrevs that will be broken by this strip.
1666 1665 """
1667 1666 brokenrevs = set()
1668 1667 strippoint = len(self)
1669 1668
1670 1669 heads = {}
1671 1670 futurelargelinkrevs = set()
1672 1671 for head in self.headrevs():
1673 1672 headlinkrev = self.linkrev(head)
1674 1673 heads[head] = headlinkrev
1675 1674 if headlinkrev >= minlink:
1676 1675 futurelargelinkrevs.add(headlinkrev)
1677 1676
1678 1677 # This algorithm involves walking down the rev graph, starting at the
1679 1678 # heads. Since the revs are topologically sorted according to linkrev,
1680 1679 # once all head linkrevs are below the minlink, we know there are
1681 1680 # no more revs that could have a linkrev greater than minlink.
1682 1681 # So we can stop walking.
1683 1682 while futurelargelinkrevs:
1684 1683 strippoint -= 1
1685 1684 linkrev = heads.pop(strippoint)
1686 1685
1687 1686 if linkrev < minlink:
1688 1687 brokenrevs.add(strippoint)
1689 1688 else:
1690 1689 futurelargelinkrevs.remove(linkrev)
1691 1690
1692 1691 for p in self.parentrevs(strippoint):
1693 1692 if p != nullrev:
1694 1693 plinkrev = self.linkrev(p)
1695 1694 heads[p] = plinkrev
1696 1695 if plinkrev >= minlink:
1697 1696 futurelargelinkrevs.add(plinkrev)
1698 1697
1699 1698 return strippoint, brokenrevs
1700 1699
1701 1700 def strip(self, minlink, transaction):
1702 1701 """truncate the revlog on the first revision with a linkrev >= minlink
1703 1702
1704 1703 This function is called when we're stripping revision minlink and
1705 1704 its descendants from the repository.
1706 1705
1707 1706 We have to remove all revisions with linkrev >= minlink, because
1708 1707 the equivalent changelog revisions will be renumbered after the
1709 1708 strip.
1710 1709
1711 1710 So we truncate the revlog on the first of these revisions, and
1712 1711 trust that the caller has saved the revisions that shouldn't be
1713 1712 removed and that it'll re-add them after this truncation.
1714 1713 """
1715 1714 if len(self) == 0:
1716 1715 return
1717 1716
1718 1717 rev, _ = self.getstrippoint(minlink)
1719 1718 if rev == len(self):
1720 1719 return
1721 1720
1722 1721 # first truncate the files on disk
1723 1722 end = self.start(rev)
1724 1723 if not self._inline:
1725 1724 transaction.add(self.datafile, end)
1726 1725 end = rev * self._io.size
1727 1726 else:
1728 1727 end += rev * self._io.size
1729 1728
1730 1729 transaction.add(self.indexfile, end)
1731 1730
1732 1731 # then reset internal state in memory to forget those revisions
1733 1732 self._cache = None
1734 1733 self._chaininfocache = {}
1735 1734 self._chunkclear()
1736 1735 for x in xrange(rev, len(self)):
1737 1736 del self.nodemap[self.node(x)]
1738 1737
1739 1738 del self.index[rev:-1]
1740 1739
1741 1740 def checksize(self):
1742 1741 expected = 0
1743 1742 if len(self):
1744 1743 expected = max(0, self.end(len(self) - 1))
1745 1744
1746 1745 try:
1747 1746 f = self.opener(self.datafile)
1748 1747 f.seek(0, 2)
1749 1748 actual = f.tell()
1750 1749 f.close()
1751 1750 dd = actual - expected
1752 1751 except IOError as inst:
1753 1752 if inst.errno != errno.ENOENT:
1754 1753 raise
1755 1754 dd = 0
1756 1755
1757 1756 try:
1758 1757 f = self.opener(self.indexfile)
1759 1758 f.seek(0, 2)
1760 1759 actual = f.tell()
1761 1760 f.close()
1762 1761 s = self._io.size
1763 1762 i = max(0, actual // s)
1764 1763 di = actual - (i * s)
1765 1764 if self._inline:
1766 1765 databytes = 0
1767 1766 for r in self:
1768 1767 databytes += max(0, self.length(r))
1769 1768 dd = 0
1770 1769 di = actual - len(self) * s - databytes
1771 1770 except IOError as inst:
1772 1771 if inst.errno != errno.ENOENT:
1773 1772 raise
1774 1773 di = 0
1775 1774
1776 1775 return (dd, di)
1777 1776
1778 1777 def files(self):
1779 1778 res = [self.indexfile]
1780 1779 if not self._inline:
1781 1780 res.append(self.datafile)
1782 1781 return res