revlog: correct type in check to verify rawtext is immutable...
Augie Fackler
r35863:be923ce4 default
@@ -1,2491 +1,2491 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80
81 81 # max size of revlog with inline data
82 82 _maxinline = 131072
83 83 _chunksize = 1048576
84 84
85 85 RevlogError = error.RevlogError
86 86 LookupError = error.LookupError
87 87 CensoredNodeError = error.CensoredNodeError
88 88 ProgrammingError = error.ProgrammingError
89 89
90 90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 91 _flagprocessors = {
92 92 REVIDX_ISCENSORED: None,
93 93 }
94 94
95 95 def addflagprocessor(flag, processor):
96 96 """Register a flag processor on a revision data flag.
97 97
98 98 Invariant:
99 99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 100 - Only one flag processor can be registered on a specific flag.
101 101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 102 following signatures:
103 103 - (read) f(self, rawtext) -> text, bool
104 104 - (write) f(self, text) -> rawtext, bool
105 105 - (raw) f(self, rawtext) -> bool
106 106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 107 directly visible to the user.
108 108 The boolean returned by these transforms is used to determine whether
109 109 the returned text can be used for hash integrity checking. For example,
110 110 if "write" returns False, then "text" is used to generate the hash. If
111 111 "write" returns True, the "rawtext" returned by "write" should be used
112 112 to generate the hash. Usually, "write" and "read" return opposite
113 113 booleans, and "raw" returns the same boolean as "write".
114 114
115 115 Note: The 'raw' transform is used for changegroup generation and in some
116 116 debug commands. In this case the transform only indicates whether the
117 117 contents can be used for hash integrity checks.
118 118 """
119 119 if not flag & REVIDX_KNOWN_FLAGS:
120 120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag not in REVIDX_FLAGS_ORDER:
123 123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag in _flagprocessors:
126 126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 127 raise error.Abort(msg)
128 128 _flagprocessors[flag] = processor
129 129
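# Illustrative sketch, not part of revlog.py: the shape of a conforming
# flag processor. The transforms below are no-ops; a real processor for a
# flag such as REVIDX_EXTSTORED would actually rewrite the text. Returning
# True means the value handed back is safe for hash verification.
def _exampleflagprocessor():
    def readtransform(self, rawtext):
        return rawtext, True
    def writetransform(self, text):
        return text, True
    def rawtransform(self, rawtext):
        return True
    # registration would then look like (left unexecuted on purpose):
    # addflagprocessor(REVIDX_EXTSTORED,
    #                  (readtransform, writetransform, rawtransform))
    return (readtransform, writetransform, rawtransform)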
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
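# Worked example, a sketch rather than part of revlog.py: the first field
# of an index entry packs a 48-bit byte offset with 16 bits of flags, and
# getoffset()/gettype() undo offset_type().
def _exampleoffsettype():
    v = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(v) == 1048576          # high 48 bits: byte offset
    assert gettype(v) == REVIDX_ISCENSORED  # low 16 bits: flags
    # offset_type() raises ValueError for flags outside REVIDX_KNOWN_FLAGS
    # instead of silently corrupting the offset.
    return v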
141 141 _nullhash = hashlib.sha1(nullid)
142 142
143 143 def hash(text, p1, p2):
144 144 """generate a hash from the given text and its parent hashes
145 145
146 146 This hash combines both the current file contents and its history
147 147 in a manner that makes it easy to distinguish nodes with the same
148 148 content in the revision graph.
149 149 """
150 150 # As of now, if one of the parent nodes is null, p2 is null
151 151 if p2 == nullid:
152 152 # deep copy of a hash is faster than creating one
153 153 s = _nullhash.copy()
154 154 s.update(p1)
155 155 else:
156 156 # none of the parent nodes are nullid
157 157 if p1 < p2:
158 158 a = p1
159 159 b = p2
160 160 else:
161 161 a = p2
162 162 b = p1
163 163 s = hashlib.sha1(a)
164 164 s.update(b)
165 165 s.update(text)
166 166 return s.digest()
167 167
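# Sketch, not part of revlog.py: hash() sorts the parents before
# digesting, so the nodeid is independent of parent order, and a null p2
# reduces to sha1(nullid + p1 + text) via the precomputed _nullhash.
def _examplehash():
    p1 = b'\x01' * 20
    p2 = b'\x02' * 20
    assert hash(b'data', p1, p2) == hash(b'data', p2, p1)
    assert hash(b'data', p1, nullid) == hashlib.sha1(
        nullid + p1 + b'data').digest()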
168 168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 169 """returns revs[startidx:endidx] without empty trailing revs
170 170 """
171 171 length = revlog.length
172 172
173 173 if endidx is None:
174 174 endidx = len(revs)
175 175
176 176 # Trim empty revs at the end, but never the very first revision of a chain
177 177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 178 endidx -= 1
179 179
180 180 return revs[startidx:endidx]
181 181
182 182 def _slicechunk(revlog, revs):
183 183 """slice revs to reduce the amount of unrelated data to be read from disk.
184 184
185 185 ``revs`` is sliced into groups that should be read in one go.
186 186 Assume that revs are sorted.
187 187 """
188 188 start = revlog.start
189 189 length = revlog.length
190 190
191 191 if len(revs) <= 1:
192 192 yield revs
193 193 return
194 194
195 195 startbyte = start(revs[0])
196 196 endbyte = start(revs[-1]) + length(revs[-1])
197 197 readdata = deltachainspan = endbyte - startbyte
198 198
199 199 chainpayload = sum(length(r) for r in revs)
200 200
201 201 if deltachainspan:
202 202 density = chainpayload / float(deltachainspan)
203 203 else:
204 204 density = 1.0
205 205
206 206 # Store the gaps in a heap to have them sorted by decreasing size
207 207 gapsheap = []
208 208 heapq.heapify(gapsheap)
209 209 prevend = None
210 210 for i, rev in enumerate(revs):
211 211 revstart = start(rev)
212 212 revlen = length(rev)
213 213
214 214 # Skip empty revisions to form larger holes
215 215 if revlen == 0:
216 216 continue
217 217
218 218 if prevend is not None:
219 219 gapsize = revstart - prevend
220 220 # only consider holes that are large enough
221 221 if gapsize > revlog._srmingapsize:
222 222 heapq.heappush(gapsheap, (-gapsize, i))
223 223
224 224 prevend = revstart + revlen
225 225
226 226 # Collect the indices of the largest holes until the density is acceptable
227 227 indicesheap = []
228 228 heapq.heapify(indicesheap)
229 229 while gapsheap and density < revlog._srdensitythreshold:
230 230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 231
232 232 heapq.heappush(indicesheap, gapidx)
233 233
234 234 # the gap sizes are stored as negatives so the heap sorts them
235 235 # in decreasing order
236 236 readdata -= (-oppgapsize)
237 237 if readdata > 0:
238 238 density = chainpayload / float(readdata)
239 239 else:
240 240 density = 1.0
241 241
242 242 # Cut the revs at collected indices
243 243 previdx = 0
244 244 while indicesheap:
245 245 idx = heapq.heappop(indicesheap)
246 246
247 247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 248 if chunk:
249 249 yield chunk
250 250
251 251 previdx = idx
252 252
253 253 chunk = _trimchunk(revlog, revs, previdx)
254 254 if chunk:
255 255 yield chunk
256 256
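# Sketch, not part of revlog.py: the density that drives _slicechunk().
# A chain spanning 1000 bytes on disk whose revisions hold only 250 bytes
# of payload has density 250/1000 = 0.25, right at the default
# _srdensitythreshold; anything sparser gets cut at its largest gaps
# (those above _srmingapsize) until each slice is dense enough to read in
# one go.
def _exampledensity(chainpayload=250, deltachainspan=1000):
    return chainpayload / float(deltachainspan)  # 0.25 for the defaults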
257 257 @attr.s(slots=True, frozen=True)
258 258 class _deltainfo(object):
259 259 distance = attr.ib()
260 260 deltalen = attr.ib()
261 261 data = attr.ib()
262 262 base = attr.ib()
263 263 chainbase = attr.ib()
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 267 class _deltacomputer(object):
268 268 def __init__(self, revlog):
269 269 self.revlog = revlog
270 270
271 271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 272 """
273 273 Provides candidate revisions that are worth diffing against,
274 274 grouped by how easy they are to use.
275 275 """
276 276 revlog = self.revlog
277 277 curr = len(revlog)
278 278 prev = curr - 1
279 279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280 280
281 281 # should we try to build a delta?
282 282 if prev != nullrev and revlog.storedeltachains:
283 283 tested = set()
284 284 # This condition is true most of the time when processing
285 285 # changegroup data into a generaldelta repo. The only time it
286 286 # isn't true is if this is the first revision in a delta chain
287 287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 289 # Assume what we received from the server is a good choice
290 290 # build delta will reuse the cache
291 291 yield (cachedelta[0],)
292 292 tested.add(cachedelta[0])
293 293
294 294 if revlog._generaldelta:
295 295 # exclude already lazy tested base if any
296 296 parents = [p for p in (p1r, p2r)
297 297 if p != nullrev and p not in tested]
298 298 if parents and not revlog._aggressivemergedeltas:
299 299 # Pick whichever parent is closer to us (to minimize the
300 300 # chance of having to build a fulltext).
301 301 parents = [max(parents)]
302 302 tested.update(parents)
303 303 yield parents
304 304
305 305 if prev not in tested:
306 306 # other approaches failed; try against prev to hopefully save us a
307 307 # fulltext.
308 308 yield (prev,)
309 309
310 310 def buildtext(self, revinfo, fh):
311 311 """Builds a fulltext version of a revision
312 312
313 313 revinfo: _revisioninfo instance that contains all needed info
314 314 fh: file handle to either the .i or the .d revlog file,
315 315 depending on whether it is inlined or not
316 316 """
317 317 btext = revinfo.btext
318 318 if btext[0] is not None:
319 319 return btext[0]
320 320
321 321 revlog = self.revlog
322 322 cachedelta = revinfo.cachedelta
323 323 flags = revinfo.flags
324 324 node = revinfo.node
325 325
326 326 baserev = cachedelta[0]
327 327 delta = cachedelta[1]
328 328 # special case deltas which replace entire base; no need to decode
329 329 # base revision. this neatly avoids censored bases, which throw when
330 330 # they're decoded.
331 331 hlen = struct.calcsize(">lll")
332 332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 333 len(delta) - hlen):
334 334 btext[0] = delta[hlen:]
335 335 else:
336 336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 337 btext[0] = mdiff.patch(basetext, delta)
338 338
339 339 try:
340 340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 341 btext[0], validatehash = res
342 342 if validatehash:
343 343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 344 if flags & REVIDX_ISCENSORED:
345 345 raise RevlogError(_('node %s is not censored') % node)
346 346 except CensoredNodeError:
347 347 # must pass the censored index flag to add censored revisions
348 348 if not flags & REVIDX_ISCENSORED:
349 349 raise
350 350 return btext[0]
351 351
352 352 def _builddeltadiff(self, base, revinfo, fh):
353 353 revlog = self.revlog
354 354 t = self.buildtext(revinfo, fh)
355 355 if revlog.iscensored(base):
356 356 # deltas based on a censored revision must replace the
357 357 # full content in one patch, so delta works everywhere
358 358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 359 delta = header + t
360 360 else:
361 361 ptext = revlog.revision(base, _df=fh, raw=True)
362 362 delta = mdiff.textdiff(ptext, t)
363 363
364 364 return delta
365 365
366 366 def _builddeltainfo(self, revinfo, base, fh):
367 367 # can we use the cached delta?
368 368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 369 delta = revinfo.cachedelta[1]
370 370 else:
371 371 delta = self._builddeltadiff(base, revinfo, fh)
372 372 revlog = self.revlog
373 373 header, data = revlog.compress(delta)
374 374 deltalen = len(header) + len(data)
375 375 chainbase = revlog.chainbase(base)
376 376 offset = revlog.end(len(revlog) - 1)
377 377 dist = deltalen + offset - revlog.start(chainbase)
378 378 if revlog._generaldelta:
379 379 deltabase = base
380 380 else:
381 381 deltabase = chainbase
382 382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 383 chainlen += 1
384 384 compresseddeltalen += deltalen
385 385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 386 chainbase, chainlen, compresseddeltalen)
387 387
388 388 def finddeltainfo(self, revinfo, fh):
389 389 """Find an acceptable delta against a candidate revision
390 390
391 391 revinfo: information about the revision (instance of _revisioninfo)
392 392 fh: file handle to either the .i or the .d revlog file,
393 393 depending on whether it is inlined or not
394 394
395 395 Returns the first acceptable candidate revision, as ordered by
396 396 _getcandidaterevs
397 397 """
398 398 cachedelta = revinfo.cachedelta
399 399 p1 = revinfo.p1
400 400 p2 = revinfo.p2
401 401 revlog = self.revlog
402 402
403 403 deltainfo = None
404 404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 405 nominateddeltas = []
406 406 for candidaterev in candidaterevs:
407 407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 409 nominateddeltas.append(candidatedelta)
410 410 if nominateddeltas:
411 411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 412 break
413 413
414 414 return deltainfo
415 415
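# Sketch, not part of revlog.py: the order in which finddeltainfo() tries
# delta bases, as produced by _getcandidaterevs(): the cached base sent
# over the wire first (when lazydeltabase applies), then the closer
# parent (or both parents with aggressivemergedeltas), and finally the
# previous revision before giving up and storing a fulltext.
def _examplecandidateorder():
    return ['cached delta base', 'parent(s)', 'previous revision']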
416 416 @attr.s(slots=True, frozen=True)
417 417 class _revisioninfo(object):
418 418 """Information about a revision that allows building its fulltext
419 419 node: expected hash of the revision
420 420 p1, p2: parent revs of the revision
421 421 btext: built text cache consisting of a one-element list
422 422 cachedelta: (baserev, uncompressed_delta) or None
423 423 flags: flags associated with the revision storage
424 424
425 425 One of btext[0] or cachedelta must be set.
426 426 """
427 427 node = attr.ib()
428 428 p1 = attr.ib()
429 429 p2 = attr.ib()
430 430 btext = attr.ib()
431 431 textlen = attr.ib()
432 432 cachedelta = attr.ib()
433 433 flags = attr.ib()
434 434
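# Sketch, not part of revlog.py: building a _revisioninfo from a cached
# delta. btext is a one-element list so buildtext() can fill the fulltext
# in place; cachedelta pairs the delta base rev with the uncompressed
# delta. All arguments here are caller-supplied placeholders.
def _examplerevisioninfo(node, p1, p2, baserev, delta, textlen):
    return _revisioninfo(node, p1, p2, [None], textlen,
                         (baserev, delta), REVIDX_DEFAULT_FLAGS)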
435 435 # index v0:
436 436 # 4 bytes: offset
437 437 # 4 bytes: compressed length
438 438 # 4 bytes: base rev
439 439 # 4 bytes: link rev
440 440 # 20 bytes: parent 1 nodeid
441 441 # 20 bytes: parent 2 nodeid
442 442 # 20 bytes: nodeid
443 443 indexformatv0 = struct.Struct(">4l20s20s20s")
444 444 indexformatv0_pack = indexformatv0.pack
445 445 indexformatv0_unpack = indexformatv0.unpack
446 446
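# Sketch, not part of revlog.py: one v0 index record is 76 bytes, four
# signed 32-bit ints followed by three 20-byte binary nodeids, matching
# the field list in the comment above. The nodeid bytes are made up.
def _examplev0entry():
    entry = indexformatv0_pack(0,             # offset
                               11,            # compressed length
                               0,             # base rev
                               0,             # link rev
                               nullid,        # parent 1 nodeid
                               nullid,        # parent 2 nodeid
                               b'\x05' * 20)  # nodeid
    assert len(entry) == indexformatv0.size == 76
    return entry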
447 447 class revlogoldio(object):
448 448 def __init__(self):
449 449 self.size = indexformatv0.size
450 450
451 451 def parseindex(self, data, inline):
452 452 s = self.size
453 453 index = []
454 454 nodemap = {nullid: nullrev}
455 455 n = off = 0
456 456 l = len(data)
457 457 while off + s <= l:
458 458 cur = data[off:off + s]
459 459 off += s
460 460 e = indexformatv0_unpack(cur)
461 461 # transform to revlogv1 format
462 462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
463 463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
464 464 index.append(e2)
465 465 nodemap[e[6]] = n
466 466 n += 1
467 467
468 468 # add the magic null revision at -1
469 469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 return index, nodemap, None
472 472
473 473 def packentry(self, entry, node, version, rev):
474 474 if gettype(entry[0]):
475 475 raise RevlogError(_('index entry flags need revlog version 1'))
476 476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
477 477 node(entry[5]), node(entry[6]), entry[7])
478 478 return indexformatv0_pack(*e2)
479 479
480 480 # index ng:
481 481 # 6 bytes: offset
482 482 # 2 bytes: flags
483 483 # 4 bytes: compressed length
484 484 # 4 bytes: uncompressed length
485 485 # 4 bytes: base rev
486 486 # 4 bytes: link rev
487 487 # 4 bytes: parent 1 rev
488 488 # 4 bytes: parent 2 rev
489 489 # 32 bytes: nodeid
490 490 indexformatng = struct.Struct(">Qiiiiii20s12x")
491 491 indexformatng_pack = indexformatng.pack
492 492 versionformat = struct.Struct(">I")
493 493 versionformat_pack = versionformat.pack
494 494 versionformat_unpack = versionformat.unpack
495 495
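# Sketch, not part of revlog.py: one "ng" (v1) index record is 64 bytes:
# an 8-byte offset+flags field, six 4-byte ints, a 20-byte nodeid and 12
# bytes of padding. For rev 0, packentry() overwrites the first 4 bytes
# with the version header. The nodeid below is made up.
def _examplengentry():
    e = (offset_type(0, 0), 11, 11, 0, 0, nullrev, nullrev, b'\x05' * 20)
    assert len(indexformatng_pack(*e)) == indexformatng.size == 64
    return e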
496 496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 497 # signed integer)
498 498 _maxentrysize = 0x7fffffff
499 499
500 500 class revlogio(object):
501 501 def __init__(self):
502 502 self.size = indexformatng.size
503 503
504 504 def parseindex(self, data, inline):
505 505 # call the C implementation to parse the index data
506 506 index, cache = parsers.parse_index2(data, inline)
507 507 return index, getattr(index, 'nodemap', None), cache
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 p = indexformatng_pack(*entry)
511 511 if rev == 0:
512 512 p = versionformat_pack(version) + p[4:]
513 513 return p
514 514
515 515 class revlog(object):
516 516 """
517 517 the underlying revision storage object
518 518
519 519 A revlog consists of two parts, an index and the revision data.
520 520
521 521 The index is a file with a fixed record size containing
522 522 information on each revision, including its nodeid (hash), the
523 523 nodeids of its parents, the position and offset of its data within
524 524 the data file, and the revision it's based on. Finally, each entry
525 525 contains a linkrev entry that can serve as a pointer to external
526 526 data.
527 527
528 528 The revision data itself is a linear collection of data chunks.
529 529 Each chunk represents a revision and is usually represented as a
530 530 delta against the previous chunk. To bound lookup time, runs of
531 531 deltas are limited to about 2 times the length of the original
532 532 version data. This makes retrieval of a version proportional to
533 533 its size, or O(1) relative to the number of revisions.
534 534
535 535 Both pieces of the revlog are written to in an append-only
536 536 fashion, which means we never need to rewrite a file to insert or
537 537 remove data, and can use some simple techniques to avoid the need
538 538 for locking while reading.
539 539
540 540 If checkambig, indexfile is opened with checkambig=True at
541 541 writing, to avoid file stat ambiguity.
542 542
543 543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 544 index will be mmapped rather than read if it is larger than the
545 545 configured threshold.
546 546 """
547 547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 548 mmaplargeindex=False):
549 549 """
550 550 create a revlog object
551 551
552 552 opener is a function that abstracts the file opening operation
553 553 and can be used to implement COW semantics or the like.
554 554 """
555 555 self.indexfile = indexfile
556 556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 557 self.opener = opener
558 558 # When True, indexfile is opened with checkambig=True at writing, to
559 559 # avoid file stat ambiguity.
560 560 self._checkambig = checkambig
561 561 # 3-tuple of (node, rev, text) for a raw revision.
562 562 self._cache = None
563 563 # Maps rev to chain base rev.
564 564 self._chainbasecache = util.lrucachedict(100)
565 565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 566 self._chunkcache = (0, '')
567 567 # How much data to read and cache into the raw revlog data cache.
568 568 self._chunkcachesize = 65536
569 569 self._maxchainlen = None
570 570 self._aggressivemergedeltas = False
571 571 self.index = []
572 572 # Mapping of partial identifiers to full nodes.
573 573 self._pcache = {}
574 574 # Mapping of revision integer to full node.
575 575 self._nodecache = {nullid: nullrev}
576 576 self._nodepos = None
577 577 self._compengine = 'zlib'
578 578 self._maxdeltachainspan = -1
579 579 self._withsparseread = False
580 580 self._srdensitythreshold = 0.25
581 581 self._srmingapsize = 262144
582 582
583 583 mmapindexthreshold = None
584 584 v = REVLOG_DEFAULT_VERSION
585 585 opts = getattr(opener, 'options', None)
586 586 if opts is not None:
587 587 if 'revlogv2' in opts:
588 588 # version 2 revlogs always use generaldelta.
589 589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 590 elif 'revlogv1' in opts:
591 591 if 'generaldelta' in opts:
592 592 v |= FLAG_GENERALDELTA
593 593 else:
594 594 v = 0
595 595 if 'chunkcachesize' in opts:
596 596 self._chunkcachesize = opts['chunkcachesize']
597 597 if 'maxchainlen' in opts:
598 598 self._maxchainlen = opts['maxchainlen']
599 599 if 'aggressivemergedeltas' in opts:
600 600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 602 if 'compengine' in opts:
603 603 self._compengine = opts['compengine']
604 604 if 'maxdeltachainspan' in opts:
605 605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 607 mmapindexthreshold = opts['mmapindexthreshold']
608 608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 609 if 'sparse-read-density-threshold' in opts:
610 610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 611 if 'sparse-read-min-gap-size' in opts:
612 612 self._srmingapsize = opts['sparse-read-min-gap-size']
613 613
614 614 if self._chunkcachesize <= 0:
615 615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 616 'than 0') % self._chunkcachesize)
617 617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 619 'of 2') % self._chunkcachesize)
620 620
621 621 indexdata = ''
622 622 self._initempty = True
623 623 try:
624 624 f = self.opener(self.indexfile)
625 625 if (mmapindexthreshold is not None and
626 626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 627 indexdata = util.buffer(util.mmapread(f))
628 628 else:
629 629 indexdata = f.read()
630 630 f.close()
631 631 if len(indexdata) > 0:
632 632 v = versionformat_unpack(indexdata[:4])[0]
633 633 self._initempty = False
634 634 except IOError as inst:
635 635 if inst.errno != errno.ENOENT:
636 636 raise
637 637
638 638 self.version = v
639 639 self._inline = v & FLAG_INLINE_DATA
640 640 self._generaldelta = v & FLAG_GENERALDELTA
641 641 flags = v & ~0xFFFF
642 642 fmt = v & 0xFFFF
643 643 if fmt == REVLOGV0:
644 644 if flags:
645 645 raise RevlogError(_('unknown flags (%#04x) in version %d '
646 646 'revlog %s') %
647 647 (flags >> 16, fmt, self.indexfile))
648 648 elif fmt == REVLOGV1:
649 649 if flags & ~REVLOGV1_FLAGS:
650 650 raise RevlogError(_('unknown flags (%#04x) in version %d '
651 651 'revlog %s') %
652 652 (flags >> 16, fmt, self.indexfile))
653 653 elif fmt == REVLOGV2:
654 654 if flags & ~REVLOGV2_FLAGS:
655 655 raise RevlogError(_('unknown flags (%#04x) in version %d '
656 656 'revlog %s') %
657 657 (flags >> 16, fmt, self.indexfile))
658 658 else:
659 659 raise RevlogError(_('unknown version (%d) in revlog %s') %
660 660 (fmt, self.indexfile))
661 661
662 662 self.storedeltachains = True
663 663
664 664 self._io = revlogio()
665 665 if self.version == REVLOGV0:
666 666 self._io = revlogoldio()
667 667 try:
668 668 d = self._io.parseindex(indexdata, self._inline)
669 669 except (ValueError, IndexError):
670 670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
671 671 self.index, nodemap, self._chunkcache = d
672 672 if nodemap is not None:
673 673 self.nodemap = self._nodecache = nodemap
674 674 if not self._chunkcache:
675 675 self._chunkclear()
676 676 # revnum -> (chain-length, sum-delta-length)
677 677 self._chaininfocache = {}
678 678 # revlog header -> revlog compressor
679 679 self._decompressors = {}
680 680
681 681 @util.propertycache
682 682 def _compressor(self):
683 683 return util.compengines[self._compengine].revlogcompressor()
684 684
685 685 def tip(self):
686 686 return self.node(len(self.index) - 2)
687 687 def __contains__(self, rev):
688 688 return 0 <= rev < len(self)
689 689 def __len__(self):
690 690 return len(self.index) - 1
691 691 def __iter__(self):
692 692 return iter(xrange(len(self)))
693 693 def revs(self, start=0, stop=None):
694 694 """iterate over all rev in this revlog (from start to stop)"""
695 695 step = 1
696 696 if stop is not None:
697 697 if start > stop:
698 698 step = -1
699 699 stop += step
700 700 else:
701 701 stop = len(self)
702 702 return xrange(start, stop, step)
703 703
704 704 @util.propertycache
705 705 def nodemap(self):
706 706 self.rev(self.node(0))
707 707 return self._nodecache
708 708
709 709 def hasnode(self, node):
710 710 try:
711 711 self.rev(node)
712 712 return True
713 713 except KeyError:
714 714 return False
715 715
716 716 def clearcaches(self):
717 717 self._cache = None
718 718 self._chainbasecache.clear()
719 719 self._chunkcache = (0, '')
720 720 self._pcache = {}
721 721
722 722 try:
723 723 self._nodecache.clearcaches()
724 724 except AttributeError:
725 725 self._nodecache = {nullid: nullrev}
726 726 self._nodepos = None
727 727
728 728 def rev(self, node):
729 729 try:
730 730 return self._nodecache[node]
731 731 except TypeError:
732 732 raise
733 733 except RevlogError:
734 734 # parsers.c radix tree lookup failed
735 735 if node == wdirid:
736 736 raise error.WdirUnsupported
737 737 raise LookupError(node, self.indexfile, _('no node'))
738 738 except KeyError:
739 739 # pure python cache lookup failed
740 740 n = self._nodecache
741 741 i = self.index
742 742 p = self._nodepos
743 743 if p is None:
744 744 p = len(i) - 2
745 745 for r in xrange(p, -1, -1):
746 746 v = i[r][7]
747 747 n[v] = r
748 748 if v == node:
749 749 self._nodepos = r - 1
750 750 return r
751 751 if node == wdirid:
752 752 raise error.WdirUnsupported
753 753 raise LookupError(node, self.indexfile, _('no node'))
754 754
755 755 # Accessors for index entries.
756 756
757 757 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
758 758 # are flags.
759 759 def start(self, rev):
760 760 return int(self.index[rev][0] >> 16)
761 761
762 762 def flags(self, rev):
763 763 return self.index[rev][0] & 0xFFFF
764 764
765 765 def length(self, rev):
766 766 return self.index[rev][1]
767 767
768 768 def rawsize(self, rev):
769 769 """return the length of the uncompressed text for a given revision"""
770 770 l = self.index[rev][2]
771 771 if l >= 0:
772 772 return l
773 773
774 774 t = self.revision(rev, raw=True)
775 775 return len(t)
776 776
777 777 def size(self, rev):
778 778 """length of non-raw text (processed by a "read" flag processor)"""
779 779 # fast path: if no "read" flag processor could change the content,
780 780 # size is rawsize. note: ELLIPSIS is known to not change the content.
781 781 flags = self.flags(rev)
782 782 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
783 783 return self.rawsize(rev)
784 784
785 785 return len(self.revision(rev, raw=False))
786 786
787 787 def chainbase(self, rev):
788 788 base = self._chainbasecache.get(rev)
789 789 if base is not None:
790 790 return base
791 791
792 792 index = self.index
793 793 base = index[rev][3]
794 794 while base != rev:
795 795 rev = base
796 796 base = index[rev][3]
797 797
798 798 self._chainbasecache[rev] = base
799 799 return base
800 800
801 801 def linkrev(self, rev):
802 802 return self.index[rev][4]
803 803
804 804 def parentrevs(self, rev):
805 805 try:
806 806 entry = self.index[rev]
807 807 except IndexError:
808 808 if rev == wdirrev:
809 809 raise error.WdirUnsupported
810 810 raise
811 811
812 812 return entry[5], entry[6]
813 813
814 814 def node(self, rev):
815 815 try:
816 816 return self.index[rev][7]
817 817 except IndexError:
818 818 if rev == wdirrev:
819 819 raise error.WdirUnsupported
820 820 raise
821 821
822 822 # Derived from index values.
823 823
824 824 def end(self, rev):
825 825 return self.start(rev) + self.length(rev)
826 826
827 827 def parents(self, node):
828 828 i = self.index
829 829 d = i[self.rev(node)]
830 830 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
831 831
832 832 def chainlen(self, rev):
833 833 return self._chaininfo(rev)[0]
834 834
835 835 def _chaininfo(self, rev):
836 836 chaininfocache = self._chaininfocache
837 837 if rev in chaininfocache:
838 838 return chaininfocache[rev]
839 839 index = self.index
840 840 generaldelta = self._generaldelta
841 841 iterrev = rev
842 842 e = index[iterrev]
843 843 clen = 0
844 844 compresseddeltalen = 0
845 845 while iterrev != e[3]:
846 846 clen += 1
847 847 compresseddeltalen += e[1]
848 848 if generaldelta:
849 849 iterrev = e[3]
850 850 else:
851 851 iterrev -= 1
852 852 if iterrev in chaininfocache:
853 853 t = chaininfocache[iterrev]
854 854 clen += t[0]
855 855 compresseddeltalen += t[1]
856 856 break
857 857 e = index[iterrev]
858 858 else:
859 859 # Add text length of base since decompressing that also takes
860 860 # work. For cache hits the length is already included.
861 861 compresseddeltalen += e[1]
862 862 r = (clen, compresseddeltalen)
863 863 chaininfocache[rev] = r
864 864 return r
865 865
866 866 def _deltachain(self, rev, stoprev=None):
867 867 """Obtain the delta chain for a revision.
868 868
869 869 ``stoprev`` specifies a revision to stop at. If not specified, we
870 870 stop at the base of the chain.
871 871
872 872 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
873 873 revs in ascending order and ``stopped`` is a bool indicating whether
874 874 ``stoprev`` was hit.
875 875 """
876 876 # Try C implementation.
877 877 try:
878 878 return self.index.deltachain(rev, stoprev, self._generaldelta)
879 879 except AttributeError:
880 880 pass
881 881
882 882 chain = []
883 883
884 884 # Alias to prevent attribute lookup in tight loop.
885 885 index = self.index
886 886 generaldelta = self._generaldelta
887 887
888 888 iterrev = rev
889 889 e = index[iterrev]
890 890 while iterrev != e[3] and iterrev != stoprev:
891 891 chain.append(iterrev)
892 892 if generaldelta:
893 893 iterrev = e[3]
894 894 else:
895 895 iterrev -= 1
896 896 e = index[iterrev]
897 897
898 898 if iterrev == stoprev:
899 899 stopped = True
900 900 else:
901 901 chain.append(iterrev)
902 902 stopped = False
903 903
904 904 chain.reverse()
905 905 return chain, stopped
906 906
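# Sketch, not part of revlog.py: the pure-Python chain walk above reduced
# to a toy 'bases' list mapping rev -> delta base rev, to show what the
# (chain, stopped) return value looks like. For example, with
# bases = [0, 0, 1, 0, 3, 3]:
#   _exampledeltachain(bases, 5)            -> ([0, 3, 5], False)
#   _exampledeltachain(bases, 5, stoprev=3) -> ([5], True)
def _exampledeltachain(bases, rev, stoprev=None):
    chain = []
    iterrev = rev
    while iterrev != bases[iterrev] and iterrev != stoprev:
        chain.append(iterrev)
        iterrev = bases[iterrev]
    stopped = iterrev == stoprev
    if not stopped:
        chain.append(iterrev)
    chain.reverse()
    return chain, stopped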
907 907 def ancestors(self, revs, stoprev=0, inclusive=False):
908 908 """Generate the ancestors of 'revs' in reverse topological order.
909 909 Does not generate revs lower than stoprev.
910 910
911 911 See the documentation for ancestor.lazyancestors for more details."""
912 912
913 913 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
914 914 inclusive=inclusive)
915 915
916 916 def descendants(self, revs):
917 917 """Generate the descendants of 'revs' in revision order.
918 918
919 919 Yield a sequence of revision numbers starting with a child of
920 920 some rev in revs, i.e., each revision is *not* considered a
921 921 descendant of itself. Results are ordered by revision number (a
922 922 topological sort)."""
923 923 first = min(revs)
924 924 if first == nullrev:
925 925 for i in self:
926 926 yield i
927 927 return
928 928
929 929 seen = set(revs)
930 930 for i in self.revs(start=first + 1):
931 931 for x in self.parentrevs(i):
932 932 if x != nullrev and x in seen:
933 933 seen.add(i)
934 934 yield i
935 935 break
936 936
937 937 def findcommonmissing(self, common=None, heads=None):
938 938 """Return a tuple of the ancestors of common and the ancestors of heads
939 939 that are not ancestors of common. In revset terminology, we return the
940 940 tuple:
941 941
942 942 ::common, (::heads) - (::common)
943 943
944 944 The list is sorted by revision number, meaning it is
945 945 topologically sorted.
946 946
947 947 'heads' and 'common' are both lists of node IDs. If heads is
948 948 not supplied, uses all of the revlog's heads. If common is not
949 949 supplied, uses nullid."""
950 950 if common is None:
951 951 common = [nullid]
952 952 if heads is None:
953 953 heads = self.heads()
954 954
955 955 common = [self.rev(n) for n in common]
956 956 heads = [self.rev(n) for n in heads]
957 957
958 958 # we want the ancestors, but inclusive
959 959 class lazyset(object):
960 960 def __init__(self, lazyvalues):
961 961 self.addedvalues = set()
962 962 self.lazyvalues = lazyvalues
963 963
964 964 def __contains__(self, value):
965 965 return value in self.addedvalues or value in self.lazyvalues
966 966
967 967 def __iter__(self):
968 968 added = self.addedvalues
969 969 for r in added:
970 970 yield r
971 971 for r in self.lazyvalues:
972 972 if not r in added:
973 973 yield r
974 974
975 975 def add(self, value):
976 976 self.addedvalues.add(value)
977 977
978 978 def update(self, values):
979 979 self.addedvalues.update(values)
980 980
981 981 has = lazyset(self.ancestors(common))
982 982 has.add(nullrev)
983 983 has.update(common)
984 984
985 985 # take all ancestors from heads that aren't in has
986 986 missing = set()
987 987 visit = collections.deque(r for r in heads if r not in has)
988 988 while visit:
989 989 r = visit.popleft()
990 990 if r in missing:
991 991 continue
992 992 else:
993 993 missing.add(r)
994 994 for p in self.parentrevs(r):
995 995 if p not in has:
996 996 visit.append(p)
997 997 missing = list(missing)
998 998 missing.sort()
999 999 return has, [self.node(miss) for miss in missing]
1000 1000
1001 1001 def incrementalmissingrevs(self, common=None):
1002 1002 """Return an object that can be used to incrementally compute the
1003 1003 revision numbers of the ancestors of arbitrary sets that are not
1004 1004 ancestors of common. This is an ancestor.incrementalmissingancestors
1005 1005 object.
1006 1006
1007 1007 'common' is a list of revision numbers. If common is not supplied, uses
1008 1008 nullrev.
1009 1009 """
1010 1010 if common is None:
1011 1011 common = [nullrev]
1012 1012
1013 1013 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1014 1014
1015 1015 def findmissingrevs(self, common=None, heads=None):
1016 1016 """Return the revision numbers of the ancestors of heads that
1017 1017 are not ancestors of common.
1018 1018
1019 1019 More specifically, return a list of revision numbers corresponding to
1020 1020 nodes N such that every N satisfies the following constraints:
1021 1021
1022 1022 1. N is an ancestor of some node in 'heads'
1023 1023 2. N is not an ancestor of any node in 'common'
1024 1024
1025 1025 The list is sorted by revision number, meaning it is
1026 1026 topologically sorted.
1027 1027
1028 1028 'heads' and 'common' are both lists of revision numbers. If heads is
1029 1029 not supplied, uses all of the revlog's heads. If common is not
1030 1030 supplied, uses nullid."""
1031 1031 if common is None:
1032 1032 common = [nullrev]
1033 1033 if heads is None:
1034 1034 heads = self.headrevs()
1035 1035
1036 1036 inc = self.incrementalmissingrevs(common=common)
1037 1037 return inc.missingancestors(heads)
1038 1038
1039 1039 def findmissing(self, common=None, heads=None):
1040 1040 """Return the ancestors of heads that are not ancestors of common.
1041 1041
1042 1042 More specifically, return a list of nodes N such that every N
1043 1043 satisfies the following constraints:
1044 1044
1045 1045 1. N is an ancestor of some node in 'heads'
1046 1046 2. N is not an ancestor of any node in 'common'
1047 1047
1048 1048 The list is sorted by revision number, meaning it is
1049 1049 topologically sorted.
1050 1050
1051 1051 'heads' and 'common' are both lists of node IDs. If heads is
1052 1052 not supplied, uses all of the revlog's heads. If common is not
1053 1053 supplied, uses nullid."""
1054 1054 if common is None:
1055 1055 common = [nullid]
1056 1056 if heads is None:
1057 1057 heads = self.heads()
1058 1058
1059 1059 common = [self.rev(n) for n in common]
1060 1060 heads = [self.rev(n) for n in heads]
1061 1061
1062 1062 inc = self.incrementalmissingrevs(common=common)
1063 1063 return [self.node(r) for r in inc.missingancestors(heads)]
1064 1064
1065 1065 def nodesbetween(self, roots=None, heads=None):
1066 1066 """Return a topological path from 'roots' to 'heads'.
1067 1067
1068 1068 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1069 1069 topologically sorted list of all nodes N that satisfy both of
1070 1070 these constraints:
1071 1071
1072 1072 1. N is a descendant of some node in 'roots'
1073 1073 2. N is an ancestor of some node in 'heads'
1074 1074
1075 1075 Every node is considered to be both a descendant and an ancestor
1076 1076 of itself, so every reachable node in 'roots' and 'heads' will be
1077 1077 included in 'nodes'.
1078 1078
1079 1079 'outroots' is the list of reachable nodes in 'roots', i.e., the
1080 1080 subset of 'roots' that is returned in 'nodes'. Likewise,
1081 1081 'outheads' is the subset of 'heads' that is also in 'nodes'.
1082 1082
1083 1083 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1084 1084 unspecified, uses nullid as the only root. If 'heads' is
1085 1085 unspecified, uses list of all of the revlog's heads."""
1086 1086 nonodes = ([], [], [])
1087 1087 if roots is not None:
1088 1088 roots = list(roots)
1089 1089 if not roots:
1090 1090 return nonodes
1091 1091 lowestrev = min([self.rev(n) for n in roots])
1092 1092 else:
1093 1093 roots = [nullid] # Everybody's a descendant of nullid
1094 1094 lowestrev = nullrev
1095 1095 if (lowestrev == nullrev) and (heads is None):
1096 1096 # We want _all_ the nodes!
1097 1097 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1098 1098 if heads is None:
1099 1099 # All nodes are ancestors, so the latest ancestor is the last
1100 1100 # node.
1101 1101 highestrev = len(self) - 1
1102 1102 # Set ancestors to None to signal that every node is an ancestor.
1103 1103 ancestors = None
1104 1104 # Set heads to an empty dictionary for later discovery of heads
1105 1105 heads = {}
1106 1106 else:
1107 1107 heads = list(heads)
1108 1108 if not heads:
1109 1109 return nonodes
1110 1110 ancestors = set()
1111 1111 # Turn heads into a dictionary so we can remove 'fake' heads.
1112 1112 # Also, later we will be using it to filter out the heads we can't
1113 1113 # find from roots.
1114 1114 heads = dict.fromkeys(heads, False)
1115 1115 # Start at the top and keep marking parents until we're done.
1116 1116 nodestotag = set(heads)
1117 1117 # Remember where the top was so we can use it as a limit later.
1118 1118 highestrev = max([self.rev(n) for n in nodestotag])
1119 1119 while nodestotag:
1120 1120 # grab a node to tag
1121 1121 n = nodestotag.pop()
1122 1122 # Never tag nullid
1123 1123 if n == nullid:
1124 1124 continue
1125 1125 # A node's revision number represents its place in a
1126 1126 # topologically sorted list of nodes.
1127 1127 r = self.rev(n)
1128 1128 if r >= lowestrev:
1129 1129 if n not in ancestors:
1130 1130 # If we are possibly a descendant of one of the roots
1131 1131 # and we haven't already been marked as an ancestor
1132 1132 ancestors.add(n) # Mark as ancestor
1133 1133 # Add non-nullid parents to list of nodes to tag.
1134 1134 nodestotag.update([p for p in self.parents(n) if
1135 1135 p != nullid])
1136 1136 elif n in heads: # We've seen it before, is it a fake head?
1137 1137 # So it is, real heads should not be the ancestors of
1138 1138 # any other heads.
1139 1139 heads.pop(n)
1140 1140 if not ancestors:
1141 1141 return nonodes
1142 1142 # Now that we have our set of ancestors, we want to remove any
1143 1143 # roots that are not ancestors.
1144 1144
1145 1145 # If one of the roots was nullid, everything is included anyway.
1146 1146 if lowestrev > nullrev:
1147 1147 # But, since we weren't, let's recompute the lowest rev to not
1148 1148 # include roots that aren't ancestors.
1149 1149
1150 1150 # Filter out roots that aren't ancestors of heads
1151 1151 roots = [root for root in roots if root in ancestors]
1152 1152 # Recompute the lowest revision
1153 1153 if roots:
1154 1154 lowestrev = min([self.rev(root) for root in roots])
1155 1155 else:
1156 1156 # No more roots? Return empty list
1157 1157 return nonodes
1158 1158 else:
1159 1159 # We are descending from nullid, and don't need to care about
1160 1160 # any other roots.
1161 1161 lowestrev = nullrev
1162 1162 roots = [nullid]
1163 1163 # Transform our roots list into a set.
1164 1164 descendants = set(roots)
1165 1165 # Also, keep the original roots so we can filter out roots that aren't
1166 1166 # 'real' roots (i.e. are descended from other roots).
1167 1167 roots = descendants.copy()
1168 1168 # Our topologically sorted list of output nodes.
1169 1169 orderedout = []
1170 1170 # Don't start at nullid since we don't want nullid in our output list,
1171 1171 # and if nullid shows up in descendants, empty parents will look like
1172 1172 # they're descendants.
1173 1173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1174 1174 n = self.node(r)
1175 1175 isdescendant = False
1176 1176 if lowestrev == nullrev: # Everybody is a descendant of nullid
1177 1177 isdescendant = True
1178 1178 elif n in descendants:
1179 1179 # n is already a descendant
1180 1180 isdescendant = True
1181 1181 # This check only needs to be done here because all the roots
1182 1182 # will start being marked as descendants before the loop.
1183 1183 if n in roots:
1184 1184 # If n was a root, check if it's a 'real' root.
1185 1185 p = tuple(self.parents(n))
1186 1186 # If any of its parents are descendants, it's not a root.
1187 1187 if (p[0] in descendants) or (p[1] in descendants):
1188 1188 roots.remove(n)
1189 1189 else:
1190 1190 p = tuple(self.parents(n))
1191 1191 # A node is a descendant if either of its parents are
1192 1192 # descendants. (We seeded the descendants set with the roots
1193 1193 # up there, remember?)
1194 1194 if (p[0] in descendants) or (p[1] in descendants):
1195 1195 descendants.add(n)
1196 1196 isdescendant = True
1197 1197 if isdescendant and ((ancestors is None) or (n in ancestors)):
1198 1198 # Only include nodes that are both descendants and ancestors.
1199 1199 orderedout.append(n)
1200 1200 if (ancestors is not None) and (n in heads):
1201 1201 # We're trying to figure out which heads are reachable
1202 1202 # from roots.
1203 1203 # Mark this head as having been reached
1204 1204 heads[n] = True
1205 1205 elif ancestors is None:
1206 1206 # Otherwise, we're trying to discover the heads.
1207 1207 # Assume this is a head because if it isn't, the next step
1208 1208 # will eventually remove it.
1209 1209 heads[n] = True
1210 1210 # But, obviously its parents aren't.
1211 1211 for p in self.parents(n):
1212 1212 heads.pop(p, None)
1213 1213 heads = [head for head, flag in heads.iteritems() if flag]
1214 1214 roots = list(roots)
1215 1215 assert orderedout
1216 1216 assert roots
1217 1217 assert heads
1218 1218 return (orderedout, roots, heads)
1219 1219
1220 1220 def headrevs(self):
1221 1221 try:
1222 1222 return self.index.headrevs()
1223 1223 except AttributeError:
1224 1224 return self._headrevs()
1225 1225
1226 1226 def computephases(self, roots):
1227 1227 return self.index.computephasesmapsets(roots)
1228 1228
1229 1229 def _headrevs(self):
1230 1230 count = len(self)
1231 1231 if not count:
1232 1232 return [nullrev]
1233 1233 # we won't iter over filtered rev so nobody is a head at start
1234 1234 ishead = [0] * (count + 1)
1235 1235 index = self.index
1236 1236 for r in self:
1237 1237 ishead[r] = 1 # I may be a head
1238 1238 e = index[r]
1239 1239 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1240 1240 return [r for r, val in enumerate(ishead) if val]
1241 1241
1242 1242 def heads(self, start=None, stop=None):
1243 1243 """return the list of all nodes that have no children
1244 1244
1245 1245 if start is specified, only heads that are descendants of
1246 1246 start will be returned
1247 1247 if stop is specified, it will consider all the revs from stop
1248 1248 as if they had no children
1249 1249 """
1250 1250 if start is None and stop is None:
1251 1251 if not len(self):
1252 1252 return [nullid]
1253 1253 return [self.node(r) for r in self.headrevs()]
1254 1254
1255 1255 if start is None:
1256 1256 start = nullid
1257 1257 if stop is None:
1258 1258 stop = []
1259 1259 stoprevs = set([self.rev(n) for n in stop])
1260 1260 startrev = self.rev(start)
1261 1261 reachable = {startrev}
1262 1262 heads = {startrev}
1263 1263
1264 1264 parentrevs = self.parentrevs
1265 1265 for r in self.revs(start=startrev + 1):
1266 1266 for p in parentrevs(r):
1267 1267 if p in reachable:
1268 1268 if r not in stoprevs:
1269 1269 reachable.add(r)
1270 1270 heads.add(r)
1271 1271 if p in heads and p not in stoprevs:
1272 1272 heads.remove(p)
1273 1273
1274 1274 return [self.node(r) for r in heads]
1275 1275
1276 1276 def children(self, node):
1277 1277 """find the children of a given node"""
1278 1278 c = []
1279 1279 p = self.rev(node)
1280 1280 for r in self.revs(start=p + 1):
1281 1281 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1282 1282 if prevs:
1283 1283 for pr in prevs:
1284 1284 if pr == p:
1285 1285 c.append(self.node(r))
1286 1286 elif p == nullrev:
1287 1287 c.append(self.node(r))
1288 1288 return c
1289 1289
1290 1290 def descendant(self, start, end):
1291 1291 if start == nullrev:
1292 1292 return True
1293 1293 for i in self.descendants([start]):
1294 1294 if i == end:
1295 1295 return True
1296 1296 elif i > end:
1297 1297 break
1298 1298 return False
1299 1299
1300 1300 def commonancestorsheads(self, a, b):
1301 1301 """calculate all the heads of the common ancestors of nodes a and b"""
1302 1302 a, b = self.rev(a), self.rev(b)
1303 1303 try:
1304 1304 ancs = self.index.commonancestorsheads(a, b)
1305 1305 except (AttributeError, OverflowError): # C implementation failed
1306 1306 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1307 1307 return pycompat.maplist(self.node, ancs)
1308 1308
1309 1309 def isancestor(self, a, b):
1310 1310 """return True if node a is an ancestor of node b
1311 1311
1312 1312 The implementation of this is trivial but the use of
1313 1313 commonancestorsheads is not."""
1314 1314 return a in self.commonancestorsheads(a, b)
1315 1315
1316 1316 def ancestor(self, a, b):
1317 1317 """calculate the "best" common ancestor of nodes a and b"""
1318 1318
1319 1319 a, b = self.rev(a), self.rev(b)
1320 1320 try:
1321 1321 ancs = self.index.ancestors(a, b)
1322 1322 except (AttributeError, OverflowError):
1323 1323 ancs = ancestor.ancestors(self.parentrevs, a, b)
1324 1324 if ancs:
1325 1325 # choose a consistent winner when there's a tie
1326 1326 return min(map(self.node, ancs))
1327 1327 return nullid
1328 1328
1329 1329 def _match(self, id):
1330 1330 if isinstance(id, int):
1331 1331 # rev
1332 1332 return self.node(id)
1333 1333 if len(id) == 20:
1334 1334 # possibly a binary node
1335 1335 # odds of a binary node being all hex in ASCII are 1 in 10**25
1336 1336 try:
1337 1337 node = id
1338 1338 self.rev(node) # quick search the index
1339 1339 return node
1340 1340 except LookupError:
1341 1341 pass # may be partial hex id
1342 1342 try:
1343 1343 # str(rev)
1344 1344 rev = int(id)
1345 1345 if str(rev) != id:
1346 1346 raise ValueError
1347 1347 if rev < 0:
1348 1348 rev = len(self) + rev
1349 1349 if rev < 0 or rev >= len(self):
1350 1350 raise ValueError
1351 1351 return self.node(rev)
1352 1352 except (ValueError, OverflowError):
1353 1353 pass
1354 1354 if len(id) == 40:
1355 1355 try:
1356 1356 # a full hex nodeid?
1357 1357 node = bin(id)
1358 1358 self.rev(node)
1359 1359 return node
1360 1360 except (TypeError, LookupError):
1361 1361 pass
1362 1362
1363 1363 def _partialmatch(self, id):
1364 1364 maybewdir = wdirhex.startswith(id)
1365 1365 try:
1366 1366 partial = self.index.partialmatch(id)
1367 1367 if partial and self.hasnode(partial):
1368 1368 if maybewdir:
1369 1369 # single 'ff...' match in radix tree, ambiguous with wdir
1370 1370 raise RevlogError
1371 1371 return partial
1372 1372 if maybewdir:
1373 1373 # no 'ff...' match in radix tree, wdir identified
1374 1374 raise error.WdirUnsupported
1375 1375 return None
1376 1376 except RevlogError:
1377 1377 # parsers.c radix tree lookup gave multiple matches
1378 1378 # fast path: for unfiltered changelog, radix tree is accurate
1379 1379 if not getattr(self, 'filteredrevs', None):
1380 1380 raise LookupError(id, self.indexfile,
1381 1381 _('ambiguous identifier'))
1382 1382 # fall through to slow path that filters hidden revisions
1383 1383 except (AttributeError, ValueError):
1384 1384 # we are pure python, or key was too short to search radix tree
1385 1385 pass
1386 1386
1387 1387 if id in self._pcache:
1388 1388 return self._pcache[id]
1389 1389
1390 1390 if len(id) < 40:
1391 1391 try:
1392 1392 # hex(node)[:...]
1393 1393 l = len(id) // 2 # grab an even number of digits
1394 1394 prefix = bin(id[:l * 2])
1395 1395 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1396 1396 nl = [n for n in nl if hex(n).startswith(id) and
1397 1397 self.hasnode(n)]
1398 1398 if len(nl) > 0:
1399 1399 if len(nl) == 1 and not maybewdir:
1400 1400 self._pcache[id] = nl[0]
1401 1401 return nl[0]
1402 1402 raise LookupError(id, self.indexfile,
1403 1403 _('ambiguous identifier'))
1404 1404 if maybewdir:
1405 1405 raise error.WdirUnsupported
1406 1406 return None
1407 1407 except (TypeError, binascii.Error):
1408 1408 pass
1409 1409
1410 1410 def lookup(self, id):
1411 1411 """locate a node based on:
1412 1412 - revision number or str(revision number)
1413 1413 - nodeid or subset of hex nodeid
1414 1414 """
1415 1415 n = self._match(id)
1416 1416 if n is not None:
1417 1417 return n
1418 1418 n = self._partialmatch(id)
1419 1419 if n:
1420 1420 return n
1421 1421
1422 1422 raise LookupError(id, self.indexfile, _('no match found'))
1423 1423
1424 1424 def shortest(self, hexnode, minlength=1):
1425 1425 """Find the shortest unambiguous prefix that matches hexnode."""
1426 1426 def isvalid(test):
1427 1427 try:
1428 1428 if self._partialmatch(test) is None:
1429 1429 return False
1430 1430
1431 1431 try:
1432 1432 i = int(test)
1433 1433 # if we are a pure int, then starting with zero will not be
1434 1434 # confused as a rev; or, obviously, if the int is larger
1435 1435 # than the value of the tip rev
1436 1436 if test[0] == '0' or i > len(self):
1437 1437 return True
1438 1438 return False
1439 1439 except ValueError:
1440 1440 return True
1441 1441 except error.RevlogError:
1442 1442 return False
1443 1443 except error.WdirUnsupported:
1444 1444 # single 'ff...' match
1445 1445 return True
1446 1446
1447 1447 shortest = hexnode
1448 1448 startlength = max(6, minlength)
1449 1449 length = startlength
1450 1450 while True:
1451 1451 test = hexnode[:length]
1452 1452 if isvalid(test):
1453 1453 shortest = test
1454 1454 if length == minlength or length > startlength:
1455 1455 return shortest
1456 1456 length -= 1
1457 1457 else:
1458 1458 length += 1
1459 1459 if len(shortest) <= length:
1460 1460 return shortest
1461 1461
1462 1462 def cmp(self, node, text):
1463 1463 """compare text with a given file revision
1464 1464
1465 1465 returns True if text is different from what is stored.
1466 1466 """
1467 1467 p1, p2 = self.parents(node)
1468 1468 return hash(text, p1, p2) != node
1469 1469
1470 1470 def _cachesegment(self, offset, data):
1471 1471 """Add a segment to the revlog cache.
1472 1472
1473 1473 Accepts an absolute offset and the data that is at that location.
1474 1474 """
1475 1475 o, d = self._chunkcache
1476 1476 # try to add to existing cache
1477 1477 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1478 1478 self._chunkcache = o, d + data
1479 1479 else:
1480 1480 self._chunkcache = offset, data
1481 1481
1482 1482 def _readsegment(self, offset, length, df=None):
1483 1483 """Load a segment of raw data from the revlog.
1484 1484
1485 1485 Accepts an absolute offset, length to read, and an optional existing
1486 1486 file handle to read from.
1487 1487
1488 1488 If an existing file handle is passed, it will be seeked and the
1489 1489 original seek position will NOT be restored.
1490 1490
1491 1491 Returns a str or buffer of raw byte data.
1492 1492 """
1493 1493 if df is not None:
1494 1494 closehandle = False
1495 1495 else:
1496 1496 if self._inline:
1497 1497 df = self.opener(self.indexfile)
1498 1498 else:
1499 1499 df = self.opener(self.datafile)
1500 1500 closehandle = True
1501 1501
1502 1502 # Cache data both forward and backward around the requested
1503 1503 # data, in a fixed size window. This helps speed up operations
1504 1504 # involving reading the revlog backwards.
1505 1505 cachesize = self._chunkcachesize
1506 1506 realoffset = offset & ~(cachesize - 1)
1507 1507 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1508 1508 - realoffset)
1509 1509 df.seek(realoffset)
1510 1510 d = df.read(reallength)
1511 1511 if closehandle:
1512 1512 df.close()
1513 1513 self._cachesegment(realoffset, d)
1514 1514 if offset != realoffset or reallength != length:
1515 1515 return util.buffer(d, offset - realoffset, length)
1516 1516 return d
1517 1517
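# Sketch, not part of revlog.py: the alignment math used by
# _readsegment(). Because the chunk cache size is a power of two
# (enforced in __init__), "& ~(cachesize - 1)" rounds down to a cache
# boundary, widening every read so that neighbouring requests hit the
# cache, e.g. _examplecachewindow(70000, 100) -> (65536, 65536).
def _examplecachewindow(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    return realoffset, reallength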
1518 1518 def _getsegment(self, offset, length, df=None):
1519 1519 """Obtain a segment of raw data from the revlog.
1520 1520
1521 1521 Accepts an absolute offset, length of bytes to obtain, and an
1522 1522 optional file handle to the already-opened revlog. If the file
1523 1523 handle is used, its original seek position will not be preserved.
1524 1524
1525 1525 Requests for data may be returned from a cache.
1526 1526
1527 1527 Returns a str or a buffer instance of raw byte data.
1528 1528 """
1529 1529 o, d = self._chunkcache
1530 1530 l = len(d)
1531 1531
1532 1532 # is it in the cache?
1533 1533 cachestart = offset - o
1534 1534 cacheend = cachestart + length
1535 1535 if cachestart >= 0 and cacheend <= l:
1536 1536 if cachestart == 0 and cacheend == l:
1537 1537 return d # avoid a copy
1538 1538 return util.buffer(d, cachestart, cacheend - cachestart)
1539 1539
1540 1540 return self._readsegment(offset, length, df=df)
1541 1541
1542 1542 def _getsegmentforrevs(self, startrev, endrev, df=None):
1543 1543 """Obtain a segment of raw data corresponding to a range of revisions.
1544 1544
1545 1545 Accepts the start and end revisions and an optional already-open
1546 1546 file handle to be used for reading. If the file handle is used, its
1547 1547 seek position will not be preserved.
1548 1548
1549 1549 Requests for data may be satisfied by a cache.
1550 1550
1551 1551 Returns a 2-tuple of (offset, data) for the requested range of
1552 1552 revisions. Offset is the integer offset from the beginning of the
1553 1553 revlog and data is a str or buffer of the raw byte data.
1554 1554
1555 1555 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1556 1556 to determine where each revision's data begins and ends.
1557 1557 """
1558 1558 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1559 1559 # (functions are expensive).
1560 1560 index = self.index
1561 1561 istart = index[startrev]
1562 1562 start = int(istart[0] >> 16)
1563 1563 if startrev == endrev:
1564 1564 end = start + istart[1]
1565 1565 else:
1566 1566 iend = index[endrev]
1567 1567 end = int(iend[0] >> 16) + iend[1]
1568 1568
1569 1569 if self._inline:
1570 1570 start += (startrev + 1) * self._io.size
1571 1571 end += (endrev + 1) * self._io.size
1572 1572 length = end - start
1573 1573
1574 1574 return start, self._getsegment(start, length, df=df)
1575 1575
1576 1576 def _chunk(self, rev, df=None):
1577 1577 """Obtain a single decompressed chunk for a revision.
1578 1578
1579 1579 Accepts an integer revision and an optional already-open file handle
1580 1580 to be used for reading. If used, the seek position of the file will not
1581 1581 be preserved.
1582 1582
1583 1583 Returns a str holding uncompressed data for the requested revision.
1584 1584 """
1585 1585 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1586 1586
1587 1587 def _chunks(self, revs, df=None):
1588 1588 """Obtain decompressed chunks for the specified revisions.
1589 1589
1590 1590 Accepts an iterable of numeric revisions that are assumed to be in
1591 1591 ascending order. Also accepts an optional already-open file handle
1592 1592 to be used for reading. If used, the seek position of the file will
1593 1593 not be preserved.
1594 1594
1595 1595 This function is similar to calling ``self._chunk()`` multiple times,
1596 1596 but is faster.
1597 1597
1598 1598 Returns a list with decompressed data for each requested revision.
1599 1599 """
1600 1600 if not revs:
1601 1601 return []
1602 1602 start = self.start
1603 1603 length = self.length
1604 1604 inline = self._inline
1605 1605 iosize = self._io.size
1606 1606 buffer = util.buffer
1607 1607
1608 1608 l = []
1609 1609 ladd = l.append
1610 1610
1611 1611 if not self._withsparseread:
1612 1612 slicedchunks = (revs,)
1613 1613 else:
1614 1614 slicedchunks = _slicechunk(self, revs)
1615 1615
1616 1616 for revschunk in slicedchunks:
1617 1617 firstrev = revschunk[0]
1618 1618 # Skip trailing revisions with empty diff
1619 1619 for lastrev in revschunk[::-1]:
1620 1620 if length(lastrev) != 0:
1621 1621 break
1622 1622
1623 1623 try:
1624 1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1625 1625 except OverflowError:
1626 1626 # issue4215 - we can't cache a run of chunks greater than
1627 1627 # 2G on Windows
1628 1628 return [self._chunk(rev, df=df) for rev in revschunk]
1629 1629
1630 1630 decomp = self.decompress
1631 1631 for rev in revschunk:
1632 1632 chunkstart = start(rev)
1633 1633 if inline:
1634 1634 chunkstart += (rev + 1) * iosize
1635 1635 chunklength = length(rev)
1636 1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1637 1637
1638 1638 return l
1639 1639
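# A hedged sketch of the slicing above: each revision's compressed
# chunk is taken as a zero-copy slice of the one segment read, located
# at (absolute chunk start - segment offset). memoryview stands in for
# util.buffer and all names here are illustrative.
def _slicechunks_sketch(data, offset, positions, decomp):
    # positions: iterable of (absolute start, length) pairs, ascending
    return [decomp(bytes(memoryview(data)[s - offset:s - offset + l]))
            for s, l in positions]
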
1640 1640 def _chunkclear(self):
1641 1641 """Clear the raw chunk cache."""
1642 1642 self._chunkcache = (0, '')
1643 1643
1644 1644 def deltaparent(self, rev):
1645 1645 """return deltaparent of the given revision"""
1646 1646 base = self.index[rev][3]
1647 1647 if base == rev:
1648 1648 return nullrev
1649 1649 elif self._generaldelta:
1650 1650 return base
1651 1651 else:
1652 1652 return rev - 1
1653 1653
1654 1654 def revdiff(self, rev1, rev2):
1655 1655 """return or calculate a delta between two revisions
1656 1656
1657 1657 The delta calculated is in binary form and is intended to be written to
1658 1658 revlog data directly. So this function needs raw revision data.
1659 1659 """
1660 1660 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1661 1661 return bytes(self._chunk(rev2))
1662 1662
1663 1663 return mdiff.textdiff(self.revision(rev1, raw=True),
1664 1664 self.revision(rev2, raw=True))
1665 1665
1666 1666 def revision(self, nodeorrev, _df=None, raw=False):
1667 1667 """return an uncompressed revision of a given node or revision
1668 1668 number.
1669 1669
1670 1670 _df - an existing file handle to read from. (internal-only)
1671 1671 raw - an optional argument specifying if the revision data is to be
1672 1672 treated as raw data when applying flag transforms. 'raw' should be set
1673 1673 to True when generating changegroups or in debug commands.
1674 1674 """
1675 1675 if isinstance(nodeorrev, int):
1676 1676 rev = nodeorrev
1677 1677 node = self.node(rev)
1678 1678 else:
1679 1679 node = nodeorrev
1680 1680 rev = None
1681 1681
1682 1682 cachedrev = None
1683 1683 flags = None
1684 1684 rawtext = None
1685 1685 if node == nullid:
1686 1686 return ""
1687 1687 if self._cache:
1688 1688 if self._cache[0] == node:
1689 1689 # _cache only stores rawtext
1690 1690 if raw:
1691 1691 return self._cache[2]
1692 1692 # duplicated, but good for perf
1693 1693 if rev is None:
1694 1694 rev = self.rev(node)
1695 1695 if flags is None:
1696 1696 flags = self.flags(rev)
1697 1697 # no extra flags set, no flag processor runs, text = rawtext
1698 1698 if flags == REVIDX_DEFAULT_FLAGS:
1699 1699 return self._cache[2]
1700 1700 # rawtext is reusable. need to run flag processor
1701 1701 rawtext = self._cache[2]
1702 1702
1703 1703 cachedrev = self._cache[1]
1704 1704
1705 1705 # look up what we need to read
1706 1706 if rawtext is None:
1707 1707 if rev is None:
1708 1708 rev = self.rev(node)
1709 1709
1710 1710 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1711 1711 if stopped:
1712 1712 rawtext = self._cache[2]
1713 1713
1714 1714 # drop cache to save memory
1715 1715 self._cache = None
1716 1716
1717 1717 bins = self._chunks(chain, df=_df)
1718 1718 if rawtext is None:
1719 1719 rawtext = bytes(bins[0])
1720 1720 bins = bins[1:]
1721 1721
1722 1722 rawtext = mdiff.patches(rawtext, bins)
1723 1723 self._cache = (node, rev, rawtext)
1724 1724
1725 1725 if flags is None:
1726 1726 if rev is None:
1727 1727 rev = self.rev(node)
1728 1728 flags = self.flags(rev)
1729 1729
1730 1730 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1731 1731 if validatehash:
1732 1732 self.checkhash(text, node, rev=rev)
1733 1733
1734 1734 return text
1735 1735
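# A hedged usage sketch for revision(); 'rl' stands for any revlog
# instance and 'node' for a nodeid stored in it:
def _readboth_sketch(rl, node):
    text = rl.revision(node)               # flag-processed text
    rawtext = rl.revision(node, raw=True)  # rawtext as stored on disk
    return text, rawtext
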
1736 1736 def hash(self, text, p1, p2):
1737 1737 """Compute a node hash.
1738 1738
1739 1739 Available as a function so that subclasses can replace the hash
1740 1740 as needed.
1741 1741 """
1742 1742 return hash(text, p1, p2)
1743 1743
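# For reference, a hedged restatement of the default node hash used
# above (the module-level hash() defined earlier in this file): the
# SHA-1 of the two parent nodes in sorted order followed by the text.
# This mirrors, but is not, the canonical implementation.
def _nodehash_sketch(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
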
1744 1744 def _processflags(self, text, flags, operation, raw=False):
1745 1745 """Inspect revision data flags and applies transforms defined by
1746 1746 registered flag processors.
1747 1747
1748 1748 ``text`` - the revision data to process
1749 1749 ``flags`` - the revision flags
1750 1750 ``operation`` - the operation being performed (read or write)
1751 1751 ``raw`` - an optional argument describing if the raw transform should be
1752 1752 applied.
1753 1753
1754 1754 This method processes the flags in the order (or reverse order if
1755 1755 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1756 1756 flag processors registered for present flags. The order of flags defined
1757 1757 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1758 1758
1759 1759 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1760 1760 processed text and ``validatehash`` is a bool indicating whether the
1761 1761 returned text should be checked for hash integrity.
1762 1762
1763 1763 Note: If the ``raw`` argument is set, it has precedence over the
1764 1764 operation and will only update the value of ``validatehash``.
1765 1765 """
1766 1766 # fast path: no flag processors will run
1767 1767 if flags == 0:
1768 1768 return text, True
1769 1769 if operation not in ('read', 'write'):
1770 1770 raise ProgrammingError(_("invalid '%s' operation") % operation)
1771 1771 # Check all flags are known.
1772 1772 if flags & ~REVIDX_KNOWN_FLAGS:
1773 1773 raise RevlogError(_("incompatible revision flag '%#x'") %
1774 1774 (flags & ~REVIDX_KNOWN_FLAGS))
1775 1775 validatehash = True
1776 1776 # Depending on the operation (read or write), the order might be
1777 1777 # reversed due to non-commutative transforms.
1778 1778 orderedflags = REVIDX_FLAGS_ORDER
1779 1779 if operation == 'write':
1780 1780 orderedflags = reversed(orderedflags)
1781 1781
1782 1782 for flag in orderedflags:
1783 1783 # If a flagprocessor has been registered for a known flag, apply the
1784 1784 # related operation transform and update result tuple.
1785 1785 if flag & flags:
1786 1786 vhash = True
1787 1787
1788 1788 if flag not in _flagprocessors:
1789 1789 message = _("missing processor for flag '%#x'") % (flag)
1790 1790 raise RevlogError(message)
1791 1791
1792 1792 processor = _flagprocessors[flag]
1793 1793 if processor is not None:
1794 1794 readtransform, writetransform, rawtransform = processor
1795 1795
1796 1796 if raw:
1797 1797 vhash = rawtransform(self, text)
1798 1798 elif operation == 'read':
1799 1799 text, vhash = readtransform(self, text)
1800 1800 else: # write operation
1801 1801 text, vhash = writetransform(self, text)
1802 1802 validatehash = validatehash and vhash
1803 1803
1804 1804 return text, validatehash
1805 1805
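# A hedged sketch of a flag processor tuple as consumed above; the
# transform bodies are identity placeholders, and the registration is
# left commented out because it is illustrative only:
def _readxform(rl, rawtext):
    return rawtext, True    # (text, validatehash)
def _writexform(rl, text):
    return text, True       # (rawtext, validatehash)
def _rawxform(rl, rawtext):
    return True             # rawtext can be hash-checked directly
# addflagprocessor(REVIDX_EXTSTORED, (_readxform, _writexform, _rawxform))
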
1806 1806 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1807 1807 """Check node hash integrity.
1808 1808
1809 1809 Available as a function so that subclasses can extend hash mismatch
1810 1810 behaviors as needed.
1811 1811 """
1812 1812 if p1 is None and p2 is None:
1813 1813 p1, p2 = self.parents(node)
1814 1814 if node != self.hash(text, p1, p2):
1815 1815 revornode = rev
1816 1816 if revornode is None:
1817 1817 revornode = templatefilters.short(hex(node))
1818 1818 raise RevlogError(_("integrity check failed on %s:%s")
1819 1819 % (self.indexfile, pycompat.bytestr(revornode)))
1820 1820
1821 1821 def checkinlinesize(self, tr, fp=None):
1822 1822 """Check if the revlog is too big for inline and convert if so.
1823 1823
1824 1824 This should be called after revisions are added to the revlog. If the
1825 1825 revlog has grown too large to be an inline revlog, it will convert it
1826 1826 to use multiple index and data files.
1827 1827 """
1828 1828 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1829 1829 return
1830 1830
1831 1831 trinfo = tr.find(self.indexfile)
1832 1832 if trinfo is None:
1833 1833 raise RevlogError(_("%s not found in the transaction")
1834 1834 % self.indexfile)
1835 1835
1836 1836 trindex = trinfo[2]
1837 1837 if trindex is not None:
1838 1838 dataoff = self.start(trindex)
1839 1839 else:
1840 1840 # revlog was stripped at start of transaction, use all leftover data
1841 1841 trindex = len(self) - 1
1842 1842 dataoff = self.end(-2)
1843 1843
1844 1844 tr.add(self.datafile, dataoff)
1845 1845
1846 1846 if fp:
1847 1847 fp.flush()
1848 1848 fp.close()
1849 1849
1850 1850 df = self.opener(self.datafile, 'w')
1851 1851 try:
1852 1852 for r in self:
1853 1853 df.write(self._getsegmentforrevs(r, r)[1])
1854 1854 finally:
1855 1855 df.close()
1856 1856
1857 1857 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1858 1858 checkambig=self._checkambig)
1859 1859 self.version &= ~FLAG_INLINE_DATA
1860 1860 self._inline = False
1861 1861 for i in self:
1862 1862 e = self._io.packentry(self.index[i], self.node, self.version, i)
1863 1863 fp.write(e)
1864 1864
1865 1865 # if we don't call close, the temp file will never replace the
1866 1866 # real index
1867 1867 fp.close()
1868 1868
1869 1869 tr.replace(self.indexfile, trindex * self._io.size)
1870 1870 self._chunkclear()
1871 1871
1872 1872 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1873 1873 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1874 1874 """add a revision to the log
1875 1875
1876 1876 text - the revision data to add
1877 1877 transaction - the transaction object used for rollback
1878 1878 link - the linkrev data to add
1879 1879 p1, p2 - the parent nodeids of the revision
1880 1880 cachedelta - an optional precomputed delta
1881 1881 node - nodeid of revision; typically node is not specified, and it is
1882 1882 computed by default as hash(text, p1, p2); however, subclasses might
1883 1883 use a different hashing method (and override checkhash() in that case)
1884 1884 flags - the known flags to set on the revision
1885 1885 deltacomputer - an optional _deltacomputer instance shared between
1886 1886 multiple calls
1887 1887 """
1888 1888 if link == nullrev:
1889 1889 raise RevlogError(_("attempted to add linkrev -1 to %s")
1890 1890 % self.indexfile)
1891 1891
1892 1892 if flags:
1893 1893 node = node or self.hash(text, p1, p2)
1894 1894
1895 1895 rawtext, validatehash = self._processflags(text, flags, 'write')
1896 1896
1897 1897 # If the flag processor modifies the revision data, ignore any provided
1898 1898 # cachedelta.
1899 1899 if rawtext != text:
1900 1900 cachedelta = None
1901 1901
1902 1902 if len(rawtext) > _maxentrysize:
1903 1903 raise RevlogError(
1904 1904 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1905 1905 % (self.indexfile, len(rawtext)))
1906 1906
1907 1907 node = node or self.hash(rawtext, p1, p2)
1908 1908 if node in self.nodemap:
1909 1909 return node
1910 1910
1911 1911 if validatehash:
1912 1912 self.checkhash(rawtext, node, p1=p1, p2=p2)
1913 1913
1914 1914 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1915 1915 flags, cachedelta=cachedelta,
1916 1916 deltacomputer=deltacomputer)
1917 1917
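# A hedged usage sketch for addrevision(); 'rl' is an open revlog,
# 'tr' a running transaction, and 'linkrev' the changelog revision the
# entry belongs to. All names are illustrative.
def _append_sketch(rl, tr, text, linkrev, p1node, p2node):
    # returns the new nodeid; flags default to REVIDX_DEFAULT_FLAGS
    return rl.addrevision(text, tr, linkrev, p1node, p2node)
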
1918 1918 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1919 1919 cachedelta=None, deltacomputer=None):
1920 1920 """add a raw revision with known flags, node and parents
1921 1921 useful when reusing a revision not stored in this revlog (e.g. received
1922 1922 over the wire, or read from an external bundle).
1923 1923 """
1924 1924 dfh = None
1925 1925 if not self._inline:
1926 1926 dfh = self.opener(self.datafile, "a+")
1927 1927 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1928 1928 try:
1929 1929 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1930 1930 flags, cachedelta, ifh, dfh,
1931 1931 deltacomputer=deltacomputer)
1932 1932 finally:
1933 1933 if dfh:
1934 1934 dfh.close()
1935 1935 ifh.close()
1936 1936
1937 1937 def compress(self, data):
1938 1938 """Generate a possibly-compressed representation of data."""
1939 1939 if not data:
1940 1940 return '', data
1941 1941
1942 1942 compressed = self._compressor.compress(data)
1943 1943
1944 1944 if compressed:
1945 1945 # The revlog compressor added the header in the returned data.
1946 1946 return '', compressed
1947 1947
1948 1948 if data[0:1] == '\0':
1949 1949 return '', data
1950 1950 return 'u', data
1951 1951
1952 1952 def decompress(self, data):
1953 1953 """Decompress a revlog chunk.
1954 1954
1955 1955 The chunk is expected to begin with a header identifying the
1956 1956 format type so it can be routed to an appropriate decompressor.
1957 1957 """
1958 1958 if not data:
1959 1959 return data
1960 1960
1961 1961 # Revlogs are read much more frequently than they are written and many
1962 1962 # chunks only take microseconds to decompress, so performance is
1963 1963 # important here.
1964 1964 #
1965 1965 # We can make a few assumptions about revlogs:
1966 1966 #
1967 1967 # 1) the majority of chunks will be compressed (as opposed to inline
1968 1968 # raw data).
1969 1969 # 2) decompressing *any* data will likely be at least 10x slower than
1970 1970 # returning raw inline data.
1971 1971 # 3) we want to prioritize common and officially supported compression
1972 1972 # engines
1973 1973 #
1974 1974 # It follows that we want to optimize for "decompress compressed data
1975 1975 # when encoded with common and officially supported compression engines"
1976 1976 # case over "raw data" and "data encoded by less common or non-official
1977 1977 # compression engines." That is why we have the inline lookup first
1978 1978 # followed by the compengines lookup.
1979 1979 #
1980 1980 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1981 1981 # compressed chunks. And this matters for changelog and manifest reads.
1982 1982 t = data[0:1]
1983 1983
1984 1984 if t == 'x':
1985 1985 try:
1986 1986 return _zlibdecompress(data)
1987 1987 except zlib.error as e:
1988 1988 raise RevlogError(_('revlog decompress error: %s') % str(e))
1989 1989 # '\0' is more common than 'u' so it goes first.
1990 1990 elif t == '\0':
1991 1991 return data
1992 1992 elif t == 'u':
1993 1993 return util.buffer(data, 1)
1994 1994
1995 1995 try:
1996 1996 compressor = self._decompressors[t]
1997 1997 except KeyError:
1998 1998 try:
1999 1999 engine = util.compengines.forrevlogheader(t)
2000 2000 compressor = engine.revlogcompressor()
2001 2001 self._decompressors[t] = compressor
2002 2002 except KeyError:
2003 2003 raise RevlogError(_('unknown compression type %r') % t)
2004 2004
2005 2005 return compressor.decompress(data)
2006 2006
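# A hedged sketch of the header dispatch above: compress() returns a
# (header, data) pair where 'u' marks chunks stored uncompressed, an
# empty header means the data carries its own marker ('\0' for raw,
# e.g. 'x' for zlib), and decompress() routes on that first byte, so
# for any chunk the round trip below yields the original byte content:
def _roundtrip_sketch(rl, data):
    header, packed = rl.compress(data)
    return rl.decompress(header + packed)
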
2007 2007 def _isgooddeltainfo(self, d, textlen):
2008 2008 """Returns True if the given delta is good. Good means that it is within
2009 2009 the disk span, disk size, and chain length bounds that we know to be
2010 2010 performant."""
2011 2011 if d is None:
2012 2012 return False
2013 2013
2014 2014 # - 'd.distance' is the distance from the base revision -- bounding it
2015 2015 # limits the amount of I/O we need to do.
2016 2016 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2017 2017 # need to apply -- bounding it limits the amount of CPU we consume.
2018 2018
2019 2019 defaultmax = textlen * 4
2020 2020 maxdist = self._maxdeltachainspan
2021 2021 if not maxdist:
2022 2022 maxdist = d.distance # disable the distance check below
2023 2023 maxdist = max(maxdist, defaultmax)
2024 2024 if (d.distance > maxdist or d.deltalen > textlen or
2025 2025 d.compresseddeltalen > textlen * 2 or
2026 2026 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2027 2027 return False
2028 2028
2029 2029 return True
2030 2030
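# A worked example of the bounds above, with illustrative numbers: for
# textlen = 1000 a candidate delta is rejected when the delta itself
# exceeds 1000 bytes, when its chain's compressed deltas sum past 2000
# bytes, or when _maxchainlen is set and the chain is longer. The disk
# span check only fires when _maxdeltachainspan is set, and then
# against max(_maxdeltachainspan, 4000); when it is unset, maxdist is
# seeded with d.distance itself so that comparison can never fail.
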
2031 2031 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2032 2032 cachedelta, ifh, dfh, alwayscache=False,
2033 2033 deltacomputer=None):
2034 2034 """internal function to add revisions to the log
2035 2035
2036 2036 see addrevision for argument descriptions.
2037 2037
2038 2038 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2039 2039
2040 2040 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2041 2041 be used.
2042 2042
2043 2043 invariants:
2044 2044 - rawtext is optional (can be None); if not set, cachedelta must be set.
2045 2045 if both are set, they must correspond to each other.
2046 2046 """
2047 2047 if node == nullid:
2048 2048 raise RevlogError(_("%s: attempt to add null revision") %
2049 2049 (self.indexfile))
2050 2050 if node == wdirid:
2051 2051 raise RevlogError(_("%s: attempt to add wdir revision") %
2052 2052 (self.indexfile))
2053 2053
2054 2054 if self._inline:
2055 2055 fh = ifh
2056 2056 else:
2057 2057 fh = dfh
2058 2058
2059 2059 btext = [rawtext]
2060 2060
2061 2061 curr = len(self)
2062 2062 prev = curr - 1
2063 2063 offset = self.end(prev)
2064 2064 p1r, p2r = self.rev(p1), self.rev(p2)
2065 2065
2066 2066 # full versions are inserted when the needed deltas
2067 2067 # become comparable to the uncompressed text
2068 2068 if rawtext is None:
2069 2069 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2070 2070 cachedelta[1])
2071 2071 else:
2072 2072 textlen = len(rawtext)
2073 2073
2074 2074 if deltacomputer is None:
2075 2075 deltacomputer = _deltacomputer(self)
2076 2076
2077 2077 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2078 2078 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2079 2079
2080 2080 if deltainfo is not None:
2081 2081 base = deltainfo.base
2082 2082 chainbase = deltainfo.chainbase
2083 2083 data = deltainfo.data
2084 2084 l = deltainfo.deltalen
2085 2085 else:
2086 2086 rawtext = deltacomputer.buildtext(revinfo, fh)
2087 2087 data = self.compress(rawtext)
2088 2088 l = len(data[1]) + len(data[0])
2089 2089 base = chainbase = curr
2090 2090
2091 2091 e = (offset_type(offset, flags), l, textlen,
2092 2092 base, link, p1r, p2r, node)
2093 2093 self.index.insert(-1, e)
2094 2094 self.nodemap[node] = curr
2095 2095
2096 2096 entry = self._io.packentry(e, self.node, self.version, curr)
2097 2097 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2098 2098
2099 2099 if alwayscache and rawtext is None:
2100 2100 rawtext = deltacomputer.buildtext(revinfo, fh)
2101 2101
2102 if type(rawtext) == str: # only accept immutable objects
2102 if type(rawtext) == bytes: # only accept immutable objects
2103 2103 self._cache = (node, curr, rawtext)
2104 2104 self._chainbasecache[curr] = chainbase
2105 2105 return node
2106 2106
2107 2107 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2108 2108 # Files opened in a+ mode have inconsistent behavior on various
2109 2109 # platforms. Windows requires that a file positioning call be made
2110 2110 # when the file handle transitions between reads and writes. See
2111 2111 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2112 2112 # platforms, Python or the platform itself can be buggy. Some versions
2113 2113 # of Solaris have been observed to not append at the end of the file
2114 2114 # if the file was seeked to before the end. See issue4943 for more.
2115 2115 #
2116 2116 # We work around this issue by inserting a seek() before writing.
2117 2117 # Note: This is likely not necessary on Python 3.
2118 2118 ifh.seek(0, os.SEEK_END)
2119 2119 if dfh:
2120 2120 dfh.seek(0, os.SEEK_END)
2121 2121
2122 2122 curr = len(self) - 1
2123 2123 if not self._inline:
2124 2124 transaction.add(self.datafile, offset)
2125 2125 transaction.add(self.indexfile, curr * len(entry))
2126 2126 if data[0]:
2127 2127 dfh.write(data[0])
2128 2128 dfh.write(data[1])
2129 2129 ifh.write(entry)
2130 2130 else:
2131 2131 offset += curr * self._io.size
2132 2132 transaction.add(self.indexfile, offset, curr)
2133 2133 ifh.write(entry)
2134 2134 ifh.write(data[0])
2135 2135 ifh.write(data[1])
2136 2136 self.checkinlinesize(transaction, ifh)
2137 2137
2138 2138 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2139 2139 """
2140 2140 add a delta group
2141 2141
2142 2142 given a set of deltas, add them to the revision log. the
2143 2143 first delta is against its parent, which should be in our
2144 2144 log, the rest are against the previous delta.
2145 2145
2146 2146 If ``addrevisioncb`` is defined, it will be called with arguments of
2147 2147 this revlog and the node that was added.
2148 2148 """
2149 2149
2150 2150 nodes = []
2151 2151
2152 2152 r = len(self)
2153 2153 end = 0
2154 2154 if r:
2155 2155 end = self.end(r - 1)
2156 2156 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2157 2157 isize = r * self._io.size
2158 2158 if self._inline:
2159 2159 transaction.add(self.indexfile, end + isize, r)
2160 2160 dfh = None
2161 2161 else:
2162 2162 transaction.add(self.indexfile, isize, r)
2163 2163 transaction.add(self.datafile, end)
2164 2164 dfh = self.opener(self.datafile, "a+")
2165 2165 def flush():
2166 2166 if dfh:
2167 2167 dfh.flush()
2168 2168 ifh.flush()
2169 2169 try:
2170 2170 deltacomputer = _deltacomputer(self)
2171 2171 # loop through our set of deltas
2172 2172 for data in deltas:
2173 2173 node, p1, p2, linknode, deltabase, delta, flags = data
2174 2174 link = linkmapper(linknode)
2175 2175 flags = flags or REVIDX_DEFAULT_FLAGS
2176 2176
2177 2177 nodes.append(node)
2178 2178
2179 2179 if node in self.nodemap:
2180 2180 # this can happen if two branches make the same change
2181 2181 continue
2182 2182
2183 2183 for p in (p1, p2):
2184 2184 if p not in self.nodemap:
2185 2185 raise LookupError(p, self.indexfile,
2186 2186 _('unknown parent'))
2187 2187
2188 2188 if deltabase not in self.nodemap:
2189 2189 raise LookupError(deltabase, self.indexfile,
2190 2190 _('unknown delta base'))
2191 2191
2192 2192 baserev = self.rev(deltabase)
2193 2193
2194 2194 if baserev != nullrev and self.iscensored(baserev):
2195 2195 # if base is censored, delta must be full replacement in a
2196 2196 # single patch operation
2197 2197 hlen = struct.calcsize(">lll")
2198 2198 oldlen = self.rawsize(baserev)
2199 2199 newlen = len(delta) - hlen
2200 2200 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2201 2201 raise error.CensoredBaseError(self.indexfile,
2202 2202 self.node(baserev))
2203 2203
2204 2204 if not flags and self._peek_iscensored(baserev, delta, flush):
2205 2205 flags |= REVIDX_ISCENSORED
2206 2206
2207 2207 # We assume consumers of addrevisioncb will want to retrieve
2208 2208 # the added revision, which will require a call to
2209 2209 # revision(). revision() will fast path if there is a cache
2210 2210 # hit. So, we tell _addrevision() to always cache in this case.
2211 2211 # We're only using addgroup() in the context of changegroup
2212 2212 # generation so the revision data can always be handled as raw
2213 2213 # by the flagprocessor.
2214 2214 self._addrevision(node, None, transaction, link,
2215 2215 p1, p2, flags, (baserev, delta),
2216 2216 ifh, dfh,
2217 2217 alwayscache=bool(addrevisioncb),
2218 2218 deltacomputer=deltacomputer)
2219 2219
2220 2220 if addrevisioncb:
2221 2221 addrevisioncb(self, node)
2222 2222
2223 2223 if not dfh and not self._inline:
2224 2224 # addrevision switched from inline to conventional
2225 2225 # reopen the index
2226 2226 ifh.close()
2227 2227 dfh = self.opener(self.datafile, "a+")
2228 2228 ifh = self.opener(self.indexfile, "a+",
2229 2229 checkambig=self._checkambig)
2230 2230 finally:
2231 2231 if dfh:
2232 2232 dfh.close()
2233 2233 ifh.close()
2234 2234
2235 2235 return nodes
2236 2236
2237 2237 def iscensored(self, rev):
2238 2238 """Check if a file revision is censored."""
2239 2239 return False
2240 2240
2241 2241 def _peek_iscensored(self, baserev, delta, flush):
2242 2242 """Quickly check if a delta produces a censored revision."""
2243 2243 return False
2244 2244
2245 2245 def getstrippoint(self, minlink):
2246 2246 """find the minimum rev that must be stripped to strip the linkrev
2247 2247
2248 2248 Returns a tuple containing the minimum rev and a set of all revs that
2249 2249 have linkrevs that will be broken by this strip.
2250 2250 """
2251 2251 brokenrevs = set()
2252 2252 strippoint = len(self)
2253 2253
2254 2254 heads = {}
2255 2255 futurelargelinkrevs = set()
2256 2256 for head in self.headrevs():
2257 2257 headlinkrev = self.linkrev(head)
2258 2258 heads[head] = headlinkrev
2259 2259 if headlinkrev >= minlink:
2260 2260 futurelargelinkrevs.add(headlinkrev)
2261 2261
2262 2262 # This algorithm involves walking down the rev graph, starting at the
2263 2263 # heads. Since the revs are topologically sorted according to linkrev,
2264 2264 # once all head linkrevs are below the minlink, we know there are
2265 2265 # no more revs that could have a linkrev greater than minlink.
2266 2266 # So we can stop walking.
2267 2267 while futurelargelinkrevs:
2268 2268 strippoint -= 1
2269 2269 linkrev = heads.pop(strippoint)
2270 2270
2271 2271 if linkrev < minlink:
2272 2272 brokenrevs.add(strippoint)
2273 2273 else:
2274 2274 futurelargelinkrevs.remove(linkrev)
2275 2275
2276 2276 for p in self.parentrevs(strippoint):
2277 2277 if p != nullrev:
2278 2278 plinkrev = self.linkrev(p)
2279 2279 heads[p] = plinkrev
2280 2280 if plinkrev >= minlink:
2281 2281 futurelargelinkrevs.add(plinkrev)
2282 2282
2283 2283 return strippoint, brokenrevs
2284 2284
2285 2285 def strip(self, minlink, transaction):
2286 2286 """truncate the revlog on the first revision with a linkrev >= minlink
2287 2287
2288 2288 This function is called when we're stripping revision minlink and
2289 2289 its descendants from the repository.
2290 2290
2291 2291 We have to remove all revisions with linkrev >= minlink, because
2292 2292 the equivalent changelog revisions will be renumbered after the
2293 2293 strip.
2294 2294
2295 2295 So we truncate the revlog on the first of these revisions, and
2296 2296 trust that the caller has saved the revisions that shouldn't be
2297 2297 removed and that it'll re-add them after this truncation.
2298 2298 """
2299 2299 if len(self) == 0:
2300 2300 return
2301 2301
2302 2302 rev, _ = self.getstrippoint(minlink)
2303 2303 if rev == len(self):
2304 2304 return
2305 2305
2306 2306 # first truncate the files on disk
2307 2307 end = self.start(rev)
2308 2308 if not self._inline:
2309 2309 transaction.add(self.datafile, end)
2310 2310 end = rev * self._io.size
2311 2311 else:
2312 2312 end += rev * self._io.size
2313 2313
2314 2314 transaction.add(self.indexfile, end)
2315 2315
2316 2316 # then reset internal state in memory to forget those revisions
2317 2317 self._cache = None
2318 2318 self._chaininfocache = {}
2319 2319 self._chunkclear()
2320 2320 for x in xrange(rev, len(self)):
2321 2321 del self.nodemap[self.node(x)]
2322 2322
2323 2323 del self.index[rev:-1]
2324 2324
2325 2325 def checksize(self):
2326 2326 expected = 0
2327 2327 if len(self):
2328 2328 expected = max(0, self.end(len(self) - 1))
2329 2329
2330 2330 try:
2331 2331 f = self.opener(self.datafile)
2332 2332 f.seek(0, 2)
2333 2333 actual = f.tell()
2334 2334 f.close()
2335 2335 dd = actual - expected
2336 2336 except IOError as inst:
2337 2337 if inst.errno != errno.ENOENT:
2338 2338 raise
2339 2339 dd = 0
2340 2340
2341 2341 try:
2342 2342 f = self.opener(self.indexfile)
2343 2343 f.seek(0, 2)
2344 2344 actual = f.tell()
2345 2345 f.close()
2346 2346 s = self._io.size
2347 2347 i = max(0, actual // s)
2348 2348 di = actual - (i * s)
2349 2349 if self._inline:
2350 2350 databytes = 0
2351 2351 for r in self:
2352 2352 databytes += max(0, self.length(r))
2353 2353 dd = 0
2354 2354 di = actual - len(self) * s - databytes
2355 2355 except IOError as inst:
2356 2356 if inst.errno != errno.ENOENT:
2357 2357 raise
2358 2358 di = 0
2359 2359
2360 2360 return (dd, di)
2361 2361
2362 2362 def files(self):
2363 2363 res = [self.indexfile]
2364 2364 if not self._inline:
2365 2365 res.append(self.datafile)
2366 2366 return res
2367 2367
2368 2368 DELTAREUSEALWAYS = 'always'
2369 2369 DELTAREUSESAMEREVS = 'samerevs'
2370 2370 DELTAREUSENEVER = 'never'
2371 2371
2372 2372 DELTAREUSEFULLADD = 'fulladd'
2373 2373
2374 2374 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2375 2375
2376 2376 def clone(self, tr, destrevlog, addrevisioncb=None,
2377 2377 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2378 2378 """Copy this revlog to another, possibly with format changes.
2379 2379
2380 2380 The destination revlog will contain the same revisions and nodes.
2381 2381 However, it may not be bit-for-bit identical due to e.g. delta encoding
2382 2382 differences.
2383 2383
2384 2384 The ``deltareuse`` argument controls how deltas from the existing revlog
2385 2385 are preserved in the destination revlog. The argument can have the
2386 2386 following values:
2387 2387
2388 2388 DELTAREUSEALWAYS
2389 2389 Deltas will always be reused (if possible), even if the destination
2390 2390 revlog would not select the same revisions for the delta. This is the
2391 2391 fastest mode of operation.
2392 2392 DELTAREUSESAMEREVS
2393 2393 Deltas will be reused if the destination revlog would pick the same
2394 2394 revisions for the delta. This mode strikes a balance between speed
2395 2395 and optimization.
2396 2396 DELTAREUSENEVER
2397 2397 Deltas will never be reused. This is the slowest mode of execution.
2398 2398 This mode can be used to recompute deltas (e.g. if the diff/delta
2399 2399 algorithm changes).
2400 2400
2401 2401 Delta computation can be slow, so the choice of delta reuse policy can
2402 2402 significantly affect run time.
2403 2403
2404 2404 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2405 2405 two extremes. Deltas will be reused when they are appropriate. But when
2406 2406 a better delta base could be chosen, it will be. This means if you
2407 2407 are converting a non-generaldelta revlog to a generaldelta revlog,
2408 2408 deltas will be recomputed if the delta's parent isn't a parent of the
2409 2409 revision.
2410 2410
2411 2411 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2412 2412 controls whether to compute deltas against both parents for merges.
2413 2413 If unset, the destination revlog's current setting is kept.
2414 2414 """
2415 2415 if deltareuse not in self.DELTAREUSEALL:
2416 2416 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2417 2417
2418 2418 if len(destrevlog):
2419 2419 raise ValueError(_('destination revlog is not empty'))
2420 2420
2421 2421 if getattr(self, 'filteredrevs', None):
2422 2422 raise ValueError(_('source revlog has filtered revisions'))
2423 2423 if getattr(destrevlog, 'filteredrevs', None):
2424 2424 raise ValueError(_('destination revlog has filtered revisions'))
2425 2425
2426 2426 # lazydeltabase controls whether to reuse a cached delta, if possible.
2427 2427 oldlazydeltabase = destrevlog._lazydeltabase
2428 2428 oldamd = destrevlog._aggressivemergedeltas
2429 2429
2430 2430 try:
2431 2431 if deltareuse == self.DELTAREUSEALWAYS:
2432 2432 destrevlog._lazydeltabase = True
2433 2433 elif deltareuse == self.DELTAREUSESAMEREVS:
2434 2434 destrevlog._lazydeltabase = False
2435 2435
2436 2436 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2437 2437
2438 2438 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2439 2439 self.DELTAREUSESAMEREVS)
2440 2440
2441 2441 deltacomputer = _deltacomputer(destrevlog)
2442 2442 index = self.index
2443 2443 for rev in self:
2444 2444 entry = index[rev]
2445 2445
2446 2446 # Some classes override linkrev to take filtered revs into
2447 2447 # account. Use raw entry from index.
2448 2448 flags = entry[0] & 0xffff
2449 2449 linkrev = entry[4]
2450 2450 p1 = index[entry[5]][7]
2451 2451 p2 = index[entry[6]][7]
2452 2452 node = entry[7]
2453 2453
2454 2454 # (Possibly) reuse the delta from the revlog if allowed and
2455 2455 # the revlog chunk is a delta.
2456 2456 cachedelta = None
2457 2457 rawtext = None
2458 2458 if populatecachedelta:
2459 2459 dp = self.deltaparent(rev)
2460 2460 if dp != nullrev:
2461 2461 cachedelta = (dp, bytes(self._chunk(rev)))
2462 2462
2463 2463 if not cachedelta:
2464 2464 rawtext = self.revision(rev, raw=True)
2465 2465
2466 2466
2467 2467 if deltareuse == self.DELTAREUSEFULLADD:
2468 2468 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2469 2469 cachedelta=cachedelta,
2470 2470 node=node, flags=flags,
2471 2471 deltacomputer=deltacomputer)
2472 2472 else:
2473 2473 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2474 2474 checkambig=False)
2475 2475 dfh = None
2476 2476 if not destrevlog._inline:
2477 2477 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2478 2478 try:
2479 2479 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2480 2480 p2, flags, cachedelta, ifh, dfh,
2481 2481 deltacomputer=deltacomputer)
2482 2482 finally:
2483 2483 if dfh:
2484 2484 dfh.close()
2485 2485 ifh.close()
2486 2486
2487 2487 if addrevisioncb:
2488 2488 addrevisioncb(self, rev, node)
2489 2489 finally:
2490 2490 destrevlog._lazydeltabase = oldlazydeltabase
2491 2491 destrevlog._aggressivemergedeltas = oldamd
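
# A hedged usage sketch for clone(): recomputing every delta in the
# destination, e.g. after a delta or compression setting change.
# 'srcrl', 'destrl' and 'tr' are illustrative names for an existing
# source revlog, an empty destination revlog and an open transaction.
def _recompute_sketch(srcrl, destrl, tr):
    # destrl must be empty and neither revlog may have filtered revs
    srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)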