revlog: use context manager for index file life time in __init__...
Boris Feld
r35987:1f2b8a64 default
@@ -1,2502 +1,2501
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80
81 81 # max size of revlog with inline data
82 82 _maxinline = 131072
83 83 _chunksize = 1048576
84 84
85 85 RevlogError = error.RevlogError
86 86 LookupError = error.LookupError
87 87 CensoredNodeError = error.CensoredNodeError
88 88 ProgrammingError = error.ProgrammingError
89 89
90 90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 91 _flagprocessors = {
92 92 REVIDX_ISCENSORED: None,
93 93 }
94 94
95 95 def addflagprocessor(flag, processor):
96 96 """Register a flag processor on a revision data flag.
97 97
98 98 Invariant:
99 99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 100 - Only one flag processor can be registered on a specific flag.
101 101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 102 following signatures:
103 103 - (read) f(self, rawtext) -> text, bool
104 104 - (write) f(self, text) -> rawtext, bool
105 105 - (raw) f(self, rawtext) -> bool
106 106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 107 directly visible to the user.
108 108 The boolean returned by these transforms is used to determine whether
109 109 the returned text can be used for hash integrity checking. For example,
110 110 if "write" returns False, then "text" is used to generate hash. If
111 111 "write" returns True, that basically means "rawtext" returned by "write"
112 112 should be used to generate hash. Usually, "write" and "read" return
113 113 different booleans. And "raw" returns a same boolean as "write".
114 114
115 115 Note: The 'raw' transform is used for changegroup generation and in some
116 116 debug commands. In this case the transform only indicates whether the
117 117 contents can be used for hash integrity checks.
118 118 """
119 119 if not flag & REVIDX_KNOWN_FLAGS:
120 120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag not in REVIDX_FLAGS_ORDER:
123 123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag in _flagprocessors:
126 126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 127 raise error.Abort(msg)
128 128 _flagprocessors[flag] = processor
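
# Editor's sketch (not part of the upstream module): registering an
# identity processor on REVIDX_EXTSTORED. The three hypothetical
# callables follow the (read, write, raw) signatures documented above;
# returning True means the returned text is usable for hash checks.
#
#   def _noopread(self, rawtext):
#       return rawtext, True
#   def _noopwrite(self, text):
#       return text, True
#   def _noopraw(self, rawtext):
#       return True
#   addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))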
129 129
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
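
# Worked example (editor's note): an index entry's first field packs the
# 48-bit offset above the 16-bit flag field, so the helpers above
# round-trip losslessly:
#
#   >>> e0 = offset_type(1024, REVIDX_ISCENSORED)
#   >>> getoffset(e0), hex(gettype(e0))
#   (1024, '0x8000')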
140 140
141 141 _nullhash = hashlib.sha1(nullid)
142 142
143 143 def hash(text, p1, p2):
144 144 """generate a hash from the given text and its parent hashes
145 145
146 146 This hash combines both the current file contents and its history
147 147 in a manner that makes it easy to distinguish nodes with the same
148 148 content in the revision graph.
149 149 """
150 150 # As of now, if one of the parent nodes is null, p2 is null
151 151 if p2 == nullid:
152 152 # deep copy of a hash is faster than creating one
153 153 s = _nullhash.copy()
154 154 s.update(p1)
155 155 else:
156 156 # none of the parent nodes are nullid
157 157 if p1 < p2:
158 158 a = p1
159 159 b = p2
160 160 else:
161 161 a = p2
162 162 b = p1
163 163 s = hashlib.sha1(a)
164 164 s.update(b)
165 165 s.update(text)
166 166 return s.digest()
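
# Editor's note: when p2 is nullid the result equals
# sha1(nullid + p1 + text), which matches the ordering of the general
# branch (nullid sorts before any real node); copying the precomputed
# _nullhash just avoids rehashing the 20 null bytes on every call.
#
#   >>> p1 = b'\x01' * 20
#   >>> hash(b'data', p1, nullid) == hashlib.sha1(
#   ...     nullid + p1 + b'data').digest()
#   True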
167 167
168 168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 169 """returns revs[startidx:endidx] without empty trailing revs
170 170 """
171 171 length = revlog.length
172 172
173 173 if endidx is None:
174 174 endidx = len(revs)
175 175
176 176 # Trim empty revs at the end, but never the very first revision of a chain
177 177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 178 endidx -= 1
179 179
180 180 return revs[startidx:endidx]
181 181
182 182 def _slicechunk(revlog, revs):
183 183 """slice revs to reduce the amount of unrelated data to be read from disk.
184 184
185 185 ``revs`` is sliced into groups that should be read in one go.
186 186 Assumes that revs are sorted.
187 187 """
188 188 start = revlog.start
189 189 length = revlog.length
190 190
191 191 if len(revs) <= 1:
192 192 yield revs
193 193 return
194 194
195 195 startbyte = start(revs[0])
196 196 endbyte = start(revs[-1]) + length(revs[-1])
197 197 readdata = deltachainspan = endbyte - startbyte
198 198
199 199 chainpayload = sum(length(r) for r in revs)
200 200
201 201 if deltachainspan:
202 202 density = chainpayload / float(deltachainspan)
203 203 else:
204 204 density = 1.0
205 205
206 206 # Store the gaps in a heap to have them sorted by decreasing size
207 207 gapsheap = []
208 208 heapq.heapify(gapsheap)
209 209 prevend = None
210 210 for i, rev in enumerate(revs):
211 211 revstart = start(rev)
212 212 revlen = length(rev)
213 213
214 214 # Skip empty revisions to form larger holes
215 215 if revlen == 0:
216 216 continue
217 217
218 218 if prevend is not None:
219 219 gapsize = revstart - prevend
220 220 # only consider holes that are large enough
221 221 if gapsize > revlog._srmingapsize:
222 222 heapq.heappush(gapsheap, (-gapsize, i))
223 223
224 224 prevend = revstart + revlen
225 225
226 226 # Collect the indices of the largest holes until the density is acceptable
227 227 indicesheap = []
228 228 heapq.heapify(indicesheap)
229 229 while gapsheap and density < revlog._srdensitythreshold:
230 230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 231
232 232 heapq.heappush(indicesheap, gapidx)
233 233
234 234 # the gap sizes are stored as negatives to be sorted decreasingly
235 235 # by the heap
236 236 readdata -= (-oppgapsize)
237 237 if readdata > 0:
238 238 density = chainpayload / float(readdata)
239 239 else:
240 240 density = 1.0
241 241
242 242 # Cut the revs at collected indices
243 243 previdx = 0
244 244 while indicesheap:
245 245 idx = heapq.heappop(indicesheap)
246 246
247 247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 248 if chunk:
249 249 yield chunk
250 250
251 251 previdx = idx
252 252
253 253 chunk = _trimchunk(revlog, revs, previdx)
254 254 if chunk:
255 255 yield chunk
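
# Worked example (editor's note, hypothetical numbers): for revs whose
# chunks occupy bytes [0, 10), [10, 20) and [990, 1000), the delta chain
# spans 1000 bytes but carries only 30 bytes of payload, so the starting
# density is 30 / 1000 = 0.03. With the default _srdensitythreshold of
# 0.25 (and an _srmingapsize smaller than the 970-byte hole), the gap is
# cut and the two resulting slices are read separately.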
256 256
257 257 @attr.s(slots=True, frozen=True)
258 258 class _deltainfo(object):
259 259 distance = attr.ib()
260 260 deltalen = attr.ib()
261 261 data = attr.ib()
262 262 base = attr.ib()
263 263 chainbase = attr.ib()
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 267 class _deltacomputer(object):
268 268 def __init__(self, revlog):
269 269 self.revlog = revlog
270 270
271 271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 272 """
273 273 Provides revisions that are interesting to diff against,
274 274 grouped by level of ease.
275 275 """
276 276 revlog = self.revlog
277 277 curr = len(revlog)
278 278 prev = curr - 1
279 279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280 280
281 281 # should we try to build a delta?
282 282 if prev != nullrev and revlog.storedeltachains:
283 283 tested = set()
284 284 # This condition is true most of the time when processing
285 285 # changegroup data into a generaldelta repo. The only time it
286 286 # isn't true is if this is the first revision in a delta chain
287 287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 289 # Assume what we received from the server is a good choice
290 290 # build delta will reuse the cache
291 291 yield (cachedelta[0],)
292 292 tested.add(cachedelta[0])
293 293
294 294 if revlog._generaldelta:
295 295 # exclude already lazy tested base if any
296 296 parents = [p for p in (p1r, p2r)
297 297 if p != nullrev and p not in tested]
298 298 if parents and not revlog._aggressivemergedeltas:
299 299 # Pick whichever parent is closer to us (to minimize the
300 300 # chance of having to build a fulltext).
301 301 parents = [max(parents)]
302 302 tested.update(parents)
303 303 yield parents
304 304
305 305 if prev not in tested:
306 306 # other approaches failed; try against prev to hopefully save us
307 307 # a fulltext.
308 308 yield (prev,)
309 309
310 310 def buildtext(self, revinfo, fh):
311 311 """Builds a fulltext version of a revision
312 312
313 313 revinfo: _revisioninfo instance that contains all needed info
314 314 fh: file handle to either the .i or the .d revlog file,
315 315 depending on whether it is inlined or not
316 316 """
317 317 btext = revinfo.btext
318 318 if btext[0] is not None:
319 319 return btext[0]
320 320
321 321 revlog = self.revlog
322 322 cachedelta = revinfo.cachedelta
323 323 flags = revinfo.flags
324 324 node = revinfo.node
325 325
326 326 baserev = cachedelta[0]
327 327 delta = cachedelta[1]
328 328 # special case deltas which replace entire base; no need to decode
329 329 # base revision. this neatly avoids censored bases, which throw when
330 330 # they're decoded.
331 331 hlen = struct.calcsize(">lll")
332 332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 333 len(delta) - hlen):
334 334 btext[0] = delta[hlen:]
335 335 else:
336 336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 337 btext[0] = mdiff.patch(basetext, delta)
338 338
339 339 try:
340 340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 341 btext[0], validatehash = res
342 342 if validatehash:
343 343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 344 if flags & REVIDX_ISCENSORED:
345 345 raise RevlogError(_('node %s is not censored') % node)
346 346 except CensoredNodeError:
347 347 # must pass the censored index flag to add censored revisions
348 348 if not flags & REVIDX_ISCENSORED:
349 349 raise
350 350 return btext[0]
351 351
352 352 def _builddeltadiff(self, base, revinfo, fh):
353 353 revlog = self.revlog
354 354 t = self.buildtext(revinfo, fh)
355 355 if revlog.iscensored(base):
356 356 # deltas based on a censored revision must replace the
357 357 # full content in one patch, so delta works everywhere
358 358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 359 delta = header + t
360 360 else:
361 361 ptext = revlog.revision(base, _df=fh, raw=True)
362 362 delta = mdiff.textdiff(ptext, t)
363 363
364 364 return delta
365 365
366 366 def _builddeltainfo(self, revinfo, base, fh):
367 367 # can we use the cached delta?
368 368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 369 delta = revinfo.cachedelta[1]
370 370 else:
371 371 delta = self._builddeltadiff(base, revinfo, fh)
372 372 revlog = self.revlog
373 373 header, data = revlog.compress(delta)
374 374 deltalen = len(header) + len(data)
375 375 chainbase = revlog.chainbase(base)
376 376 offset = revlog.end(len(revlog) - 1)
377 377 dist = deltalen + offset - revlog.start(chainbase)
378 378 if revlog._generaldelta:
379 379 deltabase = base
380 380 else:
381 381 deltabase = chainbase
382 382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 383 chainlen += 1
384 384 compresseddeltalen += deltalen
385 385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 386 chainbase, chainlen, compresseddeltalen)
387 387
388 388 def finddeltainfo(self, revinfo, fh):
389 389 """Find an acceptable delta against a candidate revision
390 390
391 391 revinfo: information about the revision (instance of _revisioninfo)
392 392 fh: file handle to either the .i or the .d revlog file,
393 393 depending on whether it is inlined or not
394 394
395 395 Returns the first acceptable candidate revision, as ordered by
396 396 _getcandidaterevs
397 397 """
398 398 cachedelta = revinfo.cachedelta
399 399 p1 = revinfo.p1
400 400 p2 = revinfo.p2
401 401 revlog = self.revlog
402 402
403 403 deltainfo = None
404 404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 405 nominateddeltas = []
406 406 for candidaterev in candidaterevs:
407 407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 409 nominateddeltas.append(candidatedelta)
410 410 if nominateddeltas:
411 411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 412 break
413 413
414 414 return deltainfo
415 415
416 416 @attr.s(slots=True, frozen=True)
417 417 class _revisioninfo(object):
418 418 """Information about a revision that allows building its fulltext
419 419 node: expected hash of the revision
420 420 p1, p2: parent revs of the revision
421 421 btext: built text cache consisting of a one-element list
422 422 cachedelta: (baserev, uncompressed_delta) or None
423 423 flags: flags associated with the revision storage
424 424
425 425 One of btext[0] or cachedelta must be set.
426 426 """
427 427 node = attr.ib()
428 428 p1 = attr.ib()
429 429 p2 = attr.ib()
430 430 btext = attr.ib()
431 431 textlen = attr.ib()
432 432 cachedelta = attr.ib()
433 433 flags = attr.ib()
434 434
435 435 # index v0:
436 436 # 4 bytes: offset
437 437 # 4 bytes: compressed length
438 438 # 4 bytes: base rev
439 439 # 4 bytes: link rev
440 440 # 20 bytes: parent 1 nodeid
441 441 # 20 bytes: parent 2 nodeid
442 442 # 20 bytes: nodeid
443 443 indexformatv0 = struct.Struct(">4l20s20s20s")
444 444 indexformatv0_pack = indexformatv0.pack
445 445 indexformatv0_unpack = indexformatv0.unpack
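
# Editor's note: the v0 record size follows directly from the layout
# above, four 4-byte integers plus three 20-byte nodeids:
#
#   >>> struct.calcsize(">4l20s20s20s")
#   76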
446 446
447 447 class revlogoldio(object):
448 448 def __init__(self):
449 449 self.size = indexformatv0.size
450 450
451 451 def parseindex(self, data, inline):
452 452 s = self.size
453 453 index = []
454 454 nodemap = {nullid: nullrev}
455 455 n = off = 0
456 456 l = len(data)
457 457 while off + s <= l:
458 458 cur = data[off:off + s]
459 459 off += s
460 460 e = indexformatv0_unpack(cur)
461 461 # transform to revlogv1 format
462 462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
463 463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
464 464 index.append(e2)
465 465 nodemap[e[6]] = n
466 466 n += 1
467 467
468 468 # add the magic null revision at -1
469 469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 return index, nodemap, None
472 472
473 473 def packentry(self, entry, node, version, rev):
474 474 if gettype(entry[0]):
475 475 raise RevlogError(_('index entry flags need revlog version 1'))
476 476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
477 477 node(entry[5]), node(entry[6]), entry[7])
478 478 return indexformatv0_pack(*e2)
479 479
480 480 # index ng:
481 481 # 6 bytes: offset
482 482 # 2 bytes: flags
483 483 # 4 bytes: compressed length
484 484 # 4 bytes: uncompressed length
485 485 # 4 bytes: base rev
486 486 # 4 bytes: link rev
487 487 # 4 bytes: parent 1 rev
488 488 # 4 bytes: parent 2 rev
489 489 # 32 bytes: nodeid
490 490 indexformatng = struct.Struct(">Qiiiiii20s12x")
491 491 indexformatng_pack = indexformatng.pack
492 492 versionformat = struct.Struct(">I")
493 493 versionformat_pack = versionformat.pack
494 494 versionformat_unpack = versionformat.unpack
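
# Editor's note: the ng record is 64 bytes (8 + 6 * 4 + 20 + 12 bytes of
# trailing padding). Because the offset of revision 0 is always zero,
# packentry() below can overwrite the first 4 bytes of that entry with
# the version header without losing information:
#
#   >>> struct.calcsize(">Qiiiiii20s12x")
#   64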
495 495
496 496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 497 # signed integer)
498 498 _maxentrysize = 0x7fffffff
499 499
500 500 class revlogio(object):
501 501 def __init__(self):
502 502 self.size = indexformatng.size
503 503
504 504 def parseindex(self, data, inline):
505 505 # call the C implementation to parse the index data
506 506 index, cache = parsers.parse_index2(data, inline)
507 507 return index, getattr(index, 'nodemap', None), cache
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 p = indexformatng_pack(*entry)
511 511 if rev == 0:
512 512 p = versionformat_pack(version) + p[4:]
513 513 return p
514 514
515 515 class revlog(object):
516 516 """
517 517 the underlying revision storage object
518 518
519 519 A revlog consists of two parts, an index and the revision data.
520 520
521 521 The index is a file with a fixed record size containing
522 522 information on each revision, including its nodeid (hash), the
523 523 nodeids of its parents, the position and offset of its data within
524 524 the data file, and the revision it's based on. Finally, each entry
525 525 contains a linkrev entry that can serve as a pointer to external
526 526 data.
527 527
528 528 The revision data itself is a linear collection of data chunks.
529 529 Each chunk represents a revision and is usually represented as a
530 530 delta against the previous chunk. To bound lookup time, runs of
531 531 deltas are limited to about 2 times the length of the original
532 532 version data. This makes retrieval of a version proportional to
533 533 its size, or O(1) relative to the number of revisions.
534 534
535 535 Both pieces of the revlog are written to in an append-only
536 536 fashion, which means we never need to rewrite a file to insert or
537 537 remove data, and can use some simple techniques to avoid the need
538 538 for locking while reading.
539 539
540 540 If checkambig, indexfile is opened with checkambig=True at
541 541 writing, to avoid file stat ambiguity.
542 542
543 543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 544 index will be mmapped rather than read if it is larger than the
545 545 configured threshold.
546 546 """
547 547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 548 mmaplargeindex=False):
549 549 """
550 550 create a revlog object
551 551
552 552 opener is a function that abstracts the file opening operation
553 553 and can be used to implement COW semantics or the like.
554 554 """
555 555 self.indexfile = indexfile
556 556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 557 self.opener = opener
558 558 # When True, indexfile is opened with checkambig=True at writing, to
559 559 # avoid file stat ambiguity.
560 560 self._checkambig = checkambig
561 561 # 3-tuple of (node, rev, text) for a raw revision.
562 562 self._cache = None
563 563 # Maps rev to chain base rev.
564 564 self._chainbasecache = util.lrucachedict(100)
565 565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 566 self._chunkcache = (0, '')
567 567 # How much data to read and cache into the raw revlog data cache.
568 568 self._chunkcachesize = 65536
569 569 self._maxchainlen = None
570 570 self._aggressivemergedeltas = False
571 571 self.index = []
572 572 # Mapping of partial identifiers to full nodes.
573 573 self._pcache = {}
574 574 # Mapping of revision integer to full node.
575 575 self._nodecache = {nullid: nullrev}
576 576 self._nodepos = None
577 577 self._compengine = 'zlib'
578 578 self._maxdeltachainspan = -1
579 579 self._withsparseread = False
580 580 self._srdensitythreshold = 0.25
581 581 self._srmingapsize = 262144
582 582
583 583 mmapindexthreshold = None
584 584 v = REVLOG_DEFAULT_VERSION
585 585 opts = getattr(opener, 'options', None)
586 586 if opts is not None:
587 587 if 'revlogv2' in opts:
588 588 # version 2 revlogs always use generaldelta.
589 589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 590 elif 'revlogv1' in opts:
591 591 if 'generaldelta' in opts:
592 592 v |= FLAG_GENERALDELTA
593 593 else:
594 594 v = 0
595 595 if 'chunkcachesize' in opts:
596 596 self._chunkcachesize = opts['chunkcachesize']
597 597 if 'maxchainlen' in opts:
598 598 self._maxchainlen = opts['maxchainlen']
599 599 if 'aggressivemergedeltas' in opts:
600 600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 602 if 'compengine' in opts:
603 603 self._compengine = opts['compengine']
604 604 if 'maxdeltachainspan' in opts:
605 605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 607 mmapindexthreshold = opts['mmapindexthreshold']
608 608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 609 if 'sparse-read-density-threshold' in opts:
610 610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 611 if 'sparse-read-min-gap-size' in opts:
612 612 self._srmingapsize = opts['sparse-read-min-gap-size']
613 613
614 614 if self._chunkcachesize <= 0:
615 615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 616 'than 0') % self._chunkcachesize)
617 617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 619 'of 2') % self._chunkcachesize)
620 620
621 621 indexdata = ''
622 622 self._initempty = True
623 623 try:
624 f = self._indexfp()
625 if (mmapindexthreshold is not None and
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 indexdata = util.buffer(util.mmapread(f))
628 else:
629 indexdata = f.read()
630 f.close()
624 with self._indexfp() as f:
625 if (mmapindexthreshold is not None and
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 indexdata = util.buffer(util.mmapread(f))
628 else:
629 indexdata = f.read()
631 630 if len(indexdata) > 0:
632 631 v = versionformat_unpack(indexdata[:4])[0]
633 632 self._initempty = False
634 633 except IOError as inst:
635 634 if inst.errno != errno.ENOENT:
636 635 raise
637 636
638 637 self.version = v
639 638 self._inline = v & FLAG_INLINE_DATA
640 639 self._generaldelta = v & FLAG_GENERALDELTA
641 640 flags = v & ~0xFFFF
642 641 fmt = v & 0xFFFF
643 642 if fmt == REVLOGV0:
644 643 if flags:
645 644 raise RevlogError(_('unknown flags (%#04x) in version %d '
646 645 'revlog %s') %
647 646 (flags >> 16, fmt, self.indexfile))
648 647 elif fmt == REVLOGV1:
649 648 if flags & ~REVLOGV1_FLAGS:
650 649 raise RevlogError(_('unknown flags (%#04x) in version %d '
651 650 'revlog %s') %
652 651 (flags >> 16, fmt, self.indexfile))
653 652 elif fmt == REVLOGV2:
654 653 if flags & ~REVLOGV2_FLAGS:
655 654 raise RevlogError(_('unknown flags (%#04x) in version %d '
656 655 'revlog %s') %
657 656 (flags >> 16, fmt, self.indexfile))
658 657 else:
659 658 raise RevlogError(_('unknown version (%d) in revlog %s') %
660 659 (fmt, self.indexfile))
661 660
662 661 self.storedeltachains = True
663 662
664 663 self._io = revlogio()
665 664 if self.version == REVLOGV0:
666 665 self._io = revlogoldio()
667 666 try:
668 667 d = self._io.parseindex(indexdata, self._inline)
669 668 except (ValueError, IndexError):
670 669 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
671 670 self.index, nodemap, self._chunkcache = d
672 671 if nodemap is not None:
673 672 self.nodemap = self._nodecache = nodemap
674 673 if not self._chunkcache:
675 674 self._chunkclear()
676 675 # revnum -> (chain-length, sum-delta-length)
677 676 self._chaininfocache = {}
678 677 # revlog header -> revlog compressor
679 678 self._decompressors = {}
680 679
681 680 @util.propertycache
682 681 def _compressor(self):
683 682 return util.compengines[self._compengine].revlogcompressor()
684 683
685 684 def _indexfp(self, mode='r'):
686 685 """file object for the revlog's index file"""
687 686 args = {r'mode': mode}
688 687 if mode != 'r':
689 688 args[r'checkambig'] = self._checkambig
690 689 if mode == 'w':
691 690 args[r'atomictemp'] = True
692 691 return self.opener(self.indexfile, **args)
693 692
694 693 def _datafp(self, mode='r'):
695 694 """file object for the revlog's data file"""
696 695 return self.opener(self.datafile, mode=mode)
697 696
698 697 def tip(self):
699 698 return self.node(len(self.index) - 2)
700 699 def __contains__(self, rev):
701 700 return 0 <= rev < len(self)
702 701 def __len__(self):
703 702 return len(self.index) - 1
704 703 def __iter__(self):
705 704 return iter(xrange(len(self)))
706 705 def revs(self, start=0, stop=None):
707 706 """iterate over all rev in this revlog (from start to stop)"""
708 707 step = 1
709 708 if stop is not None:
710 709 if start > stop:
711 710 step = -1
712 711 stop += step
713 712 else:
714 713 stop = len(self)
715 714 return xrange(start, stop, step)
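
# Editor's note: iteration is inclusive at both ends and runs backwards
# when start > stop:
#
#   >>> list(rl.revs(2, 4)), list(rl.revs(4, 2))   # rl: hypothetical revlog
#   ([2, 3, 4], [4, 3, 2])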
716 715
717 716 @util.propertycache
718 717 def nodemap(self):
719 718 self.rev(self.node(0))
720 719 return self._nodecache
721 720
722 721 def hasnode(self, node):
723 722 try:
724 723 self.rev(node)
725 724 return True
726 725 except KeyError:
727 726 return False
728 727
729 728 def clearcaches(self):
730 729 self._cache = None
731 730 self._chainbasecache.clear()
732 731 self._chunkcache = (0, '')
733 732 self._pcache = {}
734 733
735 734 try:
736 735 self._nodecache.clearcaches()
737 736 except AttributeError:
738 737 self._nodecache = {nullid: nullrev}
739 738 self._nodepos = None
740 739
741 740 def rev(self, node):
742 741 try:
743 742 return self._nodecache[node]
744 743 except TypeError:
745 744 raise
746 745 except RevlogError:
747 746 # parsers.c radix tree lookup failed
748 747 if node == wdirid:
749 748 raise error.WdirUnsupported
750 749 raise LookupError(node, self.indexfile, _('no node'))
751 750 except KeyError:
752 751 # pure python cache lookup failed
753 752 n = self._nodecache
754 753 i = self.index
755 754 p = self._nodepos
756 755 if p is None:
757 756 p = len(i) - 2
758 757 for r in xrange(p, -1, -1):
759 758 v = i[r][7]
760 759 n[v] = r
761 760 if v == node:
762 761 self._nodepos = r - 1
763 762 return r
764 763 if node == wdirid:
765 764 raise error.WdirUnsupported
766 765 raise LookupError(node, self.indexfile, _('no node'))
767 766
768 767 # Accessors for index entries.
769 768
770 769 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
771 770 # are flags.
772 771 def start(self, rev):
773 772 return int(self.index[rev][0] >> 16)
774 773
775 774 def flags(self, rev):
776 775 return self.index[rev][0] & 0xFFFF
777 776
778 777 def length(self, rev):
779 778 return self.index[rev][1]
780 779
781 780 def rawsize(self, rev):
782 781 """return the length of the uncompressed text for a given revision"""
783 782 l = self.index[rev][2]
784 783 if l >= 0:
785 784 return l
786 785
787 786 t = self.revision(rev, raw=True)
788 787 return len(t)
789 788
790 789 def size(self, rev):
791 790 """length of non-raw text (processed by a "read" flag processor)"""
792 791 # fast path: if no "read" flag processor could change the content,
793 792 # size is rawsize. note: ELLIPSIS is known to not change the content.
794 793 flags = self.flags(rev)
795 794 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
796 795 return self.rawsize(rev)
797 796
798 797 return len(self.revision(rev, raw=False))
799 798
800 799 def chainbase(self, rev):
801 800 base = self._chainbasecache.get(rev)
802 801 if base is not None:
803 802 return base
804 803
805 804 index = self.index
806 805 base = index[rev][3]
807 806 while base != rev:
808 807 rev = base
809 808 base = index[rev][3]
810 809
811 810 self._chainbasecache[rev] = base
812 811 return base
813 812
814 813 def linkrev(self, rev):
815 814 return self.index[rev][4]
816 815
817 816 def parentrevs(self, rev):
818 817 try:
819 818 entry = self.index[rev]
820 819 except IndexError:
821 820 if rev == wdirrev:
822 821 raise error.WdirUnsupported
823 822 raise
824 823
825 824 return entry[5], entry[6]
826 825
827 826 def node(self, rev):
828 827 try:
829 828 return self.index[rev][7]
830 829 except IndexError:
831 830 if rev == wdirrev:
832 831 raise error.WdirUnsupported
833 832 raise
834 833
835 834 # Derived from index values.
836 835
837 836 def end(self, rev):
838 837 return self.start(rev) + self.length(rev)
839 838
840 839 def parents(self, node):
841 840 i = self.index
842 841 d = i[self.rev(node)]
843 842 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
844 843
845 844 def chainlen(self, rev):
846 845 return self._chaininfo(rev)[0]
847 846
848 847 def _chaininfo(self, rev):
849 848 chaininfocache = self._chaininfocache
850 849 if rev in chaininfocache:
851 850 return chaininfocache[rev]
852 851 index = self.index
853 852 generaldelta = self._generaldelta
854 853 iterrev = rev
855 854 e = index[iterrev]
856 855 clen = 0
857 856 compresseddeltalen = 0
858 857 while iterrev != e[3]:
859 858 clen += 1
860 859 compresseddeltalen += e[1]
861 860 if generaldelta:
862 861 iterrev = e[3]
863 862 else:
864 863 iterrev -= 1
865 864 if iterrev in chaininfocache:
866 865 t = chaininfocache[iterrev]
867 866 clen += t[0]
868 867 compresseddeltalen += t[1]
869 868 break
870 869 e = index[iterrev]
871 870 else:
872 871 # Add text length of base since decompressing that also takes
873 872 # work. For cache hits the length is already included.
874 873 compresseddeltalen += e[1]
875 874 r = (clen, compresseddeltalen)
876 875 chaininfocache[rev] = r
877 876 return r
878 877
879 878 def _deltachain(self, rev, stoprev=None):
880 879 """Obtain the delta chain for a revision.
881 880
882 881 ``stoprev`` specifies a revision to stop at. If not specified, we
883 882 stop at the base of the chain.
884 883
885 884 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
886 885 revs in ascending order and ``stopped`` is a bool indicating whether
887 886 ``stoprev`` was hit.
888 887 """
889 888 # Try C implementation.
890 889 try:
891 890 return self.index.deltachain(rev, stoprev, self._generaldelta)
892 891 except AttributeError:
893 892 pass
894 893
895 894 chain = []
896 895
897 896 # Alias to prevent attribute lookup in tight loop.
898 897 index = self.index
899 898 generaldelta = self._generaldelta
900 899
901 900 iterrev = rev
902 901 e = index[iterrev]
903 902 while iterrev != e[3] and iterrev != stoprev:
904 903 chain.append(iterrev)
905 904 if generaldelta:
906 905 iterrev = e[3]
907 906 else:
908 907 iterrev -= 1
909 908 e = index[iterrev]
910 909
911 910 if iterrev == stoprev:
912 911 stopped = True
913 912 else:
914 913 chain.append(iterrev)
915 914 stopped = False
916 915
917 916 chain.reverse()
918 917 return chain, stopped
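
# Editor's sketch (hypothetical revlog rl): without generaldelta each
# hop is simply rev - 1, so a revision whose chain starts at rev 2
# yields [2, 3, 4, 5]; with generaldelta each hop follows the stored
# base in entry[3], so chains may jump around the file.
#
#   >>> chain, stopped = rl._deltachain(5)
#   >>> chain[-1], stopped        # chain ascends and ends at rev itself
#   (5, False)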
919 918
920 919 def ancestors(self, revs, stoprev=0, inclusive=False):
921 920 """Generate the ancestors of 'revs' in reverse topological order.
922 921 Does not generate revs lower than stoprev.
923 922
924 923 See the documentation for ancestor.lazyancestors for more details."""
925 924
926 925 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
927 926 inclusive=inclusive)
928 927
929 928 def descendants(self, revs):
930 929 """Generate the descendants of 'revs' in revision order.
931 930
932 931 Yield a sequence of revision numbers starting with a child of
933 932 some rev in revs, i.e., each revision is *not* considered a
934 933 descendant of itself. Results are ordered by revision number (a
935 934 topological sort)."""
936 935 first = min(revs)
937 936 if first == nullrev:
938 937 for i in self:
939 938 yield i
940 939 return
941 940
942 941 seen = set(revs)
943 942 for i in self.revs(start=first + 1):
944 943 for x in self.parentrevs(i):
945 944 if x != nullrev and x in seen:
946 945 seen.add(i)
947 946 yield i
948 947 break
949 948
950 949 def findcommonmissing(self, common=None, heads=None):
951 950 """Return a tuple of the ancestors of common and the ancestors of heads
952 951 that are not ancestors of common. In revset terminology, we return the
953 952 tuple:
954 953
955 954 ::common, (::heads) - (::common)
956 955
957 956 The list is sorted by revision number, meaning it is
958 957 topologically sorted.
959 958
960 959 'heads' and 'common' are both lists of node IDs. If heads is
961 960 not supplied, uses all of the revlog's heads. If common is not
962 961 supplied, uses nullid."""
963 962 if common is None:
964 963 common = [nullid]
965 964 if heads is None:
966 965 heads = self.heads()
967 966
968 967 common = [self.rev(n) for n in common]
969 968 heads = [self.rev(n) for n in heads]
970 969
971 970 # we want the ancestors, but inclusive
972 971 class lazyset(object):
973 972 def __init__(self, lazyvalues):
974 973 self.addedvalues = set()
975 974 self.lazyvalues = lazyvalues
976 975
977 976 def __contains__(self, value):
978 977 return value in self.addedvalues or value in self.lazyvalues
979 978
980 979 def __iter__(self):
981 980 added = self.addedvalues
982 981 for r in added:
983 982 yield r
984 983 for r in self.lazyvalues:
985 984 if r not in added:
986 985 yield r
987 986
988 987 def add(self, value):
989 988 self.addedvalues.add(value)
990 989
991 990 def update(self, values):
992 991 self.addedvalues.update(values)
993 992
994 993 has = lazyset(self.ancestors(common))
995 994 has.add(nullrev)
996 995 has.update(common)
997 996
998 997 # take all ancestors from heads that aren't in has
999 998 missing = set()
1000 999 visit = collections.deque(r for r in heads if r not in has)
1001 1000 while visit:
1002 1001 r = visit.popleft()
1003 1002 if r in missing:
1004 1003 continue
1005 1004 else:
1006 1005 missing.add(r)
1007 1006 for p in self.parentrevs(r):
1008 1007 if p not in has:
1009 1008 visit.append(p)
1010 1009 missing = list(missing)
1011 1010 missing.sort()
1012 1011 return has, [self.node(miss) for miss in missing]
1013 1012
1014 1013 def incrementalmissingrevs(self, common=None):
1015 1014 """Return an object that can be used to incrementally compute the
1016 1015 revision numbers of the ancestors of arbitrary sets that are not
1017 1016 ancestors of common. This is an ancestor.incrementalmissingancestors
1018 1017 object.
1019 1018
1020 1019 'common' is a list of revision numbers. If common is not supplied, uses
1021 1020 nullrev.
1022 1021 """
1023 1022 if common is None:
1024 1023 common = [nullrev]
1025 1024
1026 1025 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1027 1026
1028 1027 def findmissingrevs(self, common=None, heads=None):
1029 1028 """Return the revision numbers of the ancestors of heads that
1030 1029 are not ancestors of common.
1031 1030
1032 1031 More specifically, return a list of revision numbers corresponding to
1033 1032 nodes N such that every N satisfies the following constraints:
1034 1033
1035 1034 1. N is an ancestor of some node in 'heads'
1036 1035 2. N is not an ancestor of any node in 'common'
1037 1036
1038 1037 The list is sorted by revision number, meaning it is
1039 1038 topologically sorted.
1040 1039
1041 1040 'heads' and 'common' are both lists of revision numbers. If heads is
1042 1041 not supplied, uses all of the revlog's heads. If common is not
1043 1042 supplied, uses nullid."""
1044 1043 if common is None:
1045 1044 common = [nullrev]
1046 1045 if heads is None:
1047 1046 heads = self.headrevs()
1048 1047
1049 1048 inc = self.incrementalmissingrevs(common=common)
1050 1049 return inc.missingancestors(heads)
1051 1050
1052 1051 def findmissing(self, common=None, heads=None):
1053 1052 """Return the ancestors of heads that are not ancestors of common.
1054 1053
1055 1054 More specifically, return a list of nodes N such that every N
1056 1055 satisfies the following constraints:
1057 1056
1058 1057 1. N is an ancestor of some node in 'heads'
1059 1058 2. N is not an ancestor of any node in 'common'
1060 1059
1061 1060 The list is sorted by revision number, meaning it is
1062 1061 topologically sorted.
1063 1062
1064 1063 'heads' and 'common' are both lists of node IDs. If heads is
1065 1064 not supplied, uses all of the revlog's heads. If common is not
1066 1065 supplied, uses nullid."""
1067 1066 if common is None:
1068 1067 common = [nullid]
1069 1068 if heads is None:
1070 1069 heads = self.heads()
1071 1070
1072 1071 common = [self.rev(n) for n in common]
1073 1072 heads = [self.rev(n) for n in heads]
1074 1073
1075 1074 inc = self.incrementalmissingrevs(common=common)
1076 1075 return [self.node(r) for r in inc.missingancestors(heads)]
1077 1076
1078 1077 def nodesbetween(self, roots=None, heads=None):
1079 1078 """Return a topological path from 'roots' to 'heads'.
1080 1079
1081 1080 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1082 1081 topologically sorted list of all nodes N that satisfy both of
1083 1082 these constraints:
1084 1083
1085 1084 1. N is a descendant of some node in 'roots'
1086 1085 2. N is an ancestor of some node in 'heads'
1087 1086
1088 1087 Every node is considered to be both a descendant and an ancestor
1089 1088 of itself, so every reachable node in 'roots' and 'heads' will be
1090 1089 included in 'nodes'.
1091 1090
1092 1091 'outroots' is the list of reachable nodes in 'roots', i.e., the
1093 1092 subset of 'roots' that is returned in 'nodes'. Likewise,
1094 1093 'outheads' is the subset of 'heads' that is also in 'nodes'.
1095 1094
1096 1095 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1097 1096 unspecified, uses nullid as the only root. If 'heads' is
1098 1097 unspecified, uses list of all of the revlog's heads."""
1099 1098 nonodes = ([], [], [])
1100 1099 if roots is not None:
1101 1100 roots = list(roots)
1102 1101 if not roots:
1103 1102 return nonodes
1104 1103 lowestrev = min([self.rev(n) for n in roots])
1105 1104 else:
1106 1105 roots = [nullid] # Everybody's a descendant of nullid
1107 1106 lowestrev = nullrev
1108 1107 if (lowestrev == nullrev) and (heads is None):
1109 1108 # We want _all_ the nodes!
1110 1109 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1111 1110 if heads is None:
1112 1111 # All nodes are ancestors, so the latest ancestor is the last
1113 1112 # node.
1114 1113 highestrev = len(self) - 1
1115 1114 # Set ancestors to None to signal that every node is an ancestor.
1116 1115 ancestors = None
1117 1116 # Set heads to an empty dictionary for later discovery of heads
1118 1117 heads = {}
1119 1118 else:
1120 1119 heads = list(heads)
1121 1120 if not heads:
1122 1121 return nonodes
1123 1122 ancestors = set()
1124 1123 # Turn heads into a dictionary so we can remove 'fake' heads.
1125 1124 # Also, later we will be using it to filter out the heads we can't
1126 1125 # find from roots.
1127 1126 heads = dict.fromkeys(heads, False)
1128 1127 # Start at the top and keep marking parents until we're done.
1129 1128 nodestotag = set(heads)
1130 1129 # Remember where the top was so we can use it as a limit later.
1131 1130 highestrev = max([self.rev(n) for n in nodestotag])
1132 1131 while nodestotag:
1133 1132 # grab a node to tag
1134 1133 n = nodestotag.pop()
1135 1134 # Never tag nullid
1136 1135 if n == nullid:
1137 1136 continue
1138 1137 # A node's revision number represents its place in a
1139 1138 # topologically sorted list of nodes.
1140 1139 r = self.rev(n)
1141 1140 if r >= lowestrev:
1142 1141 if n not in ancestors:
1143 1142 # If we are possibly a descendant of one of the roots
1144 1143 # and we haven't already been marked as an ancestor
1145 1144 ancestors.add(n) # Mark as ancestor
1146 1145 # Add non-nullid parents to list of nodes to tag.
1147 1146 nodestotag.update([p for p in self.parents(n) if
1148 1147 p != nullid])
1149 1148 elif n in heads: # We've seen it before, is it a fake head?
1150 1149 # So it is; real heads should not be the ancestors of
1151 1150 # any other heads.
1152 1151 heads.pop(n)
1153 1152 if not ancestors:
1154 1153 return nonodes
1155 1154 # Now that we have our set of ancestors, we want to remove any
1156 1155 # roots that are not ancestors.
1157 1156
1158 1157 # If one of the roots was nullid, everything is included anyway.
1159 1158 if lowestrev > nullrev:
1160 1159 # But, since we weren't, let's recompute the lowest rev to not
1161 1160 # include roots that aren't ancestors.
1162 1161
1163 1162 # Filter out roots that aren't ancestors of heads
1164 1163 roots = [root for root in roots if root in ancestors]
1165 1164 # Recompute the lowest revision
1166 1165 if roots:
1167 1166 lowestrev = min([self.rev(root) for root in roots])
1168 1167 else:
1169 1168 # No more roots? Return empty list
1170 1169 return nonodes
1171 1170 else:
1172 1171 # We are descending from nullid, and don't need to care about
1173 1172 # any other roots.
1174 1173 lowestrev = nullrev
1175 1174 roots = [nullid]
1176 1175 # Transform our roots list into a set.
1177 1176 descendants = set(roots)
1178 1177 # Also, keep the original roots so we can filter out roots that aren't
1179 1178 # 'real' roots (i.e. are descended from other roots).
1180 1179 roots = descendants.copy()
1181 1180 # Our topologically sorted list of output nodes.
1182 1181 orderedout = []
1183 1182 # Don't start at nullid since we don't want nullid in our output list,
1184 1183 # and if nullid shows up in descendants, empty parents will look like
1185 1184 # they're descendants.
1186 1185 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1187 1186 n = self.node(r)
1188 1187 isdescendant = False
1189 1188 if lowestrev == nullrev: # Everybody is a descendant of nullid
1190 1189 isdescendant = True
1191 1190 elif n in descendants:
1192 1191 # n is already a descendant
1193 1192 isdescendant = True
1194 1193 # This check only needs to be done here because all the roots
1195 1194 # will start being marked as descendants before the loop.
1196 1195 if n in roots:
1197 1196 # If n was a root, check if it's a 'real' root.
1198 1197 p = tuple(self.parents(n))
1199 1198 # If any of its parents are descendants, it's not a root.
1200 1199 if (p[0] in descendants) or (p[1] in descendants):
1201 1200 roots.remove(n)
1202 1201 else:
1203 1202 p = tuple(self.parents(n))
1204 1203 # A node is a descendant if either of its parents are
1205 1204 # descendants. (We seeded the descendants set with the roots
1206 1205 # up there, remember?)
1207 1206 if (p[0] in descendants) or (p[1] in descendants):
1208 1207 descendants.add(n)
1209 1208 isdescendant = True
1210 1209 if isdescendant and ((ancestors is None) or (n in ancestors)):
1211 1210 # Only include nodes that are both descendants and ancestors.
1212 1211 orderedout.append(n)
1213 1212 if (ancestors is not None) and (n in heads):
1214 1213 # We're trying to figure out which heads are reachable
1215 1214 # from roots.
1216 1215 # Mark this head as having been reached
1217 1216 heads[n] = True
1218 1217 elif ancestors is None:
1219 1218 # Otherwise, we're trying to discover the heads.
1220 1219 # Assume this is a head because if it isn't, the next step
1221 1220 # will eventually remove it.
1222 1221 heads[n] = True
1223 1222 # But, obviously its parents aren't.
1224 1223 for p in self.parents(n):
1225 1224 heads.pop(p, None)
1226 1225 heads = [head for head, flag in heads.iteritems() if flag]
1227 1226 roots = list(roots)
1228 1227 assert orderedout
1229 1228 assert roots
1230 1229 assert heads
1231 1230 return (orderedout, roots, heads)
1232 1231
1233 1232 def headrevs(self):
1234 1233 try:
1235 1234 return self.index.headrevs()
1236 1235 except AttributeError:
1237 1236 return self._headrevs()
1238 1237
1239 1238 def computephases(self, roots):
1240 1239 return self.index.computephasesmapsets(roots)
1241 1240
1242 1241 def _headrevs(self):
1243 1242 count = len(self)
1244 1243 if not count:
1245 1244 return [nullrev]
1246 1245 # we won't iter over filtered rev so nobody is a head at start
1247 1246 ishead = [0] * (count + 1)
1248 1247 index = self.index
1249 1248 for r in self:
1250 1249 ishead[r] = 1 # I may be a head
1251 1250 e = index[r]
1252 1251 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1253 1252 return [r for r, val in enumerate(ishead) if val]
1254 1253
1255 1254 def heads(self, start=None, stop=None):
1256 1255 """return the list of all nodes that have no children
1257 1256
1258 1257 if start is specified, only heads that are descendants of
1259 1258 start will be returned
1260 1259 if stop is specified, it will consider all the revs from stop
1261 1260 as if they had no children
1262 1261 """
1263 1262 if start is None and stop is None:
1264 1263 if not len(self):
1265 1264 return [nullid]
1266 1265 return [self.node(r) for r in self.headrevs()]
1267 1266
1268 1267 if start is None:
1269 1268 start = nullid
1270 1269 if stop is None:
1271 1270 stop = []
1272 1271 stoprevs = set([self.rev(n) for n in stop])
1273 1272 startrev = self.rev(start)
1274 1273 reachable = {startrev}
1275 1274 heads = {startrev}
1276 1275
1277 1276 parentrevs = self.parentrevs
1278 1277 for r in self.revs(start=startrev + 1):
1279 1278 for p in parentrevs(r):
1280 1279 if p in reachable:
1281 1280 if r not in stoprevs:
1282 1281 reachable.add(r)
1283 1282 heads.add(r)
1284 1283 if p in heads and p not in stoprevs:
1285 1284 heads.remove(p)
1286 1285
1287 1286 return [self.node(r) for r in heads]
1288 1287
1289 1288 def children(self, node):
1290 1289 """find the children of a given node"""
1291 1290 c = []
1292 1291 p = self.rev(node)
1293 1292 for r in self.revs(start=p + 1):
1294 1293 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1295 1294 if prevs:
1296 1295 for pr in prevs:
1297 1296 if pr == p:
1298 1297 c.append(self.node(r))
1299 1298 elif p == nullrev:
1300 1299 c.append(self.node(r))
1301 1300 return c
1302 1301
1303 1302 def descendant(self, start, end):
1304 1303 if start == nullrev:
1305 1304 return True
1306 1305 for i in self.descendants([start]):
1307 1306 if i == end:
1308 1307 return True
1309 1308 elif i > end:
1310 1309 break
1311 1310 return False
1312 1311
1313 1312 def commonancestorsheads(self, a, b):
1314 1313 """calculate all the heads of the common ancestors of nodes a and b"""
1315 1314 a, b = self.rev(a), self.rev(b)
1316 1315 try:
1317 1316 ancs = self.index.commonancestorsheads(a, b)
1318 1317 except (AttributeError, OverflowError): # C implementation failed
1319 1318 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1320 1319 return pycompat.maplist(self.node, ancs)
1321 1320
1322 1321 def isancestor(self, a, b):
1323 1322 """return True if node a is an ancestor of node b
1324 1323
1325 1324 The implementation of this is trivial but the use of
1326 1325 commonancestorsheads is not."""
1327 1326 return a in self.commonancestorsheads(a, b)
1328 1327
1329 1328 def ancestor(self, a, b):
1330 1329 """calculate the "best" common ancestor of nodes a and b"""
1331 1330
1332 1331 a, b = self.rev(a), self.rev(b)
1333 1332 try:
1334 1333 ancs = self.index.ancestors(a, b)
1335 1334 except (AttributeError, OverflowError):
1336 1335 ancs = ancestor.ancestors(self.parentrevs, a, b)
1337 1336 if ancs:
1338 1337 # choose a consistent winner when there's a tie
1339 1338 return min(map(self.node, ancs))
1340 1339 return nullid
1341 1340
1342 1341 def _match(self, id):
1343 1342 if isinstance(id, int):
1344 1343 # rev
1345 1344 return self.node(id)
1346 1345 if len(id) == 20:
1347 1346 # possibly a binary node
1348 1347 # odds of a binary node being all hex in ASCII are 1 in 10**25
1349 1348 try:
1350 1349 node = id
1351 1350 self.rev(node) # quick search the index
1352 1351 return node
1353 1352 except LookupError:
1354 1353 pass # may be partial hex id
1355 1354 try:
1356 1355 # str(rev)
1357 1356 rev = int(id)
1358 1357 if str(rev) != id:
1359 1358 raise ValueError
1360 1359 if rev < 0:
1361 1360 rev = len(self) + rev
1362 1361 if rev < 0 or rev >= len(self):
1363 1362 raise ValueError
1364 1363 return self.node(rev)
1365 1364 except (ValueError, OverflowError):
1366 1365 pass
1367 1366 if len(id) == 40:
1368 1367 try:
1369 1368 # a full hex nodeid?
1370 1369 node = bin(id)
1371 1370 self.rev(node)
1372 1371 return node
1373 1372 except (TypeError, LookupError):
1374 1373 pass
1375 1374
1376 1375 def _partialmatch(self, id):
1377 1376 maybewdir = wdirhex.startswith(id)
1378 1377 try:
1379 1378 partial = self.index.partialmatch(id)
1380 1379 if partial and self.hasnode(partial):
1381 1380 if maybewdir:
1382 1381 # single 'ff...' match in radix tree, ambiguous with wdir
1383 1382 raise RevlogError
1384 1383 return partial
1385 1384 if maybewdir:
1386 1385 # no 'ff...' match in radix tree, wdir identified
1387 1386 raise error.WdirUnsupported
1388 1387 return None
1389 1388 except RevlogError:
1390 1389 # parsers.c radix tree lookup gave multiple matches
1391 1390 # fast path: for unfiltered changelog, radix tree is accurate
1392 1391 if not getattr(self, 'filteredrevs', None):
1393 1392 raise LookupError(id, self.indexfile,
1394 1393 _('ambiguous identifier'))
1395 1394 # fall through to slow path that filters hidden revisions
1396 1395 except (AttributeError, ValueError):
1397 1396 # we are pure python, or key was too short to search radix tree
1398 1397 pass
1399 1398
1400 1399 if id in self._pcache:
1401 1400 return self._pcache[id]
1402 1401
1403 1402 if len(id) < 40:
1404 1403 try:
1405 1404 # hex(node)[:...]
1406 1405 l = len(id) // 2 # grab an even number of digits
1407 1406 prefix = bin(id[:l * 2])
1408 1407 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1409 1408 nl = [n for n in nl if hex(n).startswith(id) and
1410 1409 self.hasnode(n)]
1411 1410 if len(nl) > 0:
1412 1411 if len(nl) == 1 and not maybewdir:
1413 1412 self._pcache[id] = nl[0]
1414 1413 return nl[0]
1415 1414 raise LookupError(id, self.indexfile,
1416 1415 _('ambiguous identifier'))
1417 1416 if maybewdir:
1418 1417 raise error.WdirUnsupported
1419 1418 return None
1420 1419 except (TypeError, binascii.Error):
1421 1420 pass
1422 1421
1423 1422 def lookup(self, id):
1424 1423 """locate a node based on:
1425 1424 - revision number or str(revision number)
1426 1425 - nodeid or subset of hex nodeid
1427 1426 """
1428 1427 n = self._match(id)
1429 1428 if n is not None:
1430 1429 return n
1431 1430 n = self._partialmatch(id)
1432 1431 if n:
1433 1432 return n
1434 1433
1435 1434 raise LookupError(id, self.indexfile, _('no match found'))
1436 1435
1437 1436 def shortest(self, hexnode, minlength=1):
1438 1437 """Find the shortest unambiguous prefix that matches hexnode."""
1439 1438 def isvalid(test):
1440 1439 try:
1441 1440 if self._partialmatch(test) is None:
1442 1441 return False
1443 1442
1444 1443 try:
1445 1444 i = int(test)
1446 1445 # if we are a pure int, then starting with zero will not be
1447 1446 # confused with a rev; nor, obviously, will an int larger
1448 1447 # than the value of the tip rev
1449 1448 if test[0] == '0' or i > len(self):
1450 1449 return True
1451 1450 return False
1452 1451 except ValueError:
1453 1452 return True
1454 1453 except error.RevlogError:
1455 1454 return False
1456 1455 except error.WdirUnsupported:
1457 1456 # single 'ff...' match
1458 1457 return True
1459 1458
1460 1459 shortest = hexnode
1461 1460 startlength = max(6, minlength)
1462 1461 length = startlength
1463 1462 while True:
1464 1463 test = hexnode[:length]
1465 1464 if isvalid(test):
1466 1465 shortest = test
1467 1466 if length == minlength or length > startlength:
1468 1467 return shortest
1469 1468 length -= 1
1470 1469 else:
1471 1470 length += 1
1472 1471 if len(shortest) <= length:
1473 1472 return shortest
1474 1473
1475 1474 def cmp(self, node, text):
1476 1475 """compare text with a given file revision
1477 1476
1478 1477 returns True if text is different than what is stored.
1479 1478 """
1480 1479 p1, p2 = self.parents(node)
1481 1480 return hash(text, p1, p2) != node
1482 1481
1483 1482 def _cachesegment(self, offset, data):
1484 1483 """Add a segment to the revlog cache.
1485 1484
1486 1485 Accepts an absolute offset and the data that is at that location.
1487 1486 """
1488 1487 o, d = self._chunkcache
1489 1488 # try to add to existing cache
1490 1489 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1491 1490 self._chunkcache = o, d + data
1492 1491 else:
1493 1492 self._chunkcache = offset, data
1494 1493
1495 1494 def _readsegment(self, offset, length, df=None):
1496 1495 """Load a segment of raw data from the revlog.
1497 1496
1498 1497 Accepts an absolute offset, length to read, and an optional existing
1499 1498 file handle to read from.
1500 1499
1501 1500 If an existing file handle is passed, it will be seeked and the
1502 1501 original seek position will NOT be restored.
1503 1502
1504 1503 Returns a str or buffer of raw byte data.
1505 1504 """
1506 1505 if df is not None:
1507 1506 closehandle = False
1508 1507 else:
1509 1508 if self._inline:
1510 1509 df = self._indexfp()
1511 1510 else:
1512 1511 df = self._datafp()
1513 1512 closehandle = True
1514 1513
1515 1514 # Cache data both forward and backward around the requested
1516 1515 # data, in a fixed size window. This helps speed up operations
1517 1516 # involving reading the revlog backwards.
1518 1517 cachesize = self._chunkcachesize
1519 1518 realoffset = offset & ~(cachesize - 1)
1520 1519 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1521 1520 - realoffset)
1522 1521 df.seek(realoffset)
1523 1522 d = df.read(reallength)
1524 1523 if closehandle:
1525 1524 df.close()
1526 1525 self._cachesegment(realoffset, d)
1527 1526 if offset != realoffset or reallength != length:
1528 1527 return util.buffer(d, offset - realoffset, length)
1529 1528 return d
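
# Worked example (editor's note): with the default 64KB cache size, a
# 100-byte request at offset 65600 is widened to an aligned window and
# then sliced back down by the buffer at the end of the method:
#
#   >>> cachesize = 65536
#   >>> offset, length = 65600, 100
#   >>> realoffset = offset & ~(cachesize - 1)
#   >>> reallength = (((offset + length + cachesize)
#   ...                & ~(cachesize - 1)) - realoffset)
#   >>> realoffset, reallength
#   (65536, 65536)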
1530 1529
1531 1530 def _getsegment(self, offset, length, df=None):
1532 1531 """Obtain a segment of raw data from the revlog.
1533 1532
1534 1533 Accepts an absolute offset, length of bytes to obtain, and an
1535 1534 optional file handle to the already-opened revlog. If the file
1536 1535 handle is used, its original seek position will not be preserved.
1537 1536
1538 1537 Requests for data may be returned from a cache.
1539 1538
1540 1539 Returns a str or a buffer instance of raw byte data.
1541 1540 """
1542 1541 o, d = self._chunkcache
1543 1542 l = len(d)
1544 1543
1545 1544 # is it in the cache?
1546 1545 cachestart = offset - o
1547 1546 cacheend = cachestart + length
1548 1547 if cachestart >= 0 and cacheend <= l:
1549 1548 if cachestart == 0 and cacheend == l:
1550 1549 return d # avoid a copy
1551 1550 return util.buffer(d, cachestart, cacheend - cachestart)
1552 1551
1553 1552 return self._readsegment(offset, length, df=df)
1554 1553
1555 1554 def _getsegmentforrevs(self, startrev, endrev, df=None):
1556 1555 """Obtain a segment of raw data corresponding to a range of revisions.
1557 1556
1558 1557 Accepts the start and end revisions and an optional already-open
1559 1558 file handle to be used for reading. If the file handle is read, its
1560 1559 seek position will not be preserved.
1561 1560
1562 1561 Requests for data may be satisfied by a cache.
1563 1562
1564 1563 Returns a 2-tuple of (offset, data) for the requested range of
1565 1564 revisions. Offset is the integer offset from the beginning of the
1566 1565 revlog and data is a str or buffer of the raw byte data.
1567 1566
1568 1567 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1569 1568 to determine where each revision's data begins and ends.
1570 1569 """
1571 1570 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1572 1571 # (functions are expensive).
1573 1572 index = self.index
1574 1573 istart = index[startrev]
1575 1574 start = int(istart[0] >> 16)
1576 1575 if startrev == endrev:
1577 1576 end = start + istart[1]
1578 1577 else:
1579 1578 iend = index[endrev]
1580 1579 end = int(iend[0] >> 16) + iend[1]
1581 1580
1582 1581 if self._inline:
1583 1582 start += (startrev + 1) * self._io.size
1584 1583 end += (endrev + 1) * self._io.size
1585 1584 length = end - start
1586 1585
1587 1586 return start, self._getsegment(start, length, df=df)
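
# Worked example of the inline adjustment above (illustrative numbers):
# in an inline revlog, index entries and revision data are interleaved in
# a single file, so each revision's physical data offset is shifted by one
# index entry (self._io.size bytes, 64 for the v1 format) per revision up
# to and including itself:
#
#   logical start(5) == 1000, iosize == 64
#   physical offset  == 1000 + (5 + 1) * 64 == 1384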
1588 1587
1589 1588 def _chunk(self, rev, df=None):
1590 1589 """Obtain a single decompressed chunk for a revision.
1591 1590
1592 1591 Accepts an integer revision and an optional already-open file handle
1593 1592 to be used for reading. If used, the seek position of the file will not
1594 1593 be preserved.
1595 1594
1596 1595 Returns a str holding uncompressed data for the requested revision.
1597 1596 """
1598 1597 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1599 1598
1600 1599 def _chunks(self, revs, df=None):
1601 1600 """Obtain decompressed chunks for the specified revisions.
1602 1601
1603 1602 Accepts an iterable of numeric revisions that are assumed to be in
1604 1603 ascending order. Also accepts an optional already-open file handle
1605 1604 to be used for reading. If used, the seek position of the file will
1606 1605 not be preserved.
1607 1606
1608 1607 This function is similar to calling ``self._chunk()`` multiple times,
1609 1608 but is faster.
1610 1609
1611 1610 Returns a list with decompressed data for each requested revision.
1612 1611 """
1613 1612 if not revs:
1614 1613 return []
1615 1614 start = self.start
1616 1615 length = self.length
1617 1616 inline = self._inline
1618 1617 iosize = self._io.size
1619 1618 buffer = util.buffer
1620 1619
1621 1620 l = []
1622 1621 ladd = l.append
1623 1622
1624 1623 if not self._withsparseread:
1625 1624 slicedchunks = (revs,)
1626 1625 else:
1627 1626 slicedchunks = _slicechunk(self, revs)
1628 1627
1629 1628 for revschunk in slicedchunks:
1630 1629 firstrev = revschunk[0]
1631 1630 # Skip trailing revisions with empty diff
1632 1631 for lastrev in revschunk[::-1]:
1633 1632 if length(lastrev) != 0:
1634 1633 break
1635 1634
1636 1635 try:
1637 1636 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1638 1637 except OverflowError:
1639 1638 # issue4215 - we can't cache a run of chunks greater than
1640 1639 # 2G on Windows
1641 1640 return [self._chunk(rev, df=df) for rev in revschunk]
1642 1641
1643 1642 decomp = self.decompress
1644 1643 for rev in revschunk:
1645 1644 chunkstart = start(rev)
1646 1645 if inline:
1647 1646 chunkstart += (rev + 1) * iosize
1648 1647 chunklength = length(rev)
1649 1648 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1650 1649
1651 1650 return l
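
# A simplified stand-in for _slicechunk (not revlog.py's actual heuristic,
# which is density based): split a run of revs wherever the on-disk gap
# between consecutive revisions exceeds a made-up ``maxgap`` knob, so each
# slice can be fetched with one contiguous read.

def naiveslices(rl, revs, maxgap=65536):
    group = [revs[0]]
    for rev in revs[1:]:
        # bytes of unrelated data between the previous rev and this one
        gap = rl.start(rev) - rl.end(group[-1])
        if gap > maxgap:
            yield group
            group = []
        group.append(rev)
    yield group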
1652 1651
1653 1652 def _chunkclear(self):
1654 1653 """Clear the raw chunk cache."""
1655 1654 self._chunkcache = (0, '')
1656 1655
1657 1656 def deltaparent(self, rev):
1658 1657 """return deltaparent of the given revision"""
1659 1658 base = self.index[rev][3]
1660 1659 if base == rev:
1661 1660 return nullrev
1662 1661 elif self._generaldelta:
1663 1662 return base
1664 1663 else:
1665 1664 return rev - 1
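
# Example of the two delta schemes above (illustrative revision numbers):
#
#   index[7][3] == 4, generaldelta on  -> deltaparent(7) == 4
#   index[7][3] == 4, generaldelta off -> deltaparent(7) == 6 (rev - 1)
#   index[7][3] == 7 (base == rev)     -> full snapshot, returns nullrev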
1666 1665
1667 1666 def revdiff(self, rev1, rev2):
1668 1667 """return or calculate a delta between two revisions
1669 1668
1670 1669 The delta calculated is in binary form and is intended to be written to
1671 1670 revlog data directly. So this function needs raw revision data.
1672 1671 """
1673 1672 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1674 1673 return bytes(self._chunk(rev2))
1675 1674
1676 1675 return mdiff.textdiff(self.revision(rev1, raw=True),
1677 1676 self.revision(rev2, raw=True))
1678 1677
1679 1678 def revision(self, nodeorrev, _df=None, raw=False):
1680 1679 """return an uncompressed revision of a given node or revision
1681 1680 number.
1682 1681
1683 1682 _df - an existing file handle to read from. (internal-only)
1684 1683 raw - an optional argument specifying if the revision data is to be
1685 1684 treated as raw data when applying flag transforms. 'raw' should be set
1686 1685 to True when generating changegroups or in debug commands.
1687 1686 """
1688 1687 if isinstance(nodeorrev, int):
1689 1688 rev = nodeorrev
1690 1689 node = self.node(rev)
1691 1690 else:
1692 1691 node = nodeorrev
1693 1692 rev = None
1694 1693
1695 1694 cachedrev = None
1696 1695 flags = None
1697 1696 rawtext = None
1698 1697 if node == nullid:
1699 1698 return ""
1700 1699 if self._cache:
1701 1700 if self._cache[0] == node:
1702 1701 # _cache only stores rawtext
1703 1702 if raw:
1704 1703 return self._cache[2]
1705 1704 # duplicated, but good for perf
1706 1705 if rev is None:
1707 1706 rev = self.rev(node)
1708 1707 if flags is None:
1709 1708 flags = self.flags(rev)
1710 1709 # no extra flags set, no flag processor runs, text = rawtext
1711 1710 if flags == REVIDX_DEFAULT_FLAGS:
1712 1711 return self._cache[2]
1713 1712 # rawtext is reusable. need to run flag processor
1714 1713 rawtext = self._cache[2]
1715 1714
1716 1715 cachedrev = self._cache[1]
1717 1716
1718 1717 # look up what we need to read
1719 1718 if rawtext is None:
1720 1719 if rev is None:
1721 1720 rev = self.rev(node)
1722 1721
1723 1722 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1724 1723 if stopped:
1725 1724 rawtext = self._cache[2]
1726 1725
1727 1726 # drop cache to save memory
1728 1727 self._cache = None
1729 1728
1730 1729 bins = self._chunks(chain, df=_df)
1731 1730 if rawtext is None:
1732 1731 rawtext = bytes(bins[0])
1733 1732 bins = bins[1:]
1734 1733
1735 1734 rawtext = mdiff.patches(rawtext, bins)
1736 1735 self._cache = (node, rev, rawtext)
1737 1736
1738 1737 if flags is None:
1739 1738 if rev is None:
1740 1739 rev = self.rev(node)
1741 1740 flags = self.flags(rev)
1742 1741
1743 1742 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1744 1743 if validatehash:
1745 1744 self.checkhash(text, node, rev=rev)
1746 1745
1747 1746 return text
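
# Minimal usage sketch (hypothetical helper, not part of revlog.py):
# ``raw=True`` bypasses the read-side flag transforms, which is what
# changegroup generation and debug commands want.

def dumprevision(rl, node):
    text = rl.revision(node)               # flag processors applied
    rawtext = rl.revision(node, raw=True)  # bytes exactly as stored
    return text, rawtext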
1748 1747
1749 1748 def hash(self, text, p1, p2):
1750 1749 """Compute a node hash.
1751 1750
1752 1751 Available as a function so that subclasses can replace the hash
1753 1752 as needed.
1754 1753 """
1755 1754 return hash(text, p1, p2)
1756 1755
1757 1756 def _processflags(self, text, flags, operation, raw=False):
1758 1757 """Inspect revision data flags and applies transforms defined by
1759 1758 registered flag processors.
1760 1759
1761 1760 ``text`` - the revision data to process
1762 1761 ``flags`` - the revision flags
1763 1762 ``operation`` - the operation being performed (read or write)
1764 1763 ``raw`` - an optional argument describing if the raw transform should be
1765 1764 applied.
1766 1765
1767 1766 This method processes the flags in the order (or reverse order if
1768 1767 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1769 1768 flag processors registered for present flags. The order of flags defined
1770 1769 in REVIDX_FLAGS_ORDER needs to be stable because transforms do not commute.
1771 1770
1772 1771 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1773 1772 processed text and ``validatehash`` is a bool indicating whether the
1774 1773 returned text should be checked for hash integrity.
1775 1774
1776 1775 Note: If the ``raw`` argument is set, it has precedence over the
1777 1776 operation and will only update the value of ``validatehash``.
1778 1777 """
1779 1778 # fast path: no flag processors will run
1780 1779 if flags == 0:
1781 1780 return text, True
1782 1781 if operation not in ('read', 'write'):
1783 1782 raise ProgrammingError(_("invalid '%s' operation") % operation)
1784 1783 # Check all flags are known.
1785 1784 if flags & ~REVIDX_KNOWN_FLAGS:
1786 1785 raise RevlogError(_("incompatible revision flag '%#x'") %
1787 1786 (flags & ~REVIDX_KNOWN_FLAGS))
1788 1787 validatehash = True
1789 1788 # Depending on the operation (read or write), the order might be
1790 1789 # reversed due to non-commutative transforms.
1791 1790 orderedflags = REVIDX_FLAGS_ORDER
1792 1791 if operation == 'write':
1793 1792 orderedflags = reversed(orderedflags)
1794 1793
1795 1794 for flag in orderedflags:
1796 1795 # If a flagprocessor has been registered for a known flag, apply the
1797 1796 # related operation transform and update result tuple.
1798 1797 if flag & flags:
1799 1798 vhash = True
1800 1799
1801 1800 if flag not in _flagprocessors:
1802 1801 message = _("missing processor for flag '%#x'") % (flag)
1803 1802 raise RevlogError(message)
1804 1803
1805 1804 processor = _flagprocessors[flag]
1806 1805 if processor is not None:
1807 1806 readtransform, writetransform, rawtransform = processor
1808 1807
1809 1808 if raw:
1810 1809 vhash = rawtransform(self, text)
1811 1810 elif operation == 'read':
1812 1811 text, vhash = readtransform(self, text)
1813 1812 else: # write operation
1814 1813 text, vhash = writetransform(self, text)
1815 1814 validatehash = validatehash and vhash
1816 1815
1817 1816 return text, validatehash
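
# Sketch of a flag-processor triple as applied above (hypothetical
# transforms; a real extension would register them, for instance on
# REVIDX_EXTSTORED, via addflagprocessor()):

def _examplereadtransform(self, rawtext):
    # stored form -> user-visible form; True means hash the result
    return zlib.decompress(rawtext), True

def _examplewritetransform(self, text):
    # user-visible form -> stored form
    return zlib.compress(text), False

def _examplerawtransform(self, rawtext):
    # rawtext is already the stored form; nothing to validate against
    return False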
1818 1817
1819 1818 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1820 1819 """Check node hash integrity.
1821 1820
1822 1821 Available as a function so that subclasses can extend hash mismatch
1823 1822 behaviors as needed.
1824 1823 """
1825 1824 if p1 is None and p2 is None:
1826 1825 p1, p2 = self.parents(node)
1827 1826 if node != self.hash(text, p1, p2):
1828 1827 revornode = rev
1829 1828 if revornode is None:
1830 1829 revornode = templatefilters.short(hex(node))
1831 1830 raise RevlogError(_("integrity check failed on %s:%s")
1832 1831 % (self.indexfile, pycompat.bytestr(revornode)))
1833 1832
1834 1833 def checkinlinesize(self, tr, fp=None):
1835 1834 """Check if the revlog is too big for inline and convert if so.
1836 1835
1837 1836 This should be called after revisions are added to the revlog. If the
1838 1837 revlog has grown too large to be an inline revlog, it will convert it
1839 1838 to use multiple index and data files.
1840 1839 """
1841 1840 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1842 1841 return
1843 1842
1844 1843 trinfo = tr.find(self.indexfile)
1845 1844 if trinfo is None:
1846 1845 raise RevlogError(_("%s not found in the transaction")
1847 1846 % self.indexfile)
1848 1847
1849 1848 trindex = trinfo[2]
1850 1849 if trindex is not None:
1851 1850 dataoff = self.start(trindex)
1852 1851 else:
1853 1852 # revlog was stripped at start of transaction, use all leftover data
1854 1853 trindex = len(self) - 1
1855 1854 dataoff = self.end(-2)
1856 1855
1857 1856 tr.add(self.datafile, dataoff)
1858 1857
1859 1858 if fp:
1860 1859 fp.flush()
1861 1860 fp.close()
1862 1861
1863 1862 df = self._datafp('w')
1864 1863 try:
1865 1864 for r in self:
1866 1865 df.write(self._getsegmentforrevs(r, r)[1])
1867 1866 finally:
1868 1867 df.close()
1869 1868
1870 1869 fp = self._indexfp('w')
1871 1870 self.version &= ~FLAG_INLINE_DATA
1872 1871 self._inline = False
1873 1872 for i in self:
1874 1873 e = self._io.packentry(self.index[i], self.node, self.version, i)
1875 1874 fp.write(e)
1876 1875
1877 1876 # if we don't call close, the temp file will never replace the
1878 1877 # real index
1879 1878 fp.close()
1880 1879
1881 1880 tr.replace(self.indexfile, trindex * self._io.size)
1882 1881 self._chunkclear()
1883 1882
1884 1883 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1885 1884 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1886 1885 """add a revision to the log
1887 1886
1888 1887 text - the revision data to add
1889 1888 transaction - the transaction object used for rollback
1890 1889 link - the linkrev data to add
1891 1890 p1, p2 - the parent nodeids of the revision
1892 1891 cachedelta - an optional precomputed delta
1893 1892 node - nodeid of revision; typically node is not specified, and it is
1894 1893 computed by default as hash(text, p1, p2); however, subclasses might
1895 1894 use a different hashing method (and override checkhash() in that case)
1896 1895 flags - the known flags to set on the revision
1897 1896 deltacomputer - an optional _deltacomputer instance shared between
1898 1897 multiple calls
1899 1898 """
1900 1899 if link == nullrev:
1901 1900 raise RevlogError(_("attempted to add linkrev -1 to %s")
1902 1901 % self.indexfile)
1903 1902
1904 1903 if flags:
1905 1904 node = node or self.hash(text, p1, p2)
1906 1905
1907 1906 rawtext, validatehash = self._processflags(text, flags, 'write')
1908 1907
1909 1908 # If the flag processor modifies the revision data, ignore any provided
1910 1909 # cachedelta.
1911 1910 if rawtext != text:
1912 1911 cachedelta = None
1913 1912
1914 1913 if len(rawtext) > _maxentrysize:
1915 1914 raise RevlogError(
1916 1915 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1917 1916 % (self.indexfile, len(rawtext)))
1918 1917
1919 1918 node = node or self.hash(rawtext, p1, p2)
1920 1919 if node in self.nodemap:
1921 1920 return node
1922 1921
1923 1922 if validatehash:
1924 1923 self.checkhash(rawtext, node, p1=p1, p2=p2)
1925 1924
1926 1925 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1927 1926 flags, cachedelta=cachedelta,
1928 1927 deltacomputer=deltacomputer)
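
# Minimal usage sketch (hypothetical caller; ``tr`` is an open transaction
# and both parents already exist in this revlog):

def appendtext(rl, tr, linkrev, p1node, p2node, text):
    # the nodeid is computed as hash(text, p1, p2) when not passed in
    return rl.addrevision(text, tr, linkrev, p1node, p2node)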
1929 1928
1930 1929 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1931 1930 cachedelta=None, deltacomputer=None):
1932 1931 """add a raw revision with known flags, node and parents
1933 1932 useful when reusing a revision not stored in this revlog (e.g. received
1934 1933 over wire, or read from an external bundle).
1935 1934 """
1936 1935 dfh = None
1937 1936 if not self._inline:
1938 1937 dfh = self._datafp("a+")
1939 1938 ifh = self._indexfp("a+")
1940 1939 try:
1941 1940 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1942 1941 flags, cachedelta, ifh, dfh,
1943 1942 deltacomputer=deltacomputer)
1944 1943 finally:
1945 1944 if dfh:
1946 1945 dfh.close()
1947 1946 ifh.close()
1948 1947
1949 1948 def compress(self, data):
1950 1949 """Generate a possibly-compressed representation of data."""
1951 1950 if not data:
1952 1951 return '', data
1953 1952
1954 1953 compressed = self._compressor.compress(data)
1955 1954
1956 1955 if compressed:
1957 1956 # The revlog compressor added the header in the returned data.
1958 1957 return '', compressed
1959 1958
1960 1959 if data[0:1] == '\0':
1961 1960 return '', data
1962 1961 return 'u', data
1963 1962
1964 1963 def decompress(self, data):
1965 1964 """Decompress a revlog chunk.
1966 1965
1967 1966 The chunk is expected to begin with a header identifying the
1968 1967 format type so it can be routed to an appropriate decompressor.
1969 1968 """
1970 1969 if not data:
1971 1970 return data
1972 1971
1973 1972 # Revlogs are read much more frequently than they are written and many
1974 1973 # chunks only take microseconds to decompress, so performance is
1975 1974 # important here.
1976 1975 #
1977 1976 # We can make a few assumptions about revlogs:
1978 1977 #
1979 1978 # 1) the majority of chunks will be compressed (as opposed to inline
1980 1979 # raw data).
1981 1980 # 2) decompressing *any* data will likely be at least 10x slower than
1982 1981 # returning raw inline data.
1983 1982 # 3) we want to prioritize common and officially supported compression
1984 1983 # engines
1985 1984 #
1986 1985 # It follows that we want to optimize for "decompress compressed data
1987 1986 # when encoded with common and officially supported compression engines"
1988 1987 # case over "raw data" and "data encoded by less common or non-official
1989 1988 # compression engines." That is why we have the inline lookup first
1990 1989 # followed by the compengines lookup.
1991 1990 #
1992 1991 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1993 1992 # compressed chunks. And this matters for changelog and manifest reads.
1994 1993 t = data[0:1]
1995 1994
1996 1995 if t == 'x':
1997 1996 try:
1998 1997 return _zlibdecompress(data)
1999 1998 except zlib.error as e:
2000 1999 raise RevlogError(_('revlog decompress error: %s') % str(e))
2001 2000 # '\0' is more common than 'u' so it goes first.
2002 2001 elif t == '\0':
2003 2002 return data
2004 2003 elif t == 'u':
2005 2004 return util.buffer(data, 1)
2006 2005
2007 2006 try:
2008 2007 compressor = self._decompressors[t]
2009 2008 except KeyError:
2010 2009 try:
2011 2010 engine = util.compengines.forrevlogheader(t)
2012 2011 compressor = engine.revlogcompressor()
2013 2012 self._decompressors[t] = compressor
2014 2013 except KeyError:
2015 2014 raise RevlogError(_('unknown compression type %r') % t)
2016 2015
2017 2016 return compressor.decompress(data)
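
# Summary of the header routing above (illustrative payloads):
#
#   'x' (0x78)  zlib stream header                    -> _zlibdecompress
#   '\0'        raw chunk whose own first byte is NUL -> returned whole
#   'u'         explicit "uncompressed" marker        -> marker stripped
#   other       header of a pluggable engine, looked up in compengines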
2018 2017
2019 2018 def _isgooddeltainfo(self, d, textlen):
2020 2019 """Returns True if the given delta is good. Good means that it is within
2021 2020 the disk span, disk size, and chain length bounds that we know to be
2022 2021 performant."""
2023 2022 if d is None:
2024 2023 return False
2025 2024
2026 2025 # - 'd.distance' is the distance from the base revision -- bounding it
2027 2026 # limits the amount of I/O we need to do.
2028 2027 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2029 2028 # need to apply -- bounding it limits the amount of CPU we consume.
2030 2029
2031 2030 defaultmax = textlen * 4
2032 2031 maxdist = self._maxdeltachainspan
2033 2032 if not maxdist:
2034 2033 maxdist = d.distance # ensure the conditional below passes
2035 2034 maxdist = max(maxdist, defaultmax)
2036 2035 if (d.distance > maxdist or d.deltalen > textlen or
2037 2036 d.compresseddeltalen > textlen * 2 or
2038 2037 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2039 2038 return False
2040 2039
2041 2040 return True
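
# Worked example of the bounds above (made-up numbers): textlen == 1000,
# no explicit _maxdeltachainspan or _maxchainlen configured.
#
#   defaultmax = 1000 * 4 == 4000
#   maxdist    = max(d.distance, 4000)  # the distance check always passes
#
# A delta with deltalen == 900 and compresseddeltalen == 1800 is good;
# one with deltalen == 1200 (> textlen) is rejected.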
2042 2041
2043 2042 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2044 2043 cachedelta, ifh, dfh, alwayscache=False,
2045 2044 deltacomputer=None):
2046 2045 """internal function to add revisions to the log
2047 2046
2048 2047 see addrevision for argument descriptions.
2049 2048
2050 2049 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2051 2050
2052 2051 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2053 2052 be used.
2054 2053
2055 2054 invariants:
2056 2055 - rawtext is optional (can be None); if not set, cachedelta must be set.
2057 2056 if both are set, they must correspond to each other.
2058 2057 """
2059 2058 if node == nullid:
2060 2059 raise RevlogError(_("%s: attempt to add null revision") %
2061 2060 (self.indexfile))
2062 2061 if node == wdirid:
2063 2062 raise RevlogError(_("%s: attempt to add wdir revision") %
2064 2063 (self.indexfile))
2065 2064
2066 2065 if self._inline:
2067 2066 fh = ifh
2068 2067 else:
2069 2068 fh = dfh
2070 2069
2071 2070 btext = [rawtext]
2072 2071
2073 2072 curr = len(self)
2074 2073 prev = curr - 1
2075 2074 offset = self.end(prev)
2076 2075 p1r, p2r = self.rev(p1), self.rev(p2)
2077 2076
2078 2077 # full versions are inserted when the needed deltas
2079 2078 # become comparable to the uncompressed text
2080 2079 if rawtext is None:
2081 2080 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2082 2081 cachedelta[1])
2083 2082 else:
2084 2083 textlen = len(rawtext)
2085 2084
2086 2085 if deltacomputer is None:
2087 2086 deltacomputer = _deltacomputer(self)
2088 2087
2089 2088 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2090 2089 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2091 2090
2092 2091 if deltainfo is not None:
2093 2092 base = deltainfo.base
2094 2093 chainbase = deltainfo.chainbase
2095 2094 data = deltainfo.data
2096 2095 l = deltainfo.deltalen
2097 2096 else:
2098 2097 rawtext = deltacomputer.buildtext(revinfo, fh)
2099 2098 data = self.compress(rawtext)
2100 2099 l = len(data[1]) + len(data[0])
2101 2100 base = chainbase = curr
2102 2101
2103 2102 e = (offset_type(offset, flags), l, textlen,
2104 2103 base, link, p1r, p2r, node)
2105 2104 self.index.insert(-1, e)
2106 2105 self.nodemap[node] = curr
2107 2106
2108 2107 entry = self._io.packentry(e, self.node, self.version, curr)
2109 2108 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2110 2109
2111 2110 if alwayscache and rawtext is None:
2112 2111 rawtext = deltacomputer.buildtext(revinfo, fh)
2113 2112
2114 2113 if type(rawtext) == bytes: # only accept immutable objects
2115 2114 self._cache = (node, curr, rawtext)
2116 2115 self._chainbasecache[curr] = chainbase
2117 2116 return node
2118 2117
2119 2118 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2120 2119 # Files opened in a+ mode have inconsistent behavior on various
2121 2120 # platforms. Windows requires that a file positioning call be made
2122 2121 # when the file handle transitions between reads and writes. See
2123 2122 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2124 2123 # platforms, Python or the platform itself can be buggy. Some versions
2125 2124 # of Solaris have been observed to not append at the end of the file
2126 2125 # if the file was seeked to a point before the end. See issue4943 for more.
2127 2126 #
2128 2127 # We work around this issue by inserting a seek() before writing.
2129 2128 # Note: This is likely not necessary on Python 3.
2130 2129 ifh.seek(0, os.SEEK_END)
2131 2130 if dfh:
2132 2131 dfh.seek(0, os.SEEK_END)
2133 2132
2134 2133 curr = len(self) - 1
2135 2134 if not self._inline:
2136 2135 transaction.add(self.datafile, offset)
2137 2136 transaction.add(self.indexfile, curr * len(entry))
2138 2137 if data[0]:
2139 2138 dfh.write(data[0])
2140 2139 dfh.write(data[1])
2141 2140 ifh.write(entry)
2142 2141 else:
2143 2142 offset += curr * self._io.size
2144 2143 transaction.add(self.indexfile, offset, curr)
2145 2144 ifh.write(entry)
2146 2145 ifh.write(data[0])
2147 2146 ifh.write(data[1])
2148 2147 self.checkinlinesize(transaction, ifh)
2149 2148
2150 2149 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2151 2150 """
2152 2151 add a delta group
2153 2152
2154 2153 given a set of deltas, add them to the revision log. the
2155 2154 first delta is against its parent, which should be in our
2156 2155 log, the rest are against the previous delta.
2157 2156
2158 2157 If ``addrevisioncb`` is defined, it will be called with arguments of
2159 2158 this revlog and the node that was added.
2160 2159 """
2161 2160
2162 2161 nodes = []
2163 2162
2164 2163 r = len(self)
2165 2164 end = 0
2166 2165 if r:
2167 2166 end = self.end(r - 1)
2168 2167 ifh = self._indexfp("a+")
2169 2168 isize = r * self._io.size
2170 2169 if self._inline:
2171 2170 transaction.add(self.indexfile, end + isize, r)
2172 2171 dfh = None
2173 2172 else:
2174 2173 transaction.add(self.indexfile, isize, r)
2175 2174 transaction.add(self.datafile, end)
2176 2175 dfh = self._datafp("a+")
2177 2176 def flush():
2178 2177 if dfh:
2179 2178 dfh.flush()
2180 2179 ifh.flush()
2181 2180 try:
2182 2181 deltacomputer = _deltacomputer(self)
2183 2182 # loop through our set of deltas
2184 2183 for data in deltas:
2185 2184 node, p1, p2, linknode, deltabase, delta, flags = data
2186 2185 link = linkmapper(linknode)
2187 2186 flags = flags or REVIDX_DEFAULT_FLAGS
2188 2187
2189 2188 nodes.append(node)
2190 2189
2191 2190 if node in self.nodemap:
2192 2191 # this can happen if two branches make the same change
2193 2192 continue
2194 2193
2195 2194 for p in (p1, p2):
2196 2195 if p not in self.nodemap:
2197 2196 raise LookupError(p, self.indexfile,
2198 2197 _('unknown parent'))
2199 2198
2200 2199 if deltabase not in self.nodemap:
2201 2200 raise LookupError(deltabase, self.indexfile,
2202 2201 _('unknown delta base'))
2203 2202
2204 2203 baserev = self.rev(deltabase)
2205 2204
2206 2205 if baserev != nullrev and self.iscensored(baserev):
2207 2206 # if base is censored, delta must be full replacement in a
2208 2207 # single patch operation
2209 2208 hlen = struct.calcsize(">lll")
2210 2209 oldlen = self.rawsize(baserev)
2211 2210 newlen = len(delta) - hlen
2212 2211 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2213 2212 raise error.CensoredBaseError(self.indexfile,
2214 2213 self.node(baserev))
2215 2214
2216 2215 if not flags and self._peek_iscensored(baserev, delta, flush):
2217 2216 flags |= REVIDX_ISCENSORED
2218 2217
2219 2218 # We assume consumers of addrevisioncb will want to retrieve
2220 2219 # the added revision, which will require a call to
2221 2220 # revision(). revision() will fast path if there is a cache
2222 2221 # hit. So, we tell _addrevision() to always cache in this case.
2223 2222 # We're only using addgroup() in the context of changegroup
2224 2223 # generation so the revision data can always be handled as raw
2225 2224 # by the flagprocessor.
2226 2225 self._addrevision(node, None, transaction, link,
2227 2226 p1, p2, flags, (baserev, delta),
2228 2227 ifh, dfh,
2229 2228 alwayscache=bool(addrevisioncb),
2230 2229 deltacomputer=deltacomputer)
2231 2230
2232 2231 if addrevisioncb:
2233 2232 addrevisioncb(self, node)
2234 2233
2235 2234 if not dfh and not self._inline:
2236 2235 # addrevision switched from inline to conventional
2237 2236 # reopen the index
2238 2237 ifh.close()
2239 2238 dfh = self._datafp("a+")
2240 2239 ifh = self._indexfp("a+")
2241 2240 finally:
2242 2241 if dfh:
2243 2242 dfh.close()
2244 2243 ifh.close()
2245 2244
2246 2245 return nodes
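
# Shape of one ``deltas`` item as unpacked above (hypothetical values):
#
#   (node, p1, p2, linknode, deltabase, delta, flags)
#
# ``deltabase`` must name a revision already in the revlog (or added
# earlier in the same group); ``linkmapper(linknode)`` yields the linkrev.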
2247 2246
2248 2247 def iscensored(self, rev):
2249 2248 """Check if a file revision is censored."""
2250 2249 return False
2251 2250
2252 2251 def _peek_iscensored(self, baserev, delta, flush):
2253 2252 """Quickly check if a delta produces a censored revision."""
2254 2253 return False
2255 2254
2256 2255 def getstrippoint(self, minlink):
2257 2256 """find the minimum rev that must be stripped to strip the linkrev
2258 2257
2259 2258 Returns a tuple containing the minimum rev and a set of all revs that
2260 2259 have linkrevs that will be broken by this strip.
2261 2260 """
2262 2261 brokenrevs = set()
2263 2262 strippoint = len(self)
2264 2263
2265 2264 heads = {}
2266 2265 futurelargelinkrevs = set()
2267 2266 for head in self.headrevs():
2268 2267 headlinkrev = self.linkrev(head)
2269 2268 heads[head] = headlinkrev
2270 2269 if headlinkrev >= minlink:
2271 2270 futurelargelinkrevs.add(headlinkrev)
2272 2271
2273 2272 # This algorithm involves walking down the rev graph, starting at the
2274 2273 # heads. Since the revs are topologically sorted according to linkrev,
2275 2274 # once all head linkrevs are below the minlink, we know there are
2276 2275 # no more revs that could have a linkrev greater than minlink.
2277 2276 # So we can stop walking.
2278 2277 while futurelargelinkrevs:
2279 2278 strippoint -= 1
2280 2279 linkrev = heads.pop(strippoint)
2281 2280
2282 2281 if linkrev < minlink:
2283 2282 brokenrevs.add(strippoint)
2284 2283 else:
2285 2284 futurelargelinkrevs.remove(linkrev)
2286 2285
2287 2286 for p in self.parentrevs(strippoint):
2288 2287 if p != nullrev:
2289 2288 plinkrev = self.linkrev(p)
2290 2289 heads[p] = plinkrev
2291 2290 if plinkrev >= minlink:
2292 2291 futurelargelinkrevs.add(plinkrev)
2293 2292
2294 2293 return strippoint, brokenrevs
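
# Worked example (made-up graph): rev 0 (linkrev 0) with two heads on top
# of it, rev 1 (linkrev 3) and rev 2 (linkrev 2). getstrippoint(3) walks
# down from the heads: rev 2's linkrev 2 is below minlink, so it is only
# collateral damage, while rev 1 is the first revision that must go. The
# result is (1, {2}): truncate at rev 1, and rev 2's linkrev is "broken".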
2295 2294
2296 2295 def strip(self, minlink, transaction):
2297 2296 """truncate the revlog on the first revision with a linkrev >= minlink
2298 2297
2299 2298 This function is called when we're stripping revision minlink and
2300 2299 its descendants from the repository.
2301 2300
2302 2301 We have to remove all revisions with linkrev >= minlink, because
2303 2302 the equivalent changelog revisions will be renumbered after the
2304 2303 strip.
2305 2304
2306 2305 So we truncate the revlog on the first of these revisions, and
2307 2306 trust that the caller has saved the revisions that shouldn't be
2308 2307 removed and that it'll re-add them after this truncation.
2309 2308 """
2310 2309 if len(self) == 0:
2311 2310 return
2312 2311
2313 2312 rev, _ = self.getstrippoint(minlink)
2314 2313 if rev == len(self):
2315 2314 return
2316 2315
2317 2316 # first truncate the files on disk
2318 2317 end = self.start(rev)
2319 2318 if not self._inline:
2320 2319 transaction.add(self.datafile, end)
2321 2320 end = rev * self._io.size
2322 2321 else:
2323 2322 end += rev * self._io.size
2324 2323
2325 2324 transaction.add(self.indexfile, end)
2326 2325
2327 2326 # then reset internal state in memory to forget those revisions
2328 2327 self._cache = None
2329 2328 self._chaininfocache = {}
2330 2329 self._chunkclear()
2331 2330 for x in xrange(rev, len(self)):
2332 2331 del self.nodemap[self.node(x)]
2333 2332
2334 2333 del self.index[rev:-1]
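
# Truncation offsets used above (illustrative numbers, 64-byte v1 index
# entries): stripping at rev == 10 with start(10) == 5000.
#
#   separate files: truncate the data file at 5000, the index at 10 * 64
#   inline revlog:  truncate the single file at 5000 + 10 * 64 == 5640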
2335 2334
2336 2335 def checksize(self):
2337 2336 expected = 0
2338 2337 if len(self):
2339 2338 expected = max(0, self.end(len(self) - 1))
2340 2339
2341 2340 try:
2342 2341 f = self._datafp()
2343 2342 f.seek(0, 2)
2344 2343 actual = f.tell()
2345 2344 f.close()
2346 2345 dd = actual - expected
2347 2346 except IOError as inst:
2348 2347 if inst.errno != errno.ENOENT:
2349 2348 raise
2350 2349 dd = 0
2351 2350
2352 2351 try:
2353 2352 f = self.opener(self.indexfile)
2354 2353 f.seek(0, 2)
2355 2354 actual = f.tell()
2356 2355 f.close()
2357 2356 s = self._io.size
2358 2357 i = max(0, actual // s)
2359 2358 di = actual - (i * s)
2360 2359 if self._inline:
2361 2360 databytes = 0
2362 2361 for r in self:
2363 2362 databytes += max(0, self.length(r))
2364 2363 dd = 0
2365 2364 di = actual - len(self) * s - databytes
2366 2365 except IOError as inst:
2367 2366 if inst.errno != errno.ENOENT:
2368 2367 raise
2369 2368 di = 0
2370 2369
2371 2370 return (dd, di)
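
# Interpretation of the (dd, di) return value (summarizing the logic
# above): dd is how far the data file's size is from what the index
# predicts, di is the count of stray bytes after the last whole index
# entry; (0, 0) means the revlog looks internally consistent.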
2372 2371
2373 2372 def files(self):
2374 2373 res = [self.indexfile]
2375 2374 if not self._inline:
2376 2375 res.append(self.datafile)
2377 2376 return res
2378 2377
2379 2378 DELTAREUSEALWAYS = 'always'
2380 2379 DELTAREUSESAMEREVS = 'samerevs'
2381 2380 DELTAREUSENEVER = 'never'
2382 2381
2383 2382 DELTAREUSEFULLADD = 'fulladd'
2384 2383
2385 2384 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2386 2385
2387 2386 def clone(self, tr, destrevlog, addrevisioncb=None,
2388 2387 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2389 2388 """Copy this revlog to another, possibly with format changes.
2390 2389
2391 2390 The destination revlog will contain the same revisions and nodes.
2392 2391 However, it may not be bit-for-bit identical due to e.g. delta encoding
2393 2392 differences.
2394 2393
2395 2394 The ``deltareuse`` argument controls how deltas from the existing revlog
2396 2395 are preserved in the destination revlog. The argument can have the
2397 2396 following values:
2398 2397
2399 2398 DELTAREUSEALWAYS
2400 2399 Deltas will always be reused (if possible), even if the destination
2401 2400 revlog would not select the same revisions for the delta. This is the
2402 2401 fastest mode of operation.
2403 2402 DELTAREUSESAMEREVS
2404 2403 Deltas will be reused if the destination revlog would pick the same
2405 2404 revisions for the delta. This mode strikes a balance between speed
2406 2405 and optimization.
2407 2406 DELTAREUSENEVER
2408 2407 Deltas will never be reused. This is the slowest mode of execution.
2409 2408 This mode can be used to recompute deltas (e.g. if the diff/delta
2410 2409 algorithm changes).
2411 2410 algorithm changes).
DELTAREUSEFULLADD
Revisions will be re-added as if they were new, going through the full
``addrevision()`` path (e.g. so that flag processors can run).
2412 2411 Delta computation can be slow, so the choice of delta reuse policy can
2413 2412 significantly affect run time.
2414 2413
2415 2414 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2416 2415 two extremes. Deltas will be reused if they are appropriate. But if a
2417 2416 better delta base is available, it will be chosen. This means if you
2418 2417 are converting a non-generaldelta revlog to a generaldelta revlog,
2419 2418 deltas will be recomputed if the delta's parent isn't a parent of the
2420 2419 revision.
2421 2420
2422 2421 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2423 2422 controls whether to compute deltas against both parents for merges.
2424 2423 If not specified, the destination revlog's existing setting is kept.
2425 2424 """
2426 2425 if deltareuse not in self.DELTAREUSEALL:
2427 2426 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2428 2427
2429 2428 if len(destrevlog):
2430 2429 raise ValueError(_('destination revlog is not empty'))
2431 2430
2432 2431 if getattr(self, 'filteredrevs', None):
2433 2432 raise ValueError(_('source revlog has filtered revisions'))
2434 2433 if getattr(destrevlog, 'filteredrevs', None):
2435 2434 raise ValueError(_('destination revlog has filtered revisions'))
2436 2435
2437 2436 # lazydeltabase controls whether to reuse a cached delta, if possible.
2438 2437 oldlazydeltabase = destrevlog._lazydeltabase
2439 2438 oldamd = destrevlog._aggressivemergedeltas
2440 2439
2441 2440 try:
2442 2441 if deltareuse == self.DELTAREUSEALWAYS:
2443 2442 destrevlog._lazydeltabase = True
2444 2443 elif deltareuse == self.DELTAREUSESAMEREVS:
2445 2444 destrevlog._lazydeltabase = False
2446 2445
2447 2446 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2448 2447
2449 2448 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2450 2449 self.DELTAREUSESAMEREVS)
2451 2450
2452 2451 deltacomputer = _deltacomputer(destrevlog)
2453 2452 index = self.index
2454 2453 for rev in self:
2455 2454 entry = index[rev]
2456 2455
2457 2456 # Some classes override linkrev to take filtered revs into
2458 2457 # account. Use raw entry from index.
2459 2458 flags = entry[0] & 0xffff
2460 2459 linkrev = entry[4]
2461 2460 p1 = index[entry[5]][7]
2462 2461 p2 = index[entry[6]][7]
2463 2462 node = entry[7]
2464 2463
2465 2464 # (Possibly) reuse the delta from the revlog if allowed and
2466 2465 # the revlog chunk is a delta.
2467 2466 cachedelta = None
2468 2467 rawtext = None
2469 2468 if populatecachedelta:
2470 2469 dp = self.deltaparent(rev)
2471 2470 if dp != nullrev:
2472 2471 cachedelta = (dp, str(self._chunk(rev)))
2473 2472
2474 2473 if not cachedelta:
2475 2474 rawtext = self.revision(rev, raw=True)
2476 2475
2477 2476
2478 2477 if deltareuse == self.DELTAREUSEFULLADD:
2479 2478 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2480 2479 cachedelta=cachedelta,
2481 2480 node=node, flags=flags,
2482 2481 deltacomputer=deltacomputer)
2483 2482 else:
2484 2483 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2485 2484 checkambig=False)
2486 2485 dfh = None
2487 2486 if not destrevlog._inline:
2488 2487 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2489 2488 try:
2490 2489 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2491 2490 p2, flags, cachedelta, ifh, dfh,
2492 2491 deltacomputer=deltacomputer)
2493 2492 finally:
2494 2493 if dfh:
2495 2494 dfh.close()
2496 2495 ifh.close()
2497 2496
2498 2497 if addrevisioncb:
2499 2498 addrevisioncb(self, rev, node)
2500 2499 finally:
2501 2500 destrevlog._lazydeltabase = oldlazydeltabase
2502 2501 destrevlog._aggressivemergedeltas = oldamd
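
# Minimal usage sketch (hypothetical names; ``tr`` is an open transaction
# and ``destrevlog`` is empty): recompute every delta, e.g. when changing
# the delta format.

def recomputedeltas(srcrevlog, destrevlog, tr):
    srcrevlog.clone(tr, destrevlog,
                    deltareuse=srcrevlog.DELTAREUSENEVER,
                    aggressivemergedeltas=True)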