revlog: use context manager for index file lifetime in checkinlinesize...
Boris Feld
r35989:0f2c51af default
@@ -1,2498 +1,2498 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80
81 81 # max size of revlog with inline data
82 82 _maxinline = 131072
83 83 _chunksize = 1048576
84 84
85 85 RevlogError = error.RevlogError
86 86 LookupError = error.LookupError
87 87 CensoredNodeError = error.CensoredNodeError
88 88 ProgrammingError = error.ProgrammingError
89 89
90 90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 91 _flagprocessors = {
92 92 REVIDX_ISCENSORED: None,
93 93 }
94 94
95 95 def addflagprocessor(flag, processor):
96 96 """Register a flag processor on a revision data flag.
97 97
98 98 Invariant:
99 99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 100 - Only one flag processor can be registered on a specific flag.
101 101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 102 following signatures:
103 103 - (read) f(self, rawtext) -> text, bool
104 104 - (write) f(self, text) -> rawtext, bool
105 105 - (raw) f(self, rawtext) -> bool
106 106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 107 directly visible to the user.
108 108 The boolean returned by these transforms is used to determine whether
109 109 the returned text can be used for hash integrity checking. For example,
110 110 if "write" returns False, then "text" is used to generate hash. If
111 111 "write" returns True, that basically means "rawtext" returned by "write"
112 112 should be used to generate hash. Usually, "write" and "read" return
113 113 different booleans. And "raw" returns a same boolean as "write".
114 114
115 115 Note: The 'raw' transform is used for changegroup generation and in some
116 116 debug commands. In this case the transform only indicates whether the
117 117 contents can be used for hash integrity checks.
118 118 """
119 119 if not flag & REVIDX_KNOWN_FLAGS:
120 120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag not in REVIDX_FLAGS_ORDER:
123 123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag in _flagprocessors:
126 126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 127 raise error.Abort(msg)
128 128 _flagprocessors[flag] = processor
129 129
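# Editor's sketch (not part of this changeset): registering a processor for
# the REVIDX_EXTSTORED flag. 'fetchfromstore' and 'storeexternally' are
# hypothetical helpers, shown only to illustrate the 3-tuple contract
# documented above.
#
#   def readexternal(rl, rawtext):
#       # rawtext holds a key; resolve it to the user-visible text
#       return fetchfromstore(rawtext), False
#   def writeexternal(rl, text):
#       # store the text elsewhere; keep only a key in the revlog
#       return storeexternally(text), False
#   def rawexternal(rl, rawtext):
#       # the stored key cannot be used for hash integrity checks
#       return False
#
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (readexternal, writeexternal, rawexternal))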
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
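# Worked example (editor's note): the first field of an index entry packs a
# 48-bit byte offset and 16 flag bits into one integer.
#
#   q = offset_type(1024, REVIDX_ISCENSORED)   # 1024 << 16 | 0x8000
#   getoffset(q) == 1024
#   gettype(q) == REVIDX_ISCENSORED            # 0x8000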
141 141 _nullhash = hashlib.sha1(nullid)
142 142
143 143 def hash(text, p1, p2):
144 144 """generate a hash from the given text and its parent hashes
145 145
146 146 This hash combines both the current file contents and its history
147 147 in a manner that makes it easy to distinguish nodes with the same
148 148 content in the revision graph.
149 149 """
150 150 # As of now, if one of the parent nodes is null, it is always p2
151 151 if p2 == nullid:
152 152 # deep copy of a hash is faster than creating one
153 153 s = _nullhash.copy()
154 154 s.update(p1)
155 155 else:
156 156 # neither of the parent nodes is nullid
157 157 if p1 < p2:
158 158 a = p1
159 159 b = p2
160 160 else:
161 161 a = p2
162 162 b = p1
163 163 s = hashlib.sha1(a)
164 164 s.update(b)
165 165 s.update(text)
166 166 return s.digest()
167 167
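# Editor's note: for 20-byte binary parents p1/p2 and bytes text, the code
# above is equivalent to
#
#   nodeid = hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
#
# (when p2 is nullid it is also the minimum, so the cached _nullhash copy is
# just a fast path). Sorting the parents makes the nodeid independent of
# parent order.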
168 168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 169 """returns revs[startidx:endidx] without empty trailing revs
170 170 """
171 171 length = revlog.length
172 172
173 173 if endidx is None:
174 174 endidx = len(revs)
175 175
176 176 # Trim empty revs at the end, but never the very first revision of a chain
177 177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 178 endidx -= 1
179 179
180 180 return revs[startidx:endidx]
181 181
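# Editor's example: with revision lengths [20, 0, 15, 0, 0],
# _trimchunk(revlog, revs, 0) returns revs[0:3], dropping the two empty
# trailing revisions; the 'endidx > 1' guard keeps at least the first
# revision of the chain even if it is empty.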
182 182 def _slicechunk(revlog, revs):
183 183 """slice revs to reduce the amount of unrelated data to be read from disk.
184 184
185 185 ``revs`` is sliced into groups that should each be read in one go.
186 186 Assume that revs are sorted.
187 187 """
188 188 start = revlog.start
189 189 length = revlog.length
190 190
191 191 if len(revs) <= 1:
192 192 yield revs
193 193 return
194 194
195 195 startbyte = start(revs[0])
196 196 endbyte = start(revs[-1]) + length(revs[-1])
197 197 readdata = deltachainspan = endbyte - startbyte
198 198
199 199 chainpayload = sum(length(r) for r in revs)
200 200
201 201 if deltachainspan:
202 202 density = chainpayload / float(deltachainspan)
203 203 else:
204 204 density = 1.0
205 205
206 206 # Store the gaps in a heap to have them sorted by decreasing size
207 207 gapsheap = []
208 208 heapq.heapify(gapsheap)
209 209 prevend = None
210 210 for i, rev in enumerate(revs):
211 211 revstart = start(rev)
212 212 revlen = length(rev)
213 213
214 214 # Skip empty revisions to form larger holes
215 215 if revlen == 0:
216 216 continue
217 217
218 218 if prevend is not None:
219 219 gapsize = revstart - prevend
220 220 # only consider holes that are large enough
221 221 if gapsize > revlog._srmingapsize:
222 222 heapq.heappush(gapsheap, (-gapsize, i))
223 223
224 224 prevend = revstart + revlen
225 225
226 226 # Collect the indices of the largest holes until the density is acceptable
227 227 indicesheap = []
228 228 heapq.heapify(indicesheap)
229 229 while gapsheap and density < revlog._srdensitythreshold:
230 230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 231
232 232 heapq.heappush(indicesheap, gapidx)
233 233
234 234 # the gap sizes are stored as negatives so that the min-heap
235 235 # pops the largest gaps first
236 236 readdata -= (-oppgapsize)
237 237 if readdata > 0:
238 238 density = chainpayload / float(readdata)
239 239 else:
240 240 density = 1.0
241 241
242 242 # Cut the revs at collected indices
243 243 previdx = 0
244 244 while indicesheap:
245 245 idx = heapq.heappop(indicesheap)
246 246
247 247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 248 if chunk:
249 249 yield chunk
250 250
251 251 previdx = idx
252 252
253 253 chunk = _trimchunk(revlog, revs, previdx)
254 254 if chunk:
255 255 yield chunk
256 256
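# Editor's worked example, assuming _srdensitythreshold = 0.5 and
# _srmingapsize small enough to count the gap: three revisions occupying
# bytes [0, 10), [10, 20) and [90, 100) span 100 bytes but only carry a
# 30-byte payload, so density = 0.3. Popping the 70-byte gap brings the
# read down to 30 bytes (density 1.0) and the revs are yielded as two
# slices, [r0, r1] and [r2].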
257 257 @attr.s(slots=True, frozen=True)
258 258 class _deltainfo(object):
259 259 distance = attr.ib()
260 260 deltalen = attr.ib()
261 261 data = attr.ib()
262 262 base = attr.ib()
263 263 chainbase = attr.ib()
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 267 class _deltacomputer(object):
268 268 def __init__(self, revlog):
269 269 self.revlog = revlog
270 270
271 271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 272 """
273 273 Provides candidate revisions that are worth diffing against,
274 274 grouped from the cheapest to the most expensive to try.
275 275 """
276 276 revlog = self.revlog
277 277 curr = len(revlog)
278 278 prev = curr - 1
279 279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280 280
281 281 # should we try to build a delta?
282 282 if prev != nullrev and revlog.storedeltachains:
283 283 tested = set()
284 284 # This condition is true most of the time when processing
285 285 # changegroup data into a generaldelta repo. The only time it
286 286 # isn't true is if this is the first revision in a delta chain
287 287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 289 # Assume what we received from the server is a good choice
290 290 # build delta will reuse the cache
291 291 yield (cachedelta[0],)
292 292 tested.add(cachedelta[0])
293 293
294 294 if revlog._generaldelta:
295 295 # exclude the already tested lazy delta base, if any
296 296 parents = [p for p in (p1r, p2r)
297 297 if p != nullrev and p not in tested]
298 298 if parents and not revlog._aggressivemergedeltas:
299 299 # Pick whichever parent is closer to us (to minimize the
300 300 # chance of having to build a fulltext).
301 301 parents = [max(parents)]
302 302 tested.update(parents)
303 303 yield parents
304 304
305 305 if prev not in tested:
306 306 # other approaches failed; try against prev to hopefully save us a
307 307 # fulltext.
308 308 yield (prev,)
309 309
310 310 def buildtext(self, revinfo, fh):
311 311 """Builds a fulltext version of a revision
312 312
313 313 revinfo: _revisioninfo instance that contains all needed info
314 314 fh: file handle to either the .i or the .d revlog file,
315 315 depending on whether it is inlined or not
316 316 """
317 317 btext = revinfo.btext
318 318 if btext[0] is not None:
319 319 return btext[0]
320 320
321 321 revlog = self.revlog
322 322 cachedelta = revinfo.cachedelta
323 323 flags = revinfo.flags
324 324 node = revinfo.node
325 325
326 326 baserev = cachedelta[0]
327 327 delta = cachedelta[1]
328 328 # special case deltas which replace entire base; no need to decode
329 329 # base revision. this neatly avoids censored bases, which throw when
330 330 # they're decoded.
331 331 hlen = struct.calcsize(">lll")
332 332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 333 len(delta) - hlen):
334 334 btext[0] = delta[hlen:]
335 335 else:
336 336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 337 btext[0] = mdiff.patch(basetext, delta)
338 338
339 339 try:
340 340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 341 btext[0], validatehash = res
342 342 if validatehash:
343 343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 344 if flags & REVIDX_ISCENSORED:
345 345 raise RevlogError(_('node %s is not censored') % node)
346 346 except CensoredNodeError:
347 347 # must pass the censored index flag to add censored revisions
348 348 if not flags & REVIDX_ISCENSORED:
349 349 raise
350 350 return btext[0]
351 351
352 352 def _builddeltadiff(self, base, revinfo, fh):
353 353 revlog = self.revlog
354 354 t = self.buildtext(revinfo, fh)
355 355 if revlog.iscensored(base):
356 356 # deltas based on a censored revision must replace the
357 357 # full content in one patch, so delta works everywhere
358 358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 359 delta = header + t
360 360 else:
361 361 ptext = revlog.revision(base, _df=fh, raw=True)
362 362 delta = mdiff.textdiff(ptext, t)
363 363
364 364 return delta
365 365
366 366 def _builddeltainfo(self, revinfo, base, fh):
367 367 # can we use the cached delta?
368 368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 369 delta = revinfo.cachedelta[1]
370 370 else:
371 371 delta = self._builddeltadiff(base, revinfo, fh)
372 372 revlog = self.revlog
373 373 header, data = revlog.compress(delta)
374 374 deltalen = len(header) + len(data)
375 375 chainbase = revlog.chainbase(base)
376 376 offset = revlog.end(len(revlog) - 1)
377 377 dist = deltalen + offset - revlog.start(chainbase)
378 378 if revlog._generaldelta:
379 379 deltabase = base
380 380 else:
381 381 deltabase = chainbase
382 382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 383 chainlen += 1
384 384 compresseddeltalen += deltalen
385 385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 386 chainbase, chainlen, compresseddeltalen)
387 387
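# Editor's note on 'dist' above: offset is where the new delta would be
# appended, so dist spans from the start of the chain's base revision to
# the end of the candidate delta, i.e. the number of bytes a reader may
# have to cover to reconstruct the text. revlog._isgooddeltainfo (not part
# of this hunk) uses it to bound read amplification.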
388 388 def finddeltainfo(self, revinfo, fh):
389 389 """Find an acceptable delta against a candidate revision
390 390
391 391 revinfo: information about the revision (instance of _revisioninfo)
392 392 fh: file handle to either the .i or the .d revlog file,
393 393 depending on whether it is inlined or not
394 394
395 395 Returns the first acceptable candidate revision, as ordered by
396 396 _getcandidaterevs
397 397 """
398 398 cachedelta = revinfo.cachedelta
399 399 p1 = revinfo.p1
400 400 p2 = revinfo.p2
401 401 revlog = self.revlog
402 402
403 403 deltainfo = None
404 404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 405 nominateddeltas = []
406 406 for candidaterev in candidaterevs:
407 407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 409 nominateddeltas.append(candidatedelta)
410 410 if nominateddeltas:
411 411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 412 break
413 413
414 414 return deltainfo
415 415
416 416 @attr.s(slots=True, frozen=True)
417 417 class _revisioninfo(object):
418 418 """Information about a revision that allows building its fulltext
419 419 node: expected hash of the revision
420 420 p1, p2: parent revs of the revision
421 421 btext: built text cache consisting of a one-element list
422 422 cachedelta: (baserev, uncompressed_delta) or None
423 423 flags: flags associated to the revision storage
424 424
425 425 One of btext[0] or cachedelta must be set.
426 426 """
427 427 node = attr.ib()
428 428 p1 = attr.ib()
429 429 p2 = attr.ib()
430 430 btext = attr.ib()
431 431 textlen = attr.ib()
432 432 cachedelta = attr.ib()
433 433 flags = attr.ib()
434 434
435 435 # index v0:
436 436 # 4 bytes: offset
437 437 # 4 bytes: compressed length
438 438 # 4 bytes: base rev
439 439 # 4 bytes: link rev
440 440 # 20 bytes: parent 1 nodeid
441 441 # 20 bytes: parent 2 nodeid
442 442 # 20 bytes: nodeid
443 443 indexformatv0 = struct.Struct(">4l20s20s20s")
444 444 indexformatv0_pack = indexformatv0.pack
445 445 indexformatv0_unpack = indexformatv0.unpack
446 446
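# Editor's note: ">4l20s20s20s" packs four 4-byte integers and three
# 20-byte nodeids, so indexformatv0.size == 4 * 4 + 3 * 20 == 76 bytes
# per v0 entry.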
447 447 class revlogoldio(object):
448 448 def __init__(self):
449 449 self.size = indexformatv0.size
450 450
451 451 def parseindex(self, data, inline):
452 452 s = self.size
453 453 index = []
454 454 nodemap = {nullid: nullrev}
455 455 n = off = 0
456 456 l = len(data)
457 457 while off + s <= l:
458 458 cur = data[off:off + s]
459 459 off += s
460 460 e = indexformatv0_unpack(cur)
461 461 # transform to revlogv1 format
462 462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
463 463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
464 464 index.append(e2)
465 465 nodemap[e[6]] = n
466 466 n += 1
467 467
468 468 # add the magic null revision at -1
469 469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 return index, nodemap, None
472 472
473 473 def packentry(self, entry, node, version, rev):
474 474 if gettype(entry[0]):
475 475 raise RevlogError(_('index entry flags need revlog version 1'))
476 476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
477 477 node(entry[5]), node(entry[6]), entry[7])
478 478 return indexformatv0_pack(*e2)
479 479
480 480 # index ng:
481 481 # 6 bytes: offset
482 482 # 2 bytes: flags
483 483 # 4 bytes: compressed length
484 484 # 4 bytes: uncompressed length
485 485 # 4 bytes: base rev
486 486 # 4 bytes: link rev
487 487 # 4 bytes: parent 1 rev
488 488 # 4 bytes: parent 2 rev
489 489 # 32 bytes: nodeid
490 490 indexformatng = struct.Struct(">Qiiiiii20s12x")
491 491 indexformatng_pack = indexformatng.pack
492 492 versionformat = struct.Struct(">I")
493 493 versionformat_pack = versionformat.pack
494 494 versionformat_unpack = versionformat.unpack
495 495
496 496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 497 # signed integer)
498 498 _maxentrysize = 0x7fffffff
499 499
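# Editor's note: ">Qiiiiii20s12x" is 8 + 6 * 4 + 20 + 12 == 64 bytes per
# entry; the 12 pad bytes leave room for the 32-byte nodeid field above.
# For rev 0 the first 4 bytes are overwritten with the version header, see
# revlogio.packentry below.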
500 500 class revlogio(object):
501 501 def __init__(self):
502 502 self.size = indexformatng.size
503 503
504 504 def parseindex(self, data, inline):
505 505 # call the C implementation to parse the index data
506 506 index, cache = parsers.parse_index2(data, inline)
507 507 return index, getattr(index, 'nodemap', None), cache
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 p = indexformatng_pack(*entry)
511 511 if rev == 0:
512 512 p = versionformat_pack(version) + p[4:]
513 513 return p
514 514
515 515 class revlog(object):
516 516 """
517 517 the underlying revision storage object
518 518
519 519 A revlog consists of two parts, an index and the revision data.
520 520
521 521 The index is a file with a fixed record size containing
522 522 information on each revision, including its nodeid (hash), the
523 523 nodeids of its parents, the position and offset of its data within
524 524 the data file, and the revision it's based on. Finally, each entry
525 525 contains a linkrev entry that can serve as a pointer to external
526 526 data.
527 527
528 528 The revision data itself is a linear collection of data chunks.
529 529 Each chunk represents a revision and is usually represented as a
530 530 delta against the previous chunk. To bound lookup time, runs of
531 531 deltas are limited to about 2 times the length of the original
532 532 version data. This makes retrieval of a version proportional to
533 533 its size, or O(1) relative to the number of revisions.
534 534
535 535 Both pieces of the revlog are written to in an append-only
536 536 fashion, which means we never need to rewrite a file to insert or
537 537 remove data, and can use some simple techniques to avoid the need
538 538 for locking while reading.
539 539
540 540 If checkambig, indexfile is opened with checkambig=True at
541 541 writing, to avoid file stat ambiguity.
542 542
543 543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 544 index will be mmapped rather than read if it is larger than the
545 545 configured threshold.
546 546 """
547 547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 548 mmaplargeindex=False):
549 549 """
550 550 create a revlog object
551 551
552 552 opener is a function that abstracts the file opening operation
553 553 and can be used to implement COW semantics or the like.
554 554 """
555 555 self.indexfile = indexfile
556 556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 557 self.opener = opener
558 558 # When True, indexfile is opened with checkambig=True at writing, to
559 559 # avoid file stat ambiguity.
560 560 self._checkambig = checkambig
561 561 # 3-tuple of (node, rev, text) for a raw revision.
562 562 self._cache = None
563 563 # Maps rev to chain base rev.
564 564 self._chainbasecache = util.lrucachedict(100)
565 565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 566 self._chunkcache = (0, '')
567 567 # How much data to read and cache into the raw revlog data cache.
568 568 self._chunkcachesize = 65536
569 569 self._maxchainlen = None
570 570 self._aggressivemergedeltas = False
571 571 self.index = []
572 572 # Mapping of partial identifiers to full nodes.
573 573 self._pcache = {}
574 574 # Mapping of revision integer to full node.
575 575 self._nodecache = {nullid: nullrev}
576 576 self._nodepos = None
577 577 self._compengine = 'zlib'
578 578 self._maxdeltachainspan = -1
579 579 self._withsparseread = False
580 580 self._srdensitythreshold = 0.25
581 581 self._srmingapsize = 262144
582 582
583 583 mmapindexthreshold = None
584 584 v = REVLOG_DEFAULT_VERSION
585 585 opts = getattr(opener, 'options', None)
586 586 if opts is not None:
587 587 if 'revlogv2' in opts:
588 588 # version 2 revlogs always use generaldelta.
589 589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 590 elif 'revlogv1' in opts:
591 591 if 'generaldelta' in opts:
592 592 v |= FLAG_GENERALDELTA
593 593 else:
594 594 v = 0
595 595 if 'chunkcachesize' in opts:
596 596 self._chunkcachesize = opts['chunkcachesize']
597 597 if 'maxchainlen' in opts:
598 598 self._maxchainlen = opts['maxchainlen']
599 599 if 'aggressivemergedeltas' in opts:
600 600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 602 if 'compengine' in opts:
603 603 self._compengine = opts['compengine']
604 604 if 'maxdeltachainspan' in opts:
605 605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 607 mmapindexthreshold = opts['mmapindexthreshold']
608 608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 609 if 'sparse-read-density-threshold' in opts:
610 610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 611 if 'sparse-read-min-gap-size' in opts:
612 612 self._srmingapsize = opts['sparse-read-min-gap-size']
613 613
614 614 if self._chunkcachesize <= 0:
615 615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 616 'than 0') % self._chunkcachesize)
617 617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 619 'of 2') % self._chunkcachesize)
620 620
621 621 indexdata = ''
622 622 self._initempty = True
623 623 try:
624 624 with self._indexfp() as f:
625 625 if (mmapindexthreshold is not None and
626 626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 627 indexdata = util.buffer(util.mmapread(f))
628 628 else:
629 629 indexdata = f.read()
630 630 if len(indexdata) > 0:
631 631 v = versionformat_unpack(indexdata[:4])[0]
632 632 self._initempty = False
633 633 except IOError as inst:
634 634 if inst.errno != errno.ENOENT:
635 635 raise
636 636
637 637 self.version = v
638 638 self._inline = v & FLAG_INLINE_DATA
639 639 self._generaldelta = v & FLAG_GENERALDELTA
640 640 flags = v & ~0xFFFF
641 641 fmt = v & 0xFFFF
642 642 if fmt == REVLOGV0:
643 643 if flags:
644 644 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 645 'revlog %s') %
646 646 (flags >> 16, fmt, self.indexfile))
647 647 elif fmt == REVLOGV1:
648 648 if flags & ~REVLOGV1_FLAGS:
649 649 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 650 'revlog %s') %
651 651 (flags >> 16, fmt, self.indexfile))
652 652 elif fmt == REVLOGV2:
653 653 if flags & ~REVLOGV2_FLAGS:
654 654 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 655 'revlog %s') %
656 656 (flags >> 16, fmt, self.indexfile))
657 657 else:
658 658 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 659 (fmt, self.indexfile))
660 660
661 661 self.storedeltachains = True
662 662
663 663 self._io = revlogio()
664 664 if self.version == REVLOGV0:
665 665 self._io = revlogoldio()
666 666 try:
667 667 d = self._io.parseindex(indexdata, self._inline)
668 668 except (ValueError, IndexError):
669 669 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 670 self.index, nodemap, self._chunkcache = d
671 671 if nodemap is not None:
672 672 self.nodemap = self._nodecache = nodemap
673 673 if not self._chunkcache:
674 674 self._chunkclear()
675 675 # revnum -> (chain-length, sum-delta-length)
676 676 self._chaininfocache = {}
677 677 # revlog header -> revlog compressor
678 678 self._decompressors = {}
679 679
680 680 @util.propertycache
681 681 def _compressor(self):
682 682 return util.compengines[self._compengine].revlogcompressor()
683 683
684 684 def _indexfp(self, mode='r'):
685 685 """file object for the revlog's index file"""
686 686 args = {r'mode': mode}
687 687 if mode != 'r':
688 688 args[r'checkambig'] = self._checkambig
689 689 if mode == 'w':
690 690 args[r'atomictemp'] = True
691 691 return self.opener(self.indexfile, **args)
692 692
693 693 def _datafp(self, mode='r'):
694 694 """file object for the revlog's data file"""
695 695 return self.opener(self.datafile, mode=mode)
696 696
697 697 def tip(self):
698 698 return self.node(len(self.index) - 2)
699 699 def __contains__(self, rev):
700 700 return 0 <= rev < len(self)
701 701 def __len__(self):
702 702 return len(self.index) - 1
703 703 def __iter__(self):
704 704 return iter(xrange(len(self)))
705 705 def revs(self, start=0, stop=None):
706 706 """iterate over all rev in this revlog (from start to stop)"""
707 707 step = 1
708 708 if stop is not None:
709 709 if start > stop:
710 710 step = -1
711 711 stop += step
712 712 else:
713 713 stop = len(self)
714 714 return xrange(start, stop, step)
715 715
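# Editor's note: 'stop' is inclusive and the direction adapts to the
# arguments, e.g. revs(start=5, stop=2) yields 5, 4, 3, 2 while
# revs(stop=2) yields 0, 1, 2.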
716 716 @util.propertycache
717 717 def nodemap(self):
718 718 self.rev(self.node(0))
719 719 return self._nodecache
720 720
721 721 def hasnode(self, node):
722 722 try:
723 723 self.rev(node)
724 724 return True
725 725 except KeyError:
726 726 return False
727 727
728 728 def clearcaches(self):
729 729 self._cache = None
730 730 self._chainbasecache.clear()
731 731 self._chunkcache = (0, '')
732 732 self._pcache = {}
733 733
734 734 try:
735 735 self._nodecache.clearcaches()
736 736 except AttributeError:
737 737 self._nodecache = {nullid: nullrev}
738 738 self._nodepos = None
739 739
740 740 def rev(self, node):
741 741 try:
742 742 return self._nodecache[node]
743 743 except TypeError:
744 744 raise
745 745 except RevlogError:
746 746 # parsers.c radix tree lookup failed
747 747 if node == wdirid:
748 748 raise error.WdirUnsupported
749 749 raise LookupError(node, self.indexfile, _('no node'))
750 750 except KeyError:
751 751 # pure python cache lookup failed
752 752 n = self._nodecache
753 753 i = self.index
754 754 p = self._nodepos
755 755 if p is None:
756 756 p = len(i) - 2
757 757 for r in xrange(p, -1, -1):
758 758 v = i[r][7]
759 759 n[v] = r
760 760 if v == node:
761 761 self._nodepos = r - 1
762 762 return r
763 763 if node == wdirid:
764 764 raise error.WdirUnsupported
765 765 raise LookupError(node, self.indexfile, _('no node'))
766 766
767 767 # Accessors for index entries.
768 768
769 769 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
770 770 # are flags.
771 771 def start(self, rev):
772 772 return int(self.index[rev][0] >> 16)
773 773
774 774 def flags(self, rev):
775 775 return self.index[rev][0] & 0xFFFF
776 776
777 777 def length(self, rev):
778 778 return self.index[rev][1]
779 779
780 780 def rawsize(self, rev):
781 781 """return the length of the uncompressed text for a given revision"""
782 782 l = self.index[rev][2]
783 783 if l >= 0:
784 784 return l
785 785
786 786 t = self.revision(rev, raw=True)
787 787 return len(t)
788 788
789 789 def size(self, rev):
790 790 """length of non-raw text (processed by a "read" flag processor)"""
791 791 # fast path: if no "read" flag processor could change the content,
792 792 # size is rawsize. note: ELLIPSIS is known to not change the content.
793 793 flags = self.flags(rev)
794 794 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
795 795 return self.rawsize(rev)
796 796
797 797 return len(self.revision(rev, raw=False))
798 798
799 799 def chainbase(self, rev):
800 800 base = self._chainbasecache.get(rev)
801 801 if base is not None:
802 802 return base
803 803
804 804 index = self.index
805 805 base = index[rev][3]
806 806 while base != rev:
807 807 rev = base
808 808 base = index[rev][3]
809 809
810 810 self._chainbasecache[rev] = base
811 811 return base
812 812
813 813 def linkrev(self, rev):
814 814 return self.index[rev][4]
815 815
816 816 def parentrevs(self, rev):
817 817 try:
818 818 entry = self.index[rev]
819 819 except IndexError:
820 820 if rev == wdirrev:
821 821 raise error.WdirUnsupported
822 822 raise
823 823
824 824 return entry[5], entry[6]
825 825
826 826 def node(self, rev):
827 827 try:
828 828 return self.index[rev][7]
829 829 except IndexError:
830 830 if rev == wdirrev:
831 831 raise error.WdirUnsupported
832 832 raise
833 833
834 834 # Derived from index values.
835 835
836 836 def end(self, rev):
837 837 return self.start(rev) + self.length(rev)
838 838
839 839 def parents(self, node):
840 840 i = self.index
841 841 d = i[self.rev(node)]
842 842 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
843 843
844 844 def chainlen(self, rev):
845 845 return self._chaininfo(rev)[0]
846 846
847 847 def _chaininfo(self, rev):
848 848 chaininfocache = self._chaininfocache
849 849 if rev in chaininfocache:
850 850 return chaininfocache[rev]
851 851 index = self.index
852 852 generaldelta = self._generaldelta
853 853 iterrev = rev
854 854 e = index[iterrev]
855 855 clen = 0
856 856 compresseddeltalen = 0
857 857 while iterrev != e[3]:
858 858 clen += 1
859 859 compresseddeltalen += e[1]
860 860 if generaldelta:
861 861 iterrev = e[3]
862 862 else:
863 863 iterrev -= 1
864 864 if iterrev in chaininfocache:
865 865 t = chaininfocache[iterrev]
866 866 clen += t[0]
867 867 compresseddeltalen += t[1]
868 868 break
869 869 e = index[iterrev]
870 870 else:
871 871 # Add text length of base since decompressing that also takes
872 872 # work. For cache hits the length is already included.
873 873 compresseddeltalen += e[1]
874 874 r = (clen, compresseddeltalen)
875 875 chaininfocache[rev] = r
876 876 return r
877 877
878 878 def _deltachain(self, rev, stoprev=None):
879 879 """Obtain the delta chain for a revision.
880 880
881 881 ``stoprev`` specifies a revision to stop at. If not specified, we
882 882 stop at the base of the chain.
883 883
884 884 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
885 885 revs in ascending order and ``stopped`` is a bool indicating whether
886 886 ``stoprev`` was hit.
887 887 """
888 888 # Try C implementation.
889 889 try:
890 890 return self.index.deltachain(rev, stoprev, self._generaldelta)
891 891 except AttributeError:
892 892 pass
893 893
894 894 chain = []
895 895
896 896 # Alias to prevent attribute lookup in tight loop.
897 897 index = self.index
898 898 generaldelta = self._generaldelta
899 899
900 900 iterrev = rev
901 901 e = index[iterrev]
902 902 while iterrev != e[3] and iterrev != stoprev:
903 903 chain.append(iterrev)
904 904 if generaldelta:
905 905 iterrev = e[3]
906 906 else:
907 907 iterrev -= 1
908 908 e = index[iterrev]
909 909
910 910 if iterrev == stoprev:
911 911 stopped = True
912 912 else:
913 913 chain.append(iterrev)
914 914 stopped = False
915 915
916 916 chain.reverse()
917 917 return chain, stopped
918 918
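# Editor's example (pure-Python path): with generaldelta, each step follows
# the stored delta base e[3]; without it, the chain is implicitly
# rev, rev - 1, ... down to the base. For bases index[3][3] == 1 and
# index[1][3] == 1, _deltachain(3) returns ([1, 3], False).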
919 919 def ancestors(self, revs, stoprev=0, inclusive=False):
920 920 """Generate the ancestors of 'revs' in reverse topological order.
921 921 Does not generate revs lower than stoprev.
922 922
923 923 See the documentation for ancestor.lazyancestors for more details."""
924 924
925 925 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
926 926 inclusive=inclusive)
927 927
928 928 def descendants(self, revs):
929 929 """Generate the descendants of 'revs' in revision order.
930 930
931 931 Yield a sequence of revision numbers starting with a child of
932 932 some rev in revs, i.e., each revision is *not* considered a
933 933 descendant of itself. Results are ordered by revision number (a
934 934 topological sort)."""
935 935 first = min(revs)
936 936 if first == nullrev:
937 937 for i in self:
938 938 yield i
939 939 return
940 940
941 941 seen = set(revs)
942 942 for i in self.revs(start=first + 1):
943 943 for x in self.parentrevs(i):
944 944 if x != nullrev and x in seen:
945 945 seen.add(i)
946 946 yield i
947 947 break
948 948
949 949 def findcommonmissing(self, common=None, heads=None):
950 950 """Return a tuple of the ancestors of common and the ancestors of heads
951 951 that are not ancestors of common. In revset terminology, we return the
952 952 tuple:
953 953
954 954 ::common, (::heads) - (::common)
955 955
956 956 The list is sorted by revision number, meaning it is
957 957 topologically sorted.
958 958
959 959 'heads' and 'common' are both lists of node IDs. If heads is
960 960 not supplied, uses all of the revlog's heads. If common is not
961 961 supplied, uses nullid."""
962 962 if common is None:
963 963 common = [nullid]
964 964 if heads is None:
965 965 heads = self.heads()
966 966
967 967 common = [self.rev(n) for n in common]
968 968 heads = [self.rev(n) for n in heads]
969 969
970 970 # we want the ancestors, but inclusive
971 971 class lazyset(object):
972 972 def __init__(self, lazyvalues):
973 973 self.addedvalues = set()
974 974 self.lazyvalues = lazyvalues
975 975
976 976 def __contains__(self, value):
977 977 return value in self.addedvalues or value in self.lazyvalues
978 978
979 979 def __iter__(self):
980 980 added = self.addedvalues
981 981 for r in added:
982 982 yield r
983 983 for r in self.lazyvalues:
984 984 if r not in added:
985 985 yield r
986 986
987 987 def add(self, value):
988 988 self.addedvalues.add(value)
989 989
990 990 def update(self, values):
991 991 self.addedvalues.update(values)
992 992
993 993 has = lazyset(self.ancestors(common))
994 994 has.add(nullrev)
995 995 has.update(common)
996 996
997 997 # take all ancestors from heads that aren't in has
998 998 missing = set()
999 999 visit = collections.deque(r for r in heads if r not in has)
1000 1000 while visit:
1001 1001 r = visit.popleft()
1002 1002 if r in missing:
1003 1003 continue
1004 1004 else:
1005 1005 missing.add(r)
1006 1006 for p in self.parentrevs(r):
1007 1007 if p not in has:
1008 1008 visit.append(p)
1009 1009 missing = list(missing)
1010 1010 missing.sort()
1011 1011 return has, [self.node(miss) for miss in missing]
1012 1012
1013 1013 def incrementalmissingrevs(self, common=None):
1014 1014 """Return an object that can be used to incrementally compute the
1015 1015 revision numbers of the ancestors of arbitrary sets that are not
1016 1016 ancestors of common. This is an ancestor.incrementalmissingancestors
1017 1017 object.
1018 1018
1019 1019 'common' is a list of revision numbers. If common is not supplied, uses
1020 1020 nullrev.
1021 1021 """
1022 1022 if common is None:
1023 1023 common = [nullrev]
1024 1024
1025 1025 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1026 1026
1027 1027 def findmissingrevs(self, common=None, heads=None):
1028 1028 """Return the revision numbers of the ancestors of heads that
1029 1029 are not ancestors of common.
1030 1030
1031 1031 More specifically, return a list of revision numbers corresponding to
1032 1032 nodes N such that every N satisfies the following constraints:
1033 1033
1034 1034 1. N is an ancestor of some node in 'heads'
1035 1035 2. N is not an ancestor of any node in 'common'
1036 1036
1037 1037 The list is sorted by revision number, meaning it is
1038 1038 topologically sorted.
1039 1039
1040 1040 'heads' and 'common' are both lists of revision numbers. If heads is
1041 1041 not supplied, uses all of the revlog's heads. If common is not
1042 1042 supplied, uses nullid."""
1043 1043 if common is None:
1044 1044 common = [nullrev]
1045 1045 if heads is None:
1046 1046 heads = self.headrevs()
1047 1047
1048 1048 inc = self.incrementalmissingrevs(common=common)
1049 1049 return inc.missingancestors(heads)
1050 1050
1051 1051 def findmissing(self, common=None, heads=None):
1052 1052 """Return the ancestors of heads that are not ancestors of common.
1053 1053
1054 1054 More specifically, return a list of nodes N such that every N
1055 1055 satisfies the following constraints:
1056 1056
1057 1057 1. N is an ancestor of some node in 'heads'
1058 1058 2. N is not an ancestor of any node in 'common'
1059 1059
1060 1060 The list is sorted by revision number, meaning it is
1061 1061 topologically sorted.
1062 1062
1063 1063 'heads' and 'common' are both lists of node IDs. If heads is
1064 1064 not supplied, uses all of the revlog's heads. If common is not
1065 1065 supplied, uses nullid."""
1066 1066 if common is None:
1067 1067 common = [nullid]
1068 1068 if heads is None:
1069 1069 heads = self.heads()
1070 1070
1071 1071 common = [self.rev(n) for n in common]
1072 1072 heads = [self.rev(n) for n in heads]
1073 1073
1074 1074 inc = self.incrementalmissingrevs(common=common)
1075 1075 return [self.node(r) for r in inc.missingancestors(heads)]
1076 1076
1077 1077 def nodesbetween(self, roots=None, heads=None):
1078 1078 """Return a topological path from 'roots' to 'heads'.
1079 1079
1080 1080 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1081 1081 topologically sorted list of all nodes N that satisfy both of
1082 1082 these constraints:
1083 1083
1084 1084 1. N is a descendant of some node in 'roots'
1085 1085 2. N is an ancestor of some node in 'heads'
1086 1086
1087 1087 Every node is considered to be both a descendant and an ancestor
1088 1088 of itself, so every reachable node in 'roots' and 'heads' will be
1089 1089 included in 'nodes'.
1090 1090
1091 1091 'outroots' is the list of reachable nodes in 'roots', i.e., the
1092 1092 subset of 'roots' that is returned in 'nodes'. Likewise,
1093 1093 'outheads' is the subset of 'heads' that is also in 'nodes'.
1094 1094
1095 1095 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1096 1096 unspecified, uses nullid as the only root. If 'heads' is
1097 1097 unspecified, uses list of all of the revlog's heads."""
1098 1098 nonodes = ([], [], [])
1099 1099 if roots is not None:
1100 1100 roots = list(roots)
1101 1101 if not roots:
1102 1102 return nonodes
1103 1103 lowestrev = min([self.rev(n) for n in roots])
1104 1104 else:
1105 1105 roots = [nullid] # Everybody's a descendant of nullid
1106 1106 lowestrev = nullrev
1107 1107 if (lowestrev == nullrev) and (heads is None):
1108 1108 # We want _all_ the nodes!
1109 1109 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1110 1110 if heads is None:
1111 1111 # All nodes are ancestors, so the latest ancestor is the last
1112 1112 # node.
1113 1113 highestrev = len(self) - 1
1114 1114 # Set ancestors to None to signal that every node is an ancestor.
1115 1115 ancestors = None
1116 1116 # Set heads to an empty dictionary for later discovery of heads
1117 1117 heads = {}
1118 1118 else:
1119 1119 heads = list(heads)
1120 1120 if not heads:
1121 1121 return nonodes
1122 1122 ancestors = set()
1123 1123 # Turn heads into a dictionary so we can remove 'fake' heads.
1124 1124 # Also, later we will be using it to filter out the heads we can't
1125 1125 # find from roots.
1126 1126 heads = dict.fromkeys(heads, False)
1127 1127 # Start at the top and keep marking parents until we're done.
1128 1128 nodestotag = set(heads)
1129 1129 # Remember where the top was so we can use it as a limit later.
1130 1130 highestrev = max([self.rev(n) for n in nodestotag])
1131 1131 while nodestotag:
1132 1132 # grab a node to tag
1133 1133 n = nodestotag.pop()
1134 1134 # Never tag nullid
1135 1135 if n == nullid:
1136 1136 continue
1137 1137 # A node's revision number represents its place in a
1138 1138 # topologically sorted list of nodes.
1139 1139 r = self.rev(n)
1140 1140 if r >= lowestrev:
1141 1141 if n not in ancestors:
1142 1142 # If we are possibly a descendant of one of the roots
1143 1143 # and we haven't already been marked as an ancestor
1144 1144 ancestors.add(n) # Mark as ancestor
1145 1145 # Add non-nullid parents to list of nodes to tag.
1146 1146 nodestotag.update([p for p in self.parents(n) if
1147 1147 p != nullid])
1148 1148 elif n in heads: # We've seen it before, is it a fake head?
1149 1149 # So it is, real heads should not be the ancestors of
1150 1150 # any other heads.
1151 1151 heads.pop(n)
1152 1152 if not ancestors:
1153 1153 return nonodes
1154 1154 # Now that we have our set of ancestors, we want to remove any
1155 1155 # roots that are not ancestors.
1156 1156
1157 1157 # If one of the roots was nullid, everything is included anyway.
1158 1158 if lowestrev > nullrev:
1159 1159 # But, since we weren't, let's recompute the lowest rev to not
1160 1160 # include roots that aren't ancestors.
1161 1161
1162 1162 # Filter out roots that aren't ancestors of heads
1163 1163 roots = [root for root in roots if root in ancestors]
1164 1164 # Recompute the lowest revision
1165 1165 if roots:
1166 1166 lowestrev = min([self.rev(root) for root in roots])
1167 1167 else:
1168 1168 # No more roots? Return empty list
1169 1169 return nonodes
1170 1170 else:
1171 1171 # We are descending from nullid, and don't need to care about
1172 1172 # any other roots.
1173 1173 lowestrev = nullrev
1174 1174 roots = [nullid]
1175 1175 # Transform our roots list into a set.
1176 1176 descendants = set(roots)
1177 1177 # Also, keep the original roots so we can filter out roots that aren't
1178 1178 # 'real' roots (i.e. are descended from other roots).
1179 1179 roots = descendants.copy()
1180 1180 # Our topologically sorted list of output nodes.
1181 1181 orderedout = []
1182 1182 # Don't start at nullid since we don't want nullid in our output list,
1183 1183 # and if nullid shows up in descendants, empty parents will look like
1184 1184 # they're descendants.
1185 1185 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1186 1186 n = self.node(r)
1187 1187 isdescendant = False
1188 1188 if lowestrev == nullrev: # Everybody is a descendant of nullid
1189 1189 isdescendant = True
1190 1190 elif n in descendants:
1191 1191 # n is already a descendant
1192 1192 isdescendant = True
1193 1193 # This check only needs to be done here because all the roots
1194 1194 # will start being marked as descendants before the loop.
1195 1195 if n in roots:
1196 1196 # If n was a root, check if it's a 'real' root.
1197 1197 p = tuple(self.parents(n))
1198 1198 # If any of its parents are descendants, it's not a root.
1199 1199 if (p[0] in descendants) or (p[1] in descendants):
1200 1200 roots.remove(n)
1201 1201 else:
1202 1202 p = tuple(self.parents(n))
1203 1203 # A node is a descendant if either of its parents are
1204 1204 # descendants. (We seeded the descendants set with the roots
1205 1205 # up there, remember?)
1206 1206 if (p[0] in descendants) or (p[1] in descendants):
1207 1207 descendants.add(n)
1208 1208 isdescendant = True
1209 1209 if isdescendant and ((ancestors is None) or (n in ancestors)):
1210 1210 # Only include nodes that are both descendants and ancestors.
1211 1211 orderedout.append(n)
1212 1212 if (ancestors is not None) and (n in heads):
1213 1213 # We're trying to figure out which heads are reachable
1214 1214 # from roots.
1215 1215 # Mark this head as having been reached
1216 1216 heads[n] = True
1217 1217 elif ancestors is None:
1218 1218 # Otherwise, we're trying to discover the heads.
1219 1219 # Assume this is a head because if it isn't, the next step
1220 1220 # will eventually remove it.
1221 1221 heads[n] = True
1222 1222 # But, obviously its parents aren't.
1223 1223 for p in self.parents(n):
1224 1224 heads.pop(p, None)
1225 1225 heads = [head for head, flag in heads.iteritems() if flag]
1226 1226 roots = list(roots)
1227 1227 assert orderedout
1228 1228 assert roots
1229 1229 assert heads
1230 1230 return (orderedout, roots, heads)
1231 1231
1232 1232 def headrevs(self):
1233 1233 try:
1234 1234 return self.index.headrevs()
1235 1235 except AttributeError:
1236 1236 return self._headrevs()
1237 1237
1238 1238 def computephases(self, roots):
1239 1239 return self.index.computephasesmapsets(roots)
1240 1240
1241 1241 def _headrevs(self):
1242 1242 count = len(self)
1243 1243 if not count:
1244 1244 return [nullrev]
1245 1245 # we won't iterate over filtered revs, so nobody is a head at the start
1246 1246 ishead = [0] * (count + 1)
1247 1247 index = self.index
1248 1248 for r in self:
1249 1249 ishead[r] = 1 # I may be a head
1250 1250 e = index[r]
1251 1251 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1252 1252 return [r for r, val in enumerate(ishead) if val]
1253 1253
1254 1254 def heads(self, start=None, stop=None):
1255 1255 """return the list of all nodes that have no children
1256 1256
1257 1257 if start is specified, only heads that are descendants of
1258 1258 start will be returned
1259 1259 if stop is specified, it will consider all the revs from stop
1260 1260 as if they had no children
1261 1261 """
1262 1262 if start is None and stop is None:
1263 1263 if not len(self):
1264 1264 return [nullid]
1265 1265 return [self.node(r) for r in self.headrevs()]
1266 1266
1267 1267 if start is None:
1268 1268 start = nullid
1269 1269 if stop is None:
1270 1270 stop = []
1271 1271 stoprevs = set([self.rev(n) for n in stop])
1272 1272 startrev = self.rev(start)
1273 1273 reachable = {startrev}
1274 1274 heads = {startrev}
1275 1275
1276 1276 parentrevs = self.parentrevs
1277 1277 for r in self.revs(start=startrev + 1):
1278 1278 for p in parentrevs(r):
1279 1279 if p in reachable:
1280 1280 if r not in stoprevs:
1281 1281 reachable.add(r)
1282 1282 heads.add(r)
1283 1283 if p in heads and p not in stoprevs:
1284 1284 heads.remove(p)
1285 1285
1286 1286 return [self.node(r) for r in heads]
1287 1287
1288 1288 def children(self, node):
1289 1289 """find the children of a given node"""
1290 1290 c = []
1291 1291 p = self.rev(node)
1292 1292 for r in self.revs(start=p + 1):
1293 1293 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1294 1294 if prevs:
1295 1295 for pr in prevs:
1296 1296 if pr == p:
1297 1297 c.append(self.node(r))
1298 1298 elif p == nullrev:
1299 1299 c.append(self.node(r))
1300 1300 return c
1301 1301
1302 1302 def descendant(self, start, end):
1303 1303 if start == nullrev:
1304 1304 return True
1305 1305 for i in self.descendants([start]):
1306 1306 if i == end:
1307 1307 return True
1308 1308 elif i > end:
1309 1309 break
1310 1310 return False
1311 1311
1312 1312 def commonancestorsheads(self, a, b):
1313 1313 """calculate all the heads of the common ancestors of nodes a and b"""
1314 1314 a, b = self.rev(a), self.rev(b)
1315 1315 try:
1316 1316 ancs = self.index.commonancestorsheads(a, b)
1317 1317 except (AttributeError, OverflowError): # C implementation failed
1318 1318 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1319 1319 return pycompat.maplist(self.node, ancs)
1320 1320
1321 1321 def isancestor(self, a, b):
1322 1322 """return True if node a is an ancestor of node b
1323 1323
1324 1324 The implementation of this is trivial but the use of
1325 1325 commonancestorsheads is not."""
1326 1326 return a in self.commonancestorsheads(a, b)
1327 1327
1328 1328 def ancestor(self, a, b):
1329 1329 """calculate the "best" common ancestor of nodes a and b"""
1330 1330
1331 1331 a, b = self.rev(a), self.rev(b)
1332 1332 try:
1333 1333 ancs = self.index.ancestors(a, b)
1334 1334 except (AttributeError, OverflowError):
1335 1335 ancs = ancestor.ancestors(self.parentrevs, a, b)
1336 1336 if ancs:
1337 1337 # choose a consistent winner when there's a tie
1338 1338 return min(map(self.node, ancs))
1339 1339 return nullid
1340 1340
1341 1341 def _match(self, id):
1342 1342 if isinstance(id, int):
1343 1343 # rev
1344 1344 return self.node(id)
1345 1345 if len(id) == 20:
1346 1346 # possibly a binary node
1347 1347 # odds of a binary node being all hex in ASCII are 1 in 10**25
1348 1348 try:
1349 1349 node = id
1350 1350 self.rev(node) # quick search the index
1351 1351 return node
1352 1352 except LookupError:
1353 1353 pass # may be partial hex id
1354 1354 try:
1355 1355 # str(rev)
1356 1356 rev = int(id)
1357 1357 if str(rev) != id:
1358 1358 raise ValueError
1359 1359 if rev < 0:
1360 1360 rev = len(self) + rev
1361 1361 if rev < 0 or rev >= len(self):
1362 1362 raise ValueError
1363 1363 return self.node(rev)
1364 1364 except (ValueError, OverflowError):
1365 1365 pass
1366 1366 if len(id) == 40:
1367 1367 try:
1368 1368 # a full hex nodeid?
1369 1369 node = bin(id)
1370 1370 self.rev(node)
1371 1371 return node
1372 1372 except (TypeError, LookupError):
1373 1373 pass
1374 1374
1375 1375 def _partialmatch(self, id):
1376 1376 maybewdir = wdirhex.startswith(id)
1377 1377 try:
1378 1378 partial = self.index.partialmatch(id)
1379 1379 if partial and self.hasnode(partial):
1380 1380 if maybewdir:
1381 1381 # single 'ff...' match in radix tree, ambiguous with wdir
1382 1382 raise RevlogError
1383 1383 return partial
1384 1384 if maybewdir:
1385 1385 # no 'ff...' match in radix tree, wdir identified
1386 1386 raise error.WdirUnsupported
1387 1387 return None
1388 1388 except RevlogError:
1389 1389 # parsers.c radix tree lookup gave multiple matches
1390 1390 # fast path: for unfiltered changelog, radix tree is accurate
1391 1391 if not getattr(self, 'filteredrevs', None):
1392 1392 raise LookupError(id, self.indexfile,
1393 1393 _('ambiguous identifier'))
1394 1394 # fall through to slow path that filters hidden revisions
1395 1395 except (AttributeError, ValueError):
1396 1396 # we are pure python, or key was too short to search radix tree
1397 1397 pass
1398 1398
1399 1399 if id in self._pcache:
1400 1400 return self._pcache[id]
1401 1401
1402 1402 if len(id) < 40:
1403 1403 try:
1404 1404 # hex(node)[:...]
1405 1405 l = len(id) // 2 # grab an even number of digits
1406 1406 prefix = bin(id[:l * 2])
1407 1407 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1408 1408 nl = [n for n in nl if hex(n).startswith(id) and
1409 1409 self.hasnode(n)]
1410 1410 if len(nl) > 0:
1411 1411 if len(nl) == 1 and not maybewdir:
1412 1412 self._pcache[id] = nl[0]
1413 1413 return nl[0]
1414 1414 raise LookupError(id, self.indexfile,
1415 1415 _('ambiguous identifier'))
1416 1416 if maybewdir:
1417 1417 raise error.WdirUnsupported
1418 1418 return None
1419 1419 except (TypeError, binascii.Error):
1420 1420 pass
1421 1421
1422 1422 def lookup(self, id):
1423 1423 """locate a node based on:
1424 1424 - revision number or str(revision number)
1425 1425 - nodeid or subset of hex nodeid
1426 1426 """
1427 1427 n = self._match(id)
1428 1428 if n is not None:
1429 1429 return n
1430 1430 n = self._partialmatch(id)
1431 1431 if n:
1432 1432 return n
1433 1433
1434 1434 raise LookupError(id, self.indexfile, _('no match found'))
1435 1435
1436 1436 def shortest(self, hexnode, minlength=1):
1437 1437 """Find the shortest unambiguous prefix that matches hexnode."""
1438 1438 def isvalid(test):
1439 1439 try:
1440 1440 if self._partialmatch(test) is None:
1441 1441 return False
1442 1442
1443 1443 try:
1444 1444 i = int(test)
1445 1445 # if we are a pure int, then starting with zero will not be
1446 1446 # confused as a rev; or, obviously, if the int is larger
1447 1447 # than the value of the tip rev
1448 1448 if test[0] == '0' or i > len(self):
1449 1449 return True
1450 1450 return False
1451 1451 except ValueError:
1452 1452 return True
1453 1453 except error.RevlogError:
1454 1454 return False
1455 1455 except error.WdirUnsupported:
1456 1456 # single 'ff...' match
1457 1457 return True
1458 1458
1459 1459 shortest = hexnode
1460 1460 startlength = max(6, minlength)
1461 1461 length = startlength
1462 1462 while True:
1463 1463 test = hexnode[:length]
1464 1464 if isvalid(test):
1465 1465 shortest = test
1466 1466 if length == minlength or length > startlength:
1467 1467 return shortest
1468 1468 length -= 1
1469 1469 else:
1470 1470 length += 1
1471 1471 if len(shortest) <= length:
1472 1472 return shortest
1473 1473
1474 1474 def cmp(self, node, text):
1475 1475 """compare text with a given file revision
1476 1476
1477 1477 returns True if text is different from what is stored.
1478 1478 """
1479 1479 p1, p2 = self.parents(node)
1480 1480 return hash(text, p1, p2) != node
1481 1481
1482 1482 def _cachesegment(self, offset, data):
1483 1483 """Add a segment to the revlog cache.
1484 1484
1485 1485 Accepts an absolute offset and the data that is at that location.
1486 1486 """
1487 1487 o, d = self._chunkcache
1488 1488 # try to add to existing cache
1489 1489 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1490 1490 self._chunkcache = o, d + data
1491 1491 else:
1492 1492 self._chunkcache = offset, data
1493 1493
1494 1494 def _readsegment(self, offset, length, df=None):
1495 1495 """Load a segment of raw data from the revlog.
1496 1496
1497 1497 Accepts an absolute offset, length to read, and an optional existing
1498 1498 file handle to read from.
1499 1499
1500 1500 If an existing file handle is passed, it will be seeked and the
1501 1501 original seek position will NOT be restored.
1502 1502
1503 1503 Returns a str or buffer of raw byte data.
1504 1504 """
1505 1505 if df is not None:
1506 1506 closehandle = False
1507 1507 else:
1508 1508 if self._inline:
1509 1509 df = self._indexfp()
1510 1510 else:
1511 1511 df = self._datafp()
1512 1512 closehandle = True
1513 1513
1514 1514 # Cache data both forward and backward around the requested
1515 1515 # data, in a fixed size window. This helps speed up operations
1516 1516 # involving reading the revlog backwards.
1517 1517 cachesize = self._chunkcachesize
1518 1518 realoffset = offset & ~(cachesize - 1)
1519 1519 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1520 1520 - realoffset)
1521 1521 df.seek(realoffset)
1522 1522 d = df.read(reallength)
1523 1523 if closehandle:
1524 1524 df.close()
1525 1525 self._cachesegment(realoffset, d)
1526 1526 if offset != realoffset or reallength != length:
1527 1527 return util.buffer(d, offset - realoffset, length)
1528 1528 return d
1529 1529
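# Editor's worked example with the default _chunkcachesize of 65536:
# a request for 100 bytes at offset 70000 computes
# realoffset = 70000 & ~65535 == 65536 and
# reallength = ((70100 + 65536) & ~65535) - 65536 == 65536,
# i.e. one cache-aligned 64k window is read around the requested data.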
1530 1530 def _getsegment(self, offset, length, df=None):
1531 1531 """Obtain a segment of raw data from the revlog.
1532 1532
1533 1533 Accepts an absolute offset, length of bytes to obtain, and an
1534 1534 optional file handle to the already-opened revlog. If the file
1535 1535 handle is used, its original seek position will not be preserved.
1536 1536
1537 1537 Requests for data may be returned from a cache.
1538 1538
1539 1539 Returns a str or a buffer instance of raw byte data.
1540 1540 """
1541 1541 o, d = self._chunkcache
1542 1542 l = len(d)
1543 1543
1544 1544 # is it in the cache?
1545 1545 cachestart = offset - o
1546 1546 cacheend = cachestart + length
1547 1547 if cachestart >= 0 and cacheend <= l:
1548 1548 if cachestart == 0 and cacheend == l:
1549 1549 return d # avoid a copy
1550 1550 return util.buffer(d, cachestart, cacheend - cachestart)
1551 1551
1552 1552 return self._readsegment(offset, length, df=df)
1553 1553
1554 1554 def _getsegmentforrevs(self, startrev, endrev, df=None):
1555 1555 """Obtain a segment of raw data corresponding to a range of revisions.
1556 1556
1557 1557 Accepts the start and end revisions and an optional already-open
1558 1558 file handle to be used for reading. If the file handle is used, its
1559 1559 seek position will not be preserved.
1560 1560
1561 1561 Requests for data may be satisfied by a cache.
1562 1562
1563 1563 Returns a 2-tuple of (offset, data) for the requested range of
1564 1564 revisions. Offset is the integer offset from the beginning of the
1565 1565 revlog and data is a str or buffer of the raw byte data.
1566 1566
1567 1567 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1568 1568 to determine where each revision's data begins and ends.
1569 1569 """
1570 1570 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1571 1571 # (functions are expensive).
1572 1572 index = self.index
1573 1573 istart = index[startrev]
1574 1574 start = int(istart[0] >> 16)
1575 1575 if startrev == endrev:
1576 1576 end = start + istart[1]
1577 1577 else:
1578 1578 iend = index[endrev]
1579 1579 end = int(iend[0] >> 16) + iend[1]
1580 1580
1581 1581 if self._inline:
1582 1582 start += (startrev + 1) * self._io.size
1583 1583 end += (endrev + 1) * self._io.size
1584 1584 length = end - start
1585 1585
1586 1586 return start, self._getsegment(start, length, df=df)
1587 1587
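# Hedged usage sketch (``rl`` is a revlog instance; this mirrors what
# _chunks() does below): slicing one revision's chunk out of the returned
# segment --
#
#     offset, data = rl._getsegmentforrevs(startrev, endrev)
#     chunkstart = rl.start(rev)        # plus (rev + 1) * rl._io.size
#                                       # when the revlog is inline
#     chunk = util.buffer(data, chunkstart - offset, rl.length(rev))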
1588 1588 def _chunk(self, rev, df=None):
1589 1589 """Obtain a single decompressed chunk for a revision.
1590 1590
1591 1591 Accepts an integer revision and an optional already-open file handle
1592 1592 to be used for reading. If used, the seek position of the file will not
1593 1593 be preserved.
1594 1594
1595 1595 Returns a str holding uncompressed data for the requested revision.
1596 1596 """
1597 1597 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1598 1598
1599 1599 def _chunks(self, revs, df=None):
1600 1600 """Obtain decompressed chunks for the specified revisions.
1601 1601
1602 1602 Accepts an iterable of numeric revisions that are assumed to be in
1603 1603 ascending order. Also accepts an optional already-open file handle
1604 1604 to be used for reading. If used, the seek position of the file will
1605 1605 not be preserved.
1606 1606
1607 1607 This function is similar to calling ``self._chunk()`` multiple times,
1608 1608 but is faster.
1609 1609
1610 1610 Returns a list with decompressed data for each requested revision.
1611 1611 """
1612 1612 if not revs:
1613 1613 return []
1614 1614 start = self.start
1615 1615 length = self.length
1616 1616 inline = self._inline
1617 1617 iosize = self._io.size
1618 1618 buffer = util.buffer
1619 1619
1620 1620 l = []
1621 1621 ladd = l.append
1622 1622
1623 1623 if not self._withsparseread:
1624 1624 slicedchunks = (revs,)
1625 1625 else:
1626 1626 slicedchunks = _slicechunk(self, revs)
1627 1627
1628 1628 for revschunk in slicedchunks:
1629 1629 firstrev = revschunk[0]
1630 1630 # Skip trailing revisions with empty diff
1631 1631 for lastrev in revschunk[::-1]:
1632 1632 if length(lastrev) != 0:
1633 1633 break
1634 1634
1635 1635 try:
1636 1636 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1637 1637 except OverflowError:
1638 1638 # issue4215 - we can't cache a run of chunks greater than
1639 1639 # 2G on Windows
1640 1640 return [self._chunk(rev, df=df) for rev in revschunk]
1641 1641
1642 1642 decomp = self.decompress
1643 1643 for rev in revschunk:
1644 1644 chunkstart = start(rev)
1645 1645 if inline:
1646 1646 chunkstart += (rev + 1) * iosize
1647 1647 chunklength = length(rev)
1648 1648 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1649 1649
1650 1650 return l
1651 1651
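# Hedged usage sketch: _chunks() turns one (or, with sparse-read, a few)
# contiguous disk reads into per-revision decompressed chunks --
#
#     bins = rl._chunks([2, 3, 4])   # == [rl._chunk(r) for r in (2, 3, 4)]
#                                    # but with a single segment read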
1652 1652 def _chunkclear(self):
1653 1653 """Clear the raw chunk cache."""
1654 1654 self._chunkcache = (0, '')
1655 1655
1656 1656 def deltaparent(self, rev):
1657 1657 """return deltaparent of the given revision"""
1658 1658 base = self.index[rev][3]
1659 1659 if base == rev:
1660 1660 return nullrev
1661 1661 elif self._generaldelta:
1662 1662 return base
1663 1663 else:
1664 1664 return rev - 1
1665 1665
1666 1666 def revdiff(self, rev1, rev2):
1667 1667 """return or calculate a delta between two revisions
1668 1668
1669 1669 The delta calculated is in binary form and is intended to be written to
1670 1670 revlog data directly. So this function needs raw revision data.
1671 1671 """
1672 1672 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1673 1673 return bytes(self._chunk(rev2))
1674 1674
1675 1675 return mdiff.textdiff(self.revision(rev1, raw=True),
1676 1676 self.revision(rev2, raw=True))
1677 1677
1678 1678 def revision(self, nodeorrev, _df=None, raw=False):
1679 1679 """return an uncompressed revision of a given node or revision
1680 1680 number.
1681 1681
1682 1682 _df - an existing file handle to read from. (internal-only)
1683 1683 raw - an optional argument specifying if the revision data is to be
1684 1684 treated as raw data when applying flag transforms. 'raw' should be set
1685 1685 to True when generating changegroups or in debug commands.
1686 1686 """
1687 1687 if isinstance(nodeorrev, int):
1688 1688 rev = nodeorrev
1689 1689 node = self.node(rev)
1690 1690 else:
1691 1691 node = nodeorrev
1692 1692 rev = None
1693 1693
1694 1694 cachedrev = None
1695 1695 flags = None
1696 1696 rawtext = None
1697 1697 if node == nullid:
1698 1698 return ""
1699 1699 if self._cache:
1700 1700 if self._cache[0] == node:
1701 1701 # _cache only stores rawtext
1702 1702 if raw:
1703 1703 return self._cache[2]
1704 1704 # duplicated, but good for perf
1705 1705 if rev is None:
1706 1706 rev = self.rev(node)
1707 1707 if flags is None:
1708 1708 flags = self.flags(rev)
1709 1709 # no extra flags set, no flag processor runs, text = rawtext
1710 1710 if flags == REVIDX_DEFAULT_FLAGS:
1711 1711 return self._cache[2]
1712 1712 # rawtext is reusable. need to run flag processor
1713 1713 rawtext = self._cache[2]
1714 1714
1715 1715 cachedrev = self._cache[1]
1716 1716
1717 1717 # look up what we need to read
1718 1718 if rawtext is None:
1719 1719 if rev is None:
1720 1720 rev = self.rev(node)
1721 1721
1722 1722 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1723 1723 if stopped:
1724 1724 rawtext = self._cache[2]
1725 1725
1726 1726 # drop cache to save memory
1727 1727 self._cache = None
1728 1728
1729 1729 bins = self._chunks(chain, df=_df)
1730 1730 if rawtext is None:
1731 1731 rawtext = bytes(bins[0])
1732 1732 bins = bins[1:]
1733 1733
1734 1734 rawtext = mdiff.patches(rawtext, bins)
1735 1735 self._cache = (node, rev, rawtext)
1736 1736
1737 1737 if flags is None:
1738 1738 if rev is None:
1739 1739 rev = self.rev(node)
1740 1740 flags = self.flags(rev)
1741 1741
1742 1742 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1743 1743 if validatehash:
1744 1744 self.checkhash(text, node, rev=rev)
1745 1745
1746 1746 return text
1747 1747
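# Hedged usage sketch (``rl`` is a revlog, ``node`` a known nodeid):
#
#     text = rl.revision(node)               # flag processors applied
#     rawtext = rl.revision(node, raw=True)  # as stored on disk, e.g.
#                                            # for changegroup generation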
1748 1748 def hash(self, text, p1, p2):
1749 1749 """Compute a node hash.
1750 1750
1751 1751 Available as a function so that subclasses can replace the hash
1752 1752 as needed.
1753 1753 """
1754 1754 return hash(text, p1, p2)
1755 1755
1756 1756 def _processflags(self, text, flags, operation, raw=False):
1757 1757 """Inspect revision data flags and applies transforms defined by
1758 1758 registered flag processors.
1759 1759
1760 1760 ``text`` - the revision data to process
1761 1761 ``flags`` - the revision flags
1762 1762 ``operation`` - the operation being performed (read or write)
1763 1763 ``raw`` - an optional argument describing if the raw transform should be
1764 1764 applied.
1765 1765
1766 1766 This method processes the flags in the order (or reverse order if
1767 1767 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1768 1768 flag processors registered for present flags. The order of flags defined
1769 1769 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1770 1770
1771 1771 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1772 1772 processed text and ``validatehash`` is a bool indicating whether the
1773 1773 returned text should be checked for hash integrity.
1774 1774
1775 1775 Note: If the ``raw`` argument is set, it has precedence over the
1776 1776 operation and will only update the value of ``validatehash``.
1777 1777 """
1778 1778 # fast path: no flag processors will run
1779 1779 if flags == 0:
1780 1780 return text, True
1781 1781 if operation not in ('read', 'write'):
1782 1782 raise ProgrammingError(_("invalid '%s' operation") % (operation))
1783 1783 # Check all flags are known.
1784 1784 if flags & ~REVIDX_KNOWN_FLAGS:
1785 1785 raise RevlogError(_("incompatible revision flag '%#x'") %
1786 1786 (flags & ~REVIDX_KNOWN_FLAGS))
1787 1787 validatehash = True
1788 1788 # Depending on the operation (read or write), the order might be
1789 1789 # reversed due to non-commutative transforms.
1790 1790 orderedflags = REVIDX_FLAGS_ORDER
1791 1791 if operation == 'write':
1792 1792 orderedflags = reversed(orderedflags)
1793 1793
1794 1794 for flag in orderedflags:
1795 1795 # If a flagprocessor has been registered for a known flag, apply the
1796 1796 # related operation transform and update result tuple.
1797 1797 if flag & flags:
1798 1798 vhash = True
1799 1799
1800 1800 if flag not in _flagprocessors:
1801 1801 message = _("missing processor for flag '%#x'") % (flag)
1802 1802 raise RevlogError(message)
1803 1803
1804 1804 processor = _flagprocessors[flag]
1805 1805 if processor is not None:
1806 1806 readtransform, writetransform, rawtransform = processor
1807 1807
1808 1808 if raw:
1809 1809 vhash = rawtransform(self, text)
1810 1810 elif operation == 'read':
1811 1811 text, vhash = readtransform(self, text)
1812 1812 else: # write operation
1813 1813 text, vhash = writetransform(self, text)
1814 1814 validatehash = validatehash and vhash
1815 1815
1816 1816 return text, validatehash
1817 1817
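# Why the order flips for 'write' (illustrative): if reading applies the
# processors for flags A then B, writing must undo them in the opposite
# order, like inverting nested function application --
#
#     read:  text = B.read(A.read(rawtext))
#     write: rawtext = A.write(B.write(text))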
1818 1818 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1819 1819 """Check node hash integrity.
1820 1820
1821 1821 Available as a function so that subclasses can extend hash mismatch
1822 1822 behaviors as needed.
1823 1823 """
1824 1824 if p1 is None and p2 is None:
1825 1825 p1, p2 = self.parents(node)
1826 1826 if node != self.hash(text, p1, p2):
1827 1827 revornode = rev
1828 1828 if revornode is None:
1829 1829 revornode = templatefilters.short(hex(node))
1830 1830 raise RevlogError(_("integrity check failed on %s:%s")
1831 1831 % (self.indexfile, pycompat.bytestr(revornode)))
1832 1832
1833 1833 def checkinlinesize(self, tr, fp=None):
1834 1834 """Check if the revlog is too big for inline and convert if so.
1835 1835
1836 1836 This should be called after revisions are added to the revlog. If the
1837 1837 revlog has grown too large to be an inline revlog, it will convert it
1838 1838 to use multiple index and data files.
1839 1839 """
1840 1840 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1841 1841 return
1842 1842
1843 1843 trinfo = tr.find(self.indexfile)
1844 1844 if trinfo is None:
1845 1845 raise RevlogError(_("%s not found in the transaction")
1846 1846 % self.indexfile)
1847 1847
1848 1848 trindex = trinfo[2]
1849 1849 if trindex is not None:
1850 1850 dataoff = self.start(trindex)
1851 1851 else:
1852 1852 # revlog was stripped at start of transaction, use all leftover data
1853 1853 trindex = len(self) - 1
1854 1854 dataoff = self.end(-2)
1855 1855
1856 1856 tr.add(self.datafile, dataoff)
1857 1857
1858 1858 if fp:
1859 1859 fp.flush()
1860 1860 fp.close()
1861 1861
1862 1862 with self._datafp('w') as df:
1863 1863 for r in self:
1864 1864 df.write(self._getsegmentforrevs(r, r)[1])
1865 1865
1866 fp = self._indexfp('w')
1866 with self._indexfp('w') as fp:
1867 1867 self.version &= ~FLAG_INLINE_DATA
1868 1868 self._inline = False
1869 io = self._io
1869 1870 for i in self:
1870 e = self._io.packentry(self.index[i], self.node, self.version, i)
1871 e = io.packentry(self.index[i], self.node, self.version, i)
1871 1872 fp.write(e)
1872 1873
1873 # if we don't call close, the temp file will never replace the
1874 # real index
1875 fp.close()
1874 # the temp file replaces the real index when we exit the context
1875 # manager
1876 1876
1877 1877 tr.replace(self.indexfile, trindex * self._io.size)
1878 1878 self._chunkclear()
1879 1879
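# The threshold test above, spelled out (illustrative): the revlog index
# keeps a trailing sentinel entry, so start(-2) + length(-2) is the total
# amount of revision data stored so far; once that reaches _maxinline
# (128KiB) the inline revlog is rewritten as separate index and data
# files --
#
#     if rl._inline and rl.start(-2) + rl.length(-2) >= _maxinline:
#         ...convert...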
1880 1880 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1881 1881 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1882 1882 """add a revision to the log
1883 1883
1884 1884 text - the revision data to add
1885 1885 transaction - the transaction object used for rollback
1886 1886 link - the linkrev data to add
1887 1887 p1, p2 - the parent nodeids of the revision
1888 1888 cachedelta - an optional precomputed delta
1889 1889 node - nodeid of revision; typically node is not specified, and it is
1890 1890 computed by default as hash(text, p1, p2); however, subclasses might
1891 1891 use a different hashing method (and override checkhash() in that case)
1892 1892 flags - the known flags to set on the revision
1893 1893 deltacomputer - an optional _deltacomputer instance shared between
1894 1894 multiple calls
1895 1895 """
1896 1896 if link == nullrev:
1897 1897 raise RevlogError(_("attempted to add linkrev -1 to %s")
1898 1898 % self.indexfile)
1899 1899
1900 1900 if flags:
1901 1901 node = node or self.hash(text, p1, p2)
1902 1902
1903 1903 rawtext, validatehash = self._processflags(text, flags, 'write')
1904 1904
1905 1905 # If the flag processor modifies the revision data, ignore any provided
1906 1906 # cachedelta.
1907 1907 if rawtext != text:
1908 1908 cachedelta = None
1909 1909
1910 1910 if len(rawtext) > _maxentrysize:
1911 1911 raise RevlogError(
1912 1912 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1913 1913 % (self.indexfile, len(rawtext)))
1914 1914
1915 1915 node = node or self.hash(rawtext, p1, p2)
1916 1916 if node in self.nodemap:
1917 1917 return node
1918 1918
1919 1919 if validatehash:
1920 1920 self.checkhash(rawtext, node, p1=p1, p2=p2)
1921 1921
1922 1922 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1923 1923 flags, cachedelta=cachedelta,
1924 1924 deltacomputer=deltacomputer)
1925 1925
1926 1926 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1927 1927 cachedelta=None, deltacomputer=None):
1928 1928 """add a raw revision with known flags, node and parents
1929 1929 useful when reusing a revision not stored in this revlog (ex: received
1930 1930 over wire, or read from an external bundle).
1931 1931 """
1932 1932 dfh = None
1933 1933 if not self._inline:
1934 1934 dfh = self._datafp("a+")
1935 1935 ifh = self._indexfp("a+")
1936 1936 try:
1937 1937 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1938 1938 flags, cachedelta, ifh, dfh,
1939 1939 deltacomputer=deltacomputer)
1940 1940 finally:
1941 1941 if dfh:
1942 1942 dfh.close()
1943 1943 ifh.close()
1944 1944
1945 1945 def compress(self, data):
1946 1946 """Generate a possibly-compressed representation of data."""
1947 1947 if not data:
1948 1948 return '', data
1949 1949
1950 1950 compressed = self._compressor.compress(data)
1951 1951
1952 1952 if compressed:
1953 1953 # The revlog compressor added the header in the returned data.
1954 1954 return '', compressed
1955 1955
1956 1956 if data[0:1] == '\0':
1957 1957 return '', data
1958 1958 return 'u', data
1959 1959
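# A minimal round-trip sketch (illustrative, assuming the default zlib
# engine) of the chunk header convention shared by compress() above and
# decompress() below: zlib output self-identifies with a leading 'x',
# data beginning with '\0' needs no marker, and 'u' marks everything else
# stored uncompressed:
import zlib

def toycompress(data):
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed             # header byte 'x' comes from zlib
    if data[:1] == b'\0':
        return data                   # self-marking: starts with '\0'
    return b'u' + data                # explicit "stored as-is" marker

def toydecompress(chunk):
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'u':
        return chunk[1:]
    return chunk                      # '\0' or empty: raw data

assert toydecompress(toycompress(b'x' * 100)) == b'x' * 100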
1960 1960 def decompress(self, data):
1961 1961 """Decompress a revlog chunk.
1962 1962
1963 1963 The chunk is expected to begin with a header identifying the
1964 1964 format type so it can be routed to an appropriate decompressor.
1965 1965 """
1966 1966 if not data:
1967 1967 return data
1968 1968
1969 1969 # Revlogs are read much more frequently than they are written and many
1970 1970 # chunks only take microseconds to decompress, so performance is
1971 1971 # important here.
1972 1972 #
1973 1973 # We can make a few assumptions about revlogs:
1974 1974 #
1975 1975 # 1) the majority of chunks will be compressed (as opposed to inline
1976 1976 # raw data).
1977 1977 # 2) decompressing *any* data will likely be at least 10x slower than
1978 1978 # returning raw inline data.
1979 1979 # 3) we want to prioritize common and officially supported compression
1980 1980 # engines
1981 1981 #
1982 1982 # It follows that we want to optimize for "decompress compressed data
1983 1983 # when encoded with common and officially supported compression engines"
1984 1984 # case over "raw data" and "data encoded by less common or non-official
1985 1985 # compression engines." That is why we have the inline lookup first
1986 1986 # followed by the compengines lookup.
1987 1987 #
1988 1988 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1989 1989 # compressed chunks. And this matters for changelog and manifest reads.
1990 1990 t = data[0:1]
1991 1991
1992 1992 if t == 'x':
1993 1993 try:
1994 1994 return _zlibdecompress(data)
1995 1995 except zlib.error as e:
1996 1996 raise RevlogError(_('revlog decompress error: %s') % str(e))
1997 1997 # '\0' is more common than 'u' so it goes first.
1998 1998 elif t == '\0':
1999 1999 return data
2000 2000 elif t == 'u':
2001 2001 return util.buffer(data, 1)
2002 2002
2003 2003 try:
2004 2004 compressor = self._decompressors[t]
2005 2005 except KeyError:
2006 2006 try:
2007 2007 engine = util.compengines.forrevlogheader(t)
2008 2008 compressor = engine.revlogcompressor()
2009 2009 self._decompressors[t] = compressor
2010 2010 except KeyError:
2011 2011 raise RevlogError(_('unknown compression type %r') % t)
2012 2012
2013 2013 return compressor.decompress(data)
2014 2014
2015 2015 def _isgooddeltainfo(self, d, textlen):
2016 2016 """Returns True if the given delta is good. Good means that it is within
2017 2017 the disk span, disk size, and chain length bounds that we know to be
2018 2018 performant."""
2019 2019 if d is None:
2020 2020 return False
2021 2021
2022 2022 # - 'd.distance' is the distance from the base revision -- bounding it
2023 2023 # limits the amount of I/O we need to do.
2024 2024 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2025 2025 # need to apply -- bounding it limits the amount of CPU we consume.
2026 2026
2027 2027 defaultmax = textlen * 4
2028 2028 maxdist = self._maxdeltachainspan
2029 2029 if not maxdist:
2030 2030 maxdist = d.distance # ensure the conditional below passes
2031 2031 maxdist = max(maxdist, defaultmax)
2032 2032 if (d.distance > maxdist or d.deltalen > textlen or
2033 2033 d.compresseddeltalen > textlen * 2 or
2034 2034 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2035 2035 return False
2036 2036
2037 2037 return True
2038 2038
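# Worked example (illustrative): with textlen = 1000, the delta is
# rejected if it alone is longer than 1000 bytes, if the chain's deltas
# sum past 2000 bytes, or if the chain is deeper than _maxchainlen. The
# distance check uses max(_maxdeltachainspan, 4000) as its bound, and is
# disabled entirely (maxdist = d.distance) when no span limit is set.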
2039 2039 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2040 2040 cachedelta, ifh, dfh, alwayscache=False,
2041 2041 deltacomputer=None):
2042 2042 """internal function to add revisions to the log
2043 2043
2044 2044 see addrevision for argument descriptions.
2045 2045
2046 2046 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2047 2047
2048 2048 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2049 2049 be used.
2050 2050
2051 2051 invariants:
2052 2052 - rawtext is optional (can be None); if not set, cachedelta must be set.
2053 2053 if both are set, they must correspond to each other.
2054 2054 """
2055 2055 if node == nullid:
2056 2056 raise RevlogError(_("%s: attempt to add null revision") %
2057 2057 (self.indexfile))
2058 2058 if node == wdirid:
2059 2059 raise RevlogError(_("%s: attempt to add wdir revision") %
2060 2060 (self.indexfile))
2061 2061
2062 2062 if self._inline:
2063 2063 fh = ifh
2064 2064 else:
2065 2065 fh = dfh
2066 2066
2067 2067 btext = [rawtext]
2068 2068
2069 2069 curr = len(self)
2070 2070 prev = curr - 1
2071 2071 offset = self.end(prev)
2072 2072 p1r, p2r = self.rev(p1), self.rev(p2)
2073 2073
2074 2074 # full versions are inserted when the needed deltas
2075 2075 # become comparable to the uncompressed text
2076 2076 if rawtext is None:
2077 2077 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2078 2078 cachedelta[1])
2079 2079 else:
2080 2080 textlen = len(rawtext)
2081 2081
2082 2082 if deltacomputer is None:
2083 2083 deltacomputer = _deltacomputer(self)
2084 2084
2085 2085 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2086 2086 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2087 2087
2088 2088 if deltainfo is not None:
2089 2089 base = deltainfo.base
2090 2090 chainbase = deltainfo.chainbase
2091 2091 data = deltainfo.data
2092 2092 l = deltainfo.deltalen
2093 2093 else:
2094 2094 rawtext = deltacomputer.buildtext(revinfo, fh)
2095 2095 data = self.compress(rawtext)
2096 2096 l = len(data[1]) + len(data[0])
2097 2097 base = chainbase = curr
2098 2098
2099 2099 e = (offset_type(offset, flags), l, textlen,
2100 2100 base, link, p1r, p2r, node)
2101 2101 self.index.insert(-1, e)
2102 2102 self.nodemap[node] = curr
2103 2103
2104 2104 entry = self._io.packentry(e, self.node, self.version, curr)
2105 2105 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2106 2106
2107 2107 if alwayscache and rawtext is None:
2108 2108 rawtext = deltacomputer._buildtext(revinfo, fh)
2109 2109
2110 2110 if type(rawtext) == bytes: # only accept immutable objects
2111 2111 self._cache = (node, curr, rawtext)
2112 2112 self._chainbasecache[curr] = chainbase
2113 2113 return node
2114 2114
2115 2115 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2116 2116 # Files opened in a+ mode have inconsistent behavior on various
2117 2117 # platforms. Windows requires that a file positioning call be made
2118 2118 # when the file handle transitions between reads and writes. See
2119 2119 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2120 2120 # platforms, Python or the platform itself can be buggy. Some versions
2121 2121 # of Solaris have been observed to not append at the end of the file
2122 2122 # if the file was seeked to before the end. See issue4943 for more.
2123 2123 #
2124 2124 # We work around this issue by inserting a seek() before writing.
2125 2125 # Note: This is likely not necessary on Python 3.
2126 2126 ifh.seek(0, os.SEEK_END)
2127 2127 if dfh:
2128 2128 dfh.seek(0, os.SEEK_END)
2129 2129
2130 2130 curr = len(self) - 1
2131 2131 if not self._inline:
2132 2132 transaction.add(self.datafile, offset)
2133 2133 transaction.add(self.indexfile, curr * len(entry))
2134 2134 if data[0]:
2135 2135 dfh.write(data[0])
2136 2136 dfh.write(data[1])
2137 2137 ifh.write(entry)
2138 2138 else:
2139 2139 offset += curr * self._io.size
2140 2140 transaction.add(self.indexfile, offset, curr)
2141 2141 ifh.write(entry)
2142 2142 ifh.write(data[0])
2143 2143 ifh.write(data[1])
2144 2144 self.checkinlinesize(transaction, ifh)
2145 2145
2146 2146 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2147 2147 """
2148 2148 add a delta group
2149 2149
2150 2150 Given a set of deltas, add them to the revision log. The
2151 2151 first delta is against its parent, which should be in our
2152 2152 log; the rest are against the previous delta.
2153 2153
2154 2154 If ``addrevisioncb`` is defined, it will be called with arguments of
2155 2155 this revlog and the node that was added.
2156 2156 """
2157 2157
2158 2158 nodes = []
2159 2159
2160 2160 r = len(self)
2161 2161 end = 0
2162 2162 if r:
2163 2163 end = self.end(r - 1)
2164 2164 ifh = self._indexfp("a+")
2165 2165 isize = r * self._io.size
2166 2166 if self._inline:
2167 2167 transaction.add(self.indexfile, end + isize, r)
2168 2168 dfh = None
2169 2169 else:
2170 2170 transaction.add(self.indexfile, isize, r)
2171 2171 transaction.add(self.datafile, end)
2172 2172 dfh = self._datafp("a+")
2173 2173 def flush():
2174 2174 if dfh:
2175 2175 dfh.flush()
2176 2176 ifh.flush()
2177 2177 try:
2178 2178 deltacomputer = _deltacomputer(self)
2179 2179 # loop through our set of deltas
2180 2180 for data in deltas:
2181 2181 node, p1, p2, linknode, deltabase, delta, flags = data
2182 2182 link = linkmapper(linknode)
2183 2183 flags = flags or REVIDX_DEFAULT_FLAGS
2184 2184
2185 2185 nodes.append(node)
2186 2186
2187 2187 if node in self.nodemap:
2188 2188 # this can happen if two branches make the same change
2189 2189 continue
2190 2190
2191 2191 for p in (p1, p2):
2192 2192 if p not in self.nodemap:
2193 2193 raise LookupError(p, self.indexfile,
2194 2194 _('unknown parent'))
2195 2195
2196 2196 if deltabase not in self.nodemap:
2197 2197 raise LookupError(deltabase, self.indexfile,
2198 2198 _('unknown delta base'))
2199 2199
2200 2200 baserev = self.rev(deltabase)
2201 2201
2202 2202 if baserev != nullrev and self.iscensored(baserev):
2203 2203 # if base is censored, delta must be full replacement in a
2204 2204 # single patch operation
2205 2205 hlen = struct.calcsize(">lll")
2206 2206 oldlen = self.rawsize(baserev)
2207 2207 newlen = len(delta) - hlen
2208 2208 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2209 2209 raise error.CensoredBaseError(self.indexfile,
2210 2210 self.node(baserev))
2211 2211
2212 2212 if not flags and self._peek_iscensored(baserev, delta, flush):
2213 2213 flags |= REVIDX_ISCENSORED
2214 2214
2215 2215 # We assume consumers of addrevisioncb will want to retrieve
2216 2216 # the added revision, which will require a call to
2217 2217 # revision(). revision() will fast path if there is a cache
2218 2218 # hit. So, we tell _addrevision() to always cache in this case.
2219 2219 # We're only using addgroup() in the context of changegroup
2220 2220 # generation so the revision data can always be handled as raw
2221 2221 # by the flagprocessor.
2222 2222 self._addrevision(node, None, transaction, link,
2223 2223 p1, p2, flags, (baserev, delta),
2224 2224 ifh, dfh,
2225 2225 alwayscache=bool(addrevisioncb),
2226 2226 deltacomputer=deltacomputer)
2227 2227
2228 2228 if addrevisioncb:
2229 2229 addrevisioncb(self, node)
2230 2230
2231 2231 if not dfh and not self._inline:
2232 2232 # addrevision switched from inline to conventional
2233 2233 # reopen the index
2234 2234 ifh.close()
2235 2235 dfh = self._datafp("a+")
2236 2236 ifh = self._indexfp("a+")
2237 2237 finally:
2238 2238 if dfh:
2239 2239 dfh.close()
2240 2240 ifh.close()
2241 2241
2242 2242 return nodes
2243 2243
2244 2244 def iscensored(self, rev):
2245 2245 """Check if a file revision is censored."""
2246 2246 return False
2247 2247
2248 2248 def _peek_iscensored(self, baserev, delta, flush):
2249 2249 """Quickly check if a delta produces a censored revision."""
2250 2250 return False
2251 2251
2252 2252 def getstrippoint(self, minlink):
2253 2253 """find the minimum rev that must be stripped to strip the linkrev
2254 2254
2255 2255 Returns a tuple containing the minimum rev and a set of all revs that
2256 2256 have linkrevs that will be broken by this strip.
2257 2257 """
2258 2258 brokenrevs = set()
2259 2259 strippoint = len(self)
2260 2260
2261 2261 heads = {}
2262 2262 futurelargelinkrevs = set()
2263 2263 for head in self.headrevs():
2264 2264 headlinkrev = self.linkrev(head)
2265 2265 heads[head] = headlinkrev
2266 2266 if headlinkrev >= minlink:
2267 2267 futurelargelinkrevs.add(headlinkrev)
2268 2268
2269 2269 # This algorithm involves walking down the rev graph, starting at the
2270 2270 # heads. Since the revs are topologically sorted according to linkrev,
2271 2271 # once all head linkrevs are below the minlink, we know there are
2272 2272 # no more revs that could have a linkrev greater than minlink.
2273 2273 # So we can stop walking.
2274 2274 while futurelargelinkrevs:
2275 2275 strippoint -= 1
2276 2276 linkrev = heads.pop(strippoint)
2277 2277
2278 2278 if linkrev < minlink:
2279 2279 brokenrevs.add(strippoint)
2280 2280 else:
2281 2281 futurelargelinkrevs.remove(linkrev)
2282 2282
2283 2283 for p in self.parentrevs(strippoint):
2284 2284 if p != nullrev:
2285 2285 plinkrev = self.linkrev(p)
2286 2286 heads[p] = plinkrev
2287 2287 if plinkrev >= minlink:
2288 2288 futurelargelinkrevs.add(plinkrev)
2289 2289
2290 2290 return strippoint, brokenrevs
2291 2291
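# Hedged usage sketch: deciding where to truncate before a strip --
#
#     rev, broken = rl.getstrippoint(minlink)
#     # ``rev`` is the first revision to remove; ``broken`` are revisions
#     # at or above ``rev`` whose linkrev is still < minlink and so are
#     # removed only as collateral damage.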
2292 2292 def strip(self, minlink, transaction):
2293 2293 """truncate the revlog on the first revision with a linkrev >= minlink
2294 2294
2295 2295 This function is called when we're stripping revision minlink and
2296 2296 its descendants from the repository.
2297 2297
2298 2298 We have to remove all revisions with linkrev >= minlink, because
2299 2299 the equivalent changelog revisions will be renumbered after the
2300 2300 strip.
2301 2301
2302 2302 So we truncate the revlog on the first of these revisions, and
2303 2303 trust that the caller has saved the revisions that shouldn't be
2304 2304 removed and that it'll re-add them after this truncation.
2305 2305 """
2306 2306 if len(self) == 0:
2307 2307 return
2308 2308
2309 2309 rev, _ = self.getstrippoint(minlink)
2310 2310 if rev == len(self):
2311 2311 return
2312 2312
2313 2313 # first truncate the files on disk
2314 2314 end = self.start(rev)
2315 2315 if not self._inline:
2316 2316 transaction.add(self.datafile, end)
2317 2317 end = rev * self._io.size
2318 2318 else:
2319 2319 end += rev * self._io.size
2320 2320
2321 2321 transaction.add(self.indexfile, end)
2322 2322
2323 2323 # then reset internal state in memory to forget those revisions
2324 2324 self._cache = None
2325 2325 self._chaininfocache = {}
2326 2326 self._chunkclear()
2327 2327 for x in xrange(rev, len(self)):
2328 2328 del self.nodemap[self.node(x)]
2329 2329
2330 2330 del self.index[rev:-1]
2331 2331
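# The cut points above, spelled out (illustrative): index entries are
# fixed-size, so the index file is truncated at rev * entrysize; for a
# non-inline revlog the data file is cut separately at start(rev), while
# an inline revlog interleaves both in one file --
#
#     if inline:  cut = start(rev) + rev * entrysize
#     else:       datacut = start(rev); indexcut = rev * entrysize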
2332 2332 def checksize(self):
2333 2333 expected = 0
2334 2334 if len(self):
2335 2335 expected = max(0, self.end(len(self) - 1))
2336 2336
2337 2337 try:
2338 2338 f = self._datafp()
2339 2339 f.seek(0, 2)
2340 2340 actual = f.tell()
2341 2341 f.close()
2342 2342 dd = actual - expected
2343 2343 except IOError as inst:
2344 2344 if inst.errno != errno.ENOENT:
2345 2345 raise
2346 2346 dd = 0
2347 2347
2348 2348 try:
2349 2349 f = self.opener(self.indexfile)
2350 2350 f.seek(0, 2)
2351 2351 actual = f.tell()
2352 2352 f.close()
2353 2353 s = self._io.size
2354 2354 i = max(0, actual // s)
2355 2355 di = actual - (i * s)
2356 2356 if self._inline:
2357 2357 databytes = 0
2358 2358 for r in self:
2359 2359 databytes += max(0, self.length(r))
2360 2360 dd = 0
2361 2361 di = actual - len(self) * s - databytes
2362 2362 except IOError as inst:
2363 2363 if inst.errno != errno.ENOENT:
2364 2364 raise
2365 2365 di = 0
2366 2366
2367 2367 return (dd, di)
2368 2368
2369 2369 def files(self):
2370 2370 res = [self.indexfile]
2371 2371 if not self._inline:
2372 2372 res.append(self.datafile)
2373 2373 return res
2374 2374
2375 2375 DELTAREUSEALWAYS = 'always'
2376 2376 DELTAREUSESAMEREVS = 'samerevs'
2377 2377 DELTAREUSENEVER = 'never'
2378 2378
2379 2379 DELTAREUSEFULLADD = 'fulladd'
2380 2380
2381 2381 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2382 2382
2383 2383 def clone(self, tr, destrevlog, addrevisioncb=None,
2384 2384 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2385 2385 """Copy this revlog to another, possibly with format changes.
2386 2386
2387 2387 The destination revlog will contain the same revisions and nodes.
2388 2388 However, it may not be bit-for-bit identical due to e.g. delta encoding
2389 2389 differences.
2390 2390
2391 2391 The ``deltareuse`` argument controls how deltas from the existing revlog
2392 2392 are preserved in the destination revlog. The argument can have the
2393 2393 following values:
2394 2394
2395 2395 DELTAREUSEALWAYS
2396 2396 Deltas will always be reused (if possible), even if the destination
2397 2397 revlog would not select the same revisions for the delta. This is the
2398 2398 fastest mode of operation.
2399 2399 DELTAREUSESAMEREVS
2400 2400 Deltas will be reused if the destination revlog would pick the same
2401 2401 revisions for the delta. This mode strikes a balance between speed
2402 2402 and optimization.
2403 2403 DELTAREUSENEVER
2404 2404 Deltas will never be reused. This is the slowest mode of execution.
2405 2405 This mode can be used to recompute deltas (e.g. if the diff/delta
2406 2406 algorithm changes).
2407 2407
2408 2408 Delta computation can be slow, so the choice of delta reuse policy can
2409 2409 significantly affect run time.
2410 2410
2411 2411 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2412 2412 two extremes. Deltas will be reused if they are appropriate, but
2413 2413 recomputed when a better base revision is available. This means if you
2414 2414 are converting a non-generaldelta revlog to a generaldelta revlog,
2415 2415 deltas will be recomputed if the delta's parent isn't a parent of the
2416 2416 revision.
2417 2417
2418 2418 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2419 2419 controls whether to compute deltas against both parents for merges.
2420 2420 If not set, the destination revlog's current setting is used.
2421 2421 """
2422 2422 if deltareuse not in self.DELTAREUSEALL:
2423 2423 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2424 2424
2425 2425 if len(destrevlog):
2426 2426 raise ValueError(_('destination revlog is not empty'))
2427 2427
2428 2428 if getattr(self, 'filteredrevs', None):
2429 2429 raise ValueError(_('source revlog has filtered revisions'))
2430 2430 if getattr(destrevlog, 'filteredrevs', None):
2431 2431 raise ValueError(_('destination revlog has filtered revisions'))
2432 2432
2433 2433 # lazydeltabase controls whether to reuse a cached delta, if possible.
2434 2434 oldlazydeltabase = destrevlog._lazydeltabase
2435 2435 oldamd = destrevlog._aggressivemergedeltas
2436 2436
2437 2437 try:
2438 2438 if deltareuse == self.DELTAREUSEALWAYS:
2439 2439 destrevlog._lazydeltabase = True
2440 2440 elif deltareuse == self.DELTAREUSESAMEREVS:
2441 2441 destrevlog._lazydeltabase = False
2442 2442
2443 2443 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2444 2444
2445 2445 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2446 2446 self.DELTAREUSESAMEREVS)
2447 2447
2448 2448 deltacomputer = _deltacomputer(destrevlog)
2449 2449 index = self.index
2450 2450 for rev in self:
2451 2451 entry = index[rev]
2452 2452
2453 2453 # Some classes override linkrev to take filtered revs into
2454 2454 # account. Use raw entry from index.
2455 2455 flags = entry[0] & 0xffff
2456 2456 linkrev = entry[4]
2457 2457 p1 = index[entry[5]][7]
2458 2458 p2 = index[entry[6]][7]
2459 2459 node = entry[7]
2460 2460
2461 2461 # (Possibly) reuse the delta from the revlog if allowed and
2462 2462 # the revlog chunk is a delta.
2463 2463 cachedelta = None
2464 2464 rawtext = None
2465 2465 if populatecachedelta:
2466 2466 dp = self.deltaparent(rev)
2467 2467 if dp != nullrev:
2468 2468 cachedelta = (dp, str(self._chunk(rev)))
2469 2469
2470 2470 if not cachedelta:
2471 2471 rawtext = self.revision(rev, raw=True)
2472 2472
2474 2474 if deltareuse == self.DELTAREUSEFULLADD:
2475 2475 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2476 2476 cachedelta=cachedelta,
2477 2477 node=node, flags=flags,
2478 2478 deltacomputer=deltacomputer)
2479 2479 else:
2480 2480 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2481 2481 checkambig=False)
2482 2482 dfh = None
2483 2483 if not destrevlog._inline:
2484 2484 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2485 2485 try:
2486 2486 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2487 2487 p2, flags, cachedelta, ifh, dfh,
2488 2488 deltacomputer=deltacomputer)
2489 2489 finally:
2490 2490 if dfh:
2491 2491 dfh.close()
2492 2492 ifh.close()
2493 2493
2494 2494 if addrevisioncb:
2495 2495 addrevisioncb(self, rev, node)
2496 2496 finally:
2497 2497 destrevlog._lazydeltabase = oldlazydeltabase
2498 2498 destrevlog._aggressivemergedeltas = oldamd
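# Hedged usage sketch (``src`` and ``dst`` are revlogs, ``tr`` an open
# transaction): recomputing every delta while copying, e.g. after a
# delta algorithm change --
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)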