py3: use bytes() instead of str()...
Pulkit Goyal
r36739:6bacb2f6 default
@@ -1,2501 +1,2501 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80
81 81 # max size of revlog with inline data
82 82 _maxinline = 131072
83 83 _chunksize = 1048576
84 84
85 85 RevlogError = error.RevlogError
86 86 LookupError = error.LookupError
87 87 CensoredNodeError = error.CensoredNodeError
88 88 ProgrammingError = error.ProgrammingError
89 89
90 90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 91 _flagprocessors = {
92 92 REVIDX_ISCENSORED: None,
93 93 }
94 94
95 95 def addflagprocessor(flag, processor):
96 96 """Register a flag processor on a revision data flag.
97 97
98 98 Invariant:
99 99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 100 - Only one flag processor can be registered on a specific flag.
101 101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 102 following signatures:
103 103 - (read) f(self, rawtext) -> text, bool
104 104 - (write) f(self, text) -> rawtext, bool
105 105 - (raw) f(self, rawtext) -> bool
106 106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 107 directly visible to the user.
108 108 The boolean returned by these transforms is used to determine whether
109 109 the returned text can be used for hash integrity checking. For example,
110 110     if "write" returns False, then "text" is used to generate the hash. If
111 111     "write" returns True, then the "rawtext" returned by "write" should be
112 112     used to generate the hash. Usually, "write" and "read" return different
113 113     booleans, and "raw" returns the same boolean as "write".
114 114
115 115 Note: The 'raw' transform is used for changegroup generation and in some
116 116 debug commands. In this case the transform only indicates whether the
117 117 contents can be used for hash integrity checks.
118 118 """
119 119 if not flag & REVIDX_KNOWN_FLAGS:
120 120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag not in REVIDX_FLAGS_ORDER:
123 123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag in _flagprocessors:
126 126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 127 raise error.Abort(msg)
128 128 _flagprocessors[flag] = processor
129 129
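As a quick illustration of the (read, write, raw) contract described above (a hedged sketch, not part of this changeset), a no-op processor for REVIDX_EXTSTORED could be registered like so:

    def _extstoredread(self, rawtext):
        # present the stored bytes unchanged; True means the returned
        # text is usable for hash integrity checks
        return rawtext, True

    def _extstoredwrite(self, text):
        # store the user-visible bytes unchanged
        return text, True

    def _extstoredraw(self, rawtext):
        # rawtext can be verified against the stored hash as-is
        return True

    addflagprocessor(REVIDX_EXTSTORED,
                     (_extstoredread, _extstoredwrite, _extstoredraw))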
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
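For intuition, the first index field packs a 48-bit offset above 16 bits of flags, so the three helpers round-trip as in this small sketch:

    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED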
141 141 _nullhash = hashlib.sha1(nullid)
142 142
143 143 def hash(text, p1, p2):
144 144 """generate a hash from the given text and its parent hashes
145 145
146 146 This hash combines both the current file contents and its history
147 147 in a manner that makes it easy to distinguish nodes with the same
148 148 content in the revision graph.
149 149 """
150 150     # As of now, if one of the parent nodes is null, it is p2
151 151 if p2 == nullid:
152 152 # deep copy of a hash is faster than creating one
153 153 s = _nullhash.copy()
154 154 s.update(p1)
155 155 else:
156 156 # none of the parent nodes are nullid
157 157 if p1 < p2:
158 158 a = p1
159 159 b = p2
160 160 else:
161 161 a = p2
162 162 b = p1
163 163 s = hashlib.sha1(a)
164 164 s.update(b)
165 165 s.update(text)
166 166 return s.digest()
167 167
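A self-contained sanity check of the scheme above, using the module-level names (hash, nullid): parents are ordered with nullid sorting first, then concatenated with the text before the SHA-1.

    import hashlib

    text, p1 = b'hello\n', b'\x11' * 20
    expected = hashlib.sha1(nullid + p1 + text).digest()
    assert hash(text, p1, nullid) == expected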
168 168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 169 """returns revs[startidx:endidx] without empty trailing revs
170 170 """
171 171 length = revlog.length
172 172
173 173 if endidx is None:
174 174 endidx = len(revs)
175 175
176 176 # Trim empty revs at the end, but never the very first revision of a chain
177 177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 178 endidx -= 1
179 179
180 180 return revs[startidx:endidx]
181 181
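For example, with a stub standing in for a revlog (only the ``length`` accessor is needed), trailing empty revisions are dropped while interior ones are kept:

    class _lenstub(object):
        _lens = [5, 0, 7, 0, 0]
        def length(self, rev):
            return self._lens[rev]

    assert _trimchunk(_lenstub(), [0, 1, 2, 3, 4], 0) == [0, 1, 2]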
182 182 def _slicechunk(revlog, revs):
183 183 """slice revs to reduce the amount of unrelated data to be read from disk.
184 184
185 185     ``revs`` is sliced into groups that should each be read in one go.
186 186 Assume that revs are sorted.
187 187 """
188 188 start = revlog.start
189 189 length = revlog.length
190 190
191 191 if len(revs) <= 1:
192 192 yield revs
193 193 return
194 194
195 195 startbyte = start(revs[0])
196 196 endbyte = start(revs[-1]) + length(revs[-1])
197 197 readdata = deltachainspan = endbyte - startbyte
198 198
199 199 chainpayload = sum(length(r) for r in revs)
200 200
201 201 if deltachainspan:
202 202 density = chainpayload / float(deltachainspan)
203 203 else:
204 204 density = 1.0
205 205
206 206 # Store the gaps in a heap to have them sorted by decreasing size
207 207 gapsheap = []
208 208 heapq.heapify(gapsheap)
209 209 prevend = None
210 210 for i, rev in enumerate(revs):
211 211 revstart = start(rev)
212 212 revlen = length(rev)
213 213
214 214 # Skip empty revisions to form larger holes
215 215 if revlen == 0:
216 216 continue
217 217
218 218 if prevend is not None:
219 219 gapsize = revstart - prevend
220 220 # only consider holes that are large enough
221 221 if gapsize > revlog._srmingapsize:
222 222 heapq.heappush(gapsheap, (-gapsize, i))
223 223
224 224 prevend = revstart + revlen
225 225
226 226 # Collect the indices of the largest holes until the density is acceptable
227 227 indicesheap = []
228 228 heapq.heapify(indicesheap)
229 229 while gapsheap and density < revlog._srdensitythreshold:
230 230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 231
232 232 heapq.heappush(indicesheap, gapidx)
233 233
234 234         # the gap sizes are stored as negatives so that the heap keeps
235 235         # them sorted in decreasing order
236 236 readdata -= (-oppgapsize)
237 237 if readdata > 0:
238 238 density = chainpayload / float(readdata)
239 239 else:
240 240 density = 1.0
241 241
242 242 # Cut the revs at collected indices
243 243 previdx = 0
244 244 while indicesheap:
245 245 idx = heapq.heappop(indicesheap)
246 246
247 247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 248 if chunk:
249 249 yield chunk
250 250
251 251 previdx = idx
252 252
253 253 chunk = _trimchunk(revlog, revs, previdx)
254 254 if chunk:
255 255 yield chunk
256 256
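A toy run of the slicing logic, with a stub exposing just the attributes used above; the large gap before rev 2 splits the read into two groups:

    class _revlogstub(object):
        _srdensitythreshold = 0.5
        _srmingapsize = 10
        _starts = {0: 0, 1: 20, 2: 1000000}
        def start(self, rev):
            return self._starts[rev]
        def length(self, rev):
            return 20

    assert list(_slicechunk(_revlogstub(), [0, 1, 2])) == [[0, 1], [2]]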
257 257 @attr.s(slots=True, frozen=True)
258 258 class _deltainfo(object):
259 259 distance = attr.ib()
260 260 deltalen = attr.ib()
261 261 data = attr.ib()
262 262 base = attr.ib()
263 263 chainbase = attr.ib()
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 267 class _deltacomputer(object):
268 268 def __init__(self, revlog):
269 269 self.revlog = revlog
270 270
271 271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 272 """
273 273         Provides candidate revisions that are worth diffing against,
274 274         grouped by how likely they are to yield a good delta cheaply.
275 275 """
276 276 revlog = self.revlog
277 277 curr = len(revlog)
278 278 prev = curr - 1
279 279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280 280
281 281 # should we try to build a delta?
282 282 if prev != nullrev and revlog.storedeltachains:
283 283 tested = set()
284 284 # This condition is true most of the time when processing
285 285 # changegroup data into a generaldelta repo. The only time it
286 286 # isn't true is if this is the first revision in a delta chain
287 287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 289                 # Assume what we received from the server is a good choice;
290 290                 # the delta builder will reuse the cache
291 291 yield (cachedelta[0],)
292 292 tested.add(cachedelta[0])
293 293
294 294 if revlog._generaldelta:
295 295                 # exclude the base already tested via the lazy path, if any
296 296 parents = [p for p in (p1r, p2r)
297 297 if p != nullrev and p not in tested]
298 298 if parents and not revlog._aggressivemergedeltas:
299 299 # Pick whichever parent is closer to us (to minimize the
300 300 # chance of having to build a fulltext).
301 301 parents = [max(parents)]
302 302 tested.update(parents)
303 303 yield parents
304 304
305 305 if prev not in tested:
306 306                 # other approaches failed; try against prev to hopefully save
307 307                 # us a fulltext.
308 308 yield (prev,)
309 309
310 310 def buildtext(self, revinfo, fh):
311 311 """Builds a fulltext version of a revision
312 312
313 313 revinfo: _revisioninfo instance that contains all needed info
314 314 fh: file handle to either the .i or the .d revlog file,
315 315 depending on whether it is inlined or not
316 316 """
317 317 btext = revinfo.btext
318 318 if btext[0] is not None:
319 319 return btext[0]
320 320
321 321 revlog = self.revlog
322 322 cachedelta = revinfo.cachedelta
323 323 flags = revinfo.flags
324 324 node = revinfo.node
325 325
326 326 baserev = cachedelta[0]
327 327 delta = cachedelta[1]
328 328 # special case deltas which replace entire base; no need to decode
329 329 # base revision. this neatly avoids censored bases, which throw when
330 330 # they're decoded.
331 331 hlen = struct.calcsize(">lll")
332 332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 333 len(delta) - hlen):
334 334 btext[0] = delta[hlen:]
335 335 else:
336 336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 337 btext[0] = mdiff.patch(basetext, delta)
338 338
339 339 try:
340 340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 341 btext[0], validatehash = res
342 342 if validatehash:
343 343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 344 if flags & REVIDX_ISCENSORED:
345 345 raise RevlogError(_('node %s is not censored') % node)
346 346 except CensoredNodeError:
347 347 # must pass the censored index flag to add censored revisions
348 348 if not flags & REVIDX_ISCENSORED:
349 349 raise
350 350 return btext[0]
351 351
352 352 def _builddeltadiff(self, base, revinfo, fh):
353 353 revlog = self.revlog
354 354 t = self.buildtext(revinfo, fh)
355 355 if revlog.iscensored(base):
356 356 # deltas based on a censored revision must replace the
357 357 # full content in one patch, so delta works everywhere
358 358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 359 delta = header + t
360 360 else:
361 361 ptext = revlog.revision(base, _df=fh, raw=True)
362 362 delta = mdiff.textdiff(ptext, t)
363 363
364 364 return delta
365 365
366 366 def _builddeltainfo(self, revinfo, base, fh):
367 367 # can we use the cached delta?
368 368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 369 delta = revinfo.cachedelta[1]
370 370 else:
371 371 delta = self._builddeltadiff(base, revinfo, fh)
372 372 revlog = self.revlog
373 373 header, data = revlog.compress(delta)
374 374 deltalen = len(header) + len(data)
375 375 chainbase = revlog.chainbase(base)
376 376 offset = revlog.end(len(revlog) - 1)
377 377 dist = deltalen + offset - revlog.start(chainbase)
378 378 if revlog._generaldelta:
379 379 deltabase = base
380 380 else:
381 381 deltabase = chainbase
382 382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 383 chainlen += 1
384 384 compresseddeltalen += deltalen
385 385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 386 chainbase, chainlen, compresseddeltalen)
387 387
388 388 def finddeltainfo(self, revinfo, fh):
389 389 """Find an acceptable delta against a candidate revision
390 390
391 391 revinfo: information about the revision (instance of _revisioninfo)
392 392 fh: file handle to either the .i or the .d revlog file,
393 393 depending on whether it is inlined or not
394 394
395 395 Returns the first acceptable candidate revision, as ordered by
396 396 _getcandidaterevs
397 397 """
398 398 cachedelta = revinfo.cachedelta
399 399 p1 = revinfo.p1
400 400 p2 = revinfo.p2
401 401 revlog = self.revlog
402 402
403 403 deltainfo = None
404 404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 405 nominateddeltas = []
406 406 for candidaterev in candidaterevs:
407 407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 409 nominateddeltas.append(candidatedelta)
410 410 if nominateddeltas:
411 411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 412 break
413 413
414 414 return deltainfo
415 415
416 416 @attr.s(slots=True, frozen=True)
417 417 class _revisioninfo(object):
418 418 """Information about a revision that allows building its fulltext
419 419 node: expected hash of the revision
420 420 p1, p2: parent revs of the revision
421 421 btext: built text cache consisting of a one-element list
422 422 cachedelta: (baserev, uncompressed_delta) or None
423 423     flags: flags associated with the revision storage
424 424
425 425 One of btext[0] or cachedelta must be set.
426 426 """
427 427 node = attr.ib()
428 428 p1 = attr.ib()
429 429 p2 = attr.ib()
430 430 btext = attr.ib()
431 431 textlen = attr.ib()
432 432 cachedelta = attr.ib()
433 433 flags = attr.ib()
434 434
435 435 # index v0:
436 436 # 4 bytes: offset
437 437 # 4 bytes: compressed length
438 438 # 4 bytes: base rev
439 439 # 4 bytes: link rev
440 440 # 20 bytes: parent 1 nodeid
441 441 # 20 bytes: parent 2 nodeid
442 442 # 20 bytes: nodeid
443 443 indexformatv0 = struct.Struct(">4l20s20s20s")
444 444 indexformatv0_pack = indexformatv0.pack
445 445 indexformatv0_unpack = indexformatv0.unpack
446 446
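A quick check of the v0 record layout documented above: four signed 32-bit fields plus three 20-byte nodeids give a 76-byte record.

    entry = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, b'\xaa' * 20)
    assert len(entry) == indexformatv0.size == 76
    assert indexformatv0_unpack(entry)[6] == b'\xaa' * 20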
447 447 class revlogoldio(object):
448 448 def __init__(self):
449 449 self.size = indexformatv0.size
450 450
451 451 def parseindex(self, data, inline):
452 452 s = self.size
453 453 index = []
454 454 nodemap = {nullid: nullrev}
455 455 n = off = 0
456 456 l = len(data)
457 457 while off + s <= l:
458 458 cur = data[off:off + s]
459 459 off += s
460 460 e = indexformatv0_unpack(cur)
461 461 # transform to revlogv1 format
462 462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
463 463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
464 464 index.append(e2)
465 465 nodemap[e[6]] = n
466 466 n += 1
467 467
468 468 # add the magic null revision at -1
469 469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 return index, nodemap, None
472 472
473 473 def packentry(self, entry, node, version, rev):
474 474 if gettype(entry[0]):
475 475 raise RevlogError(_('index entry flags need revlog version 1'))
476 476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
477 477 node(entry[5]), node(entry[6]), entry[7])
478 478 return indexformatv0_pack(*e2)
479 479
480 480 # index ng:
481 481 # 6 bytes: offset
482 482 # 2 bytes: flags
483 483 # 4 bytes: compressed length
484 484 # 4 bytes: uncompressed length
485 485 # 4 bytes: base rev
486 486 # 4 bytes: link rev
487 487 # 4 bytes: parent 1 rev
488 488 # 4 bytes: parent 2 rev
489 489 # 32 bytes: nodeid
490 490 indexformatng = struct.Struct(">Qiiiiii20s12x")
491 491 indexformatng_pack = indexformatng.pack
492 492 versionformat = struct.Struct(">I")
493 493 versionformat_pack = versionformat.pack
494 494 versionformat_unpack = versionformat.unpack
495 495
496 496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 497 # signed integer)
498 498 _maxentrysize = 0x7fffffff
499 499
500 500 class revlogio(object):
501 501 def __init__(self):
502 502 self.size = indexformatng.size
503 503
504 504 def parseindex(self, data, inline):
505 505 # call the C implementation to parse the index data
506 506 index, cache = parsers.parse_index2(data, inline)
507 507 return index, getattr(index, 'nodemap', None), cache
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 p = indexformatng_pack(*entry)
511 511 if rev == 0:
512 512 p = versionformat_pack(version) + p[4:]
513 513 return p
514 514
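A sketch of the header trick in ``revlogio.packentry``: for rev 0 the first four bytes of the 64-byte record carry the version word instead of the (always zero) offset.

    io = revlogio()
    e = (offset_type(0, 0), 10, 10, -1, 0, -1, -1, b'\x55' * 20)
    rec = io.packentry(e, None, REVLOG_DEFAULT_VERSION, 0)
    assert len(rec) == indexformatng.size == 64
    assert versionformat_unpack(rec[:4])[0] == REVLOG_DEFAULT_VERSION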
515 515 class revlog(object):
516 516 """
517 517 the underlying revision storage object
518 518
519 519 A revlog consists of two parts, an index and the revision data.
520 520
521 521 The index is a file with a fixed record size containing
522 522 information on each revision, including its nodeid (hash), the
523 523 nodeids of its parents, the position and offset of its data within
524 524 the data file, and the revision it's based on. Finally, each entry
525 525 contains a linkrev entry that can serve as a pointer to external
526 526 data.
527 527
528 528 The revision data itself is a linear collection of data chunks.
529 529 Each chunk represents a revision and is usually represented as a
530 530 delta against the previous chunk. To bound lookup time, runs of
531 531 deltas are limited to about 2 times the length of the original
532 532 version data. This makes retrieval of a version proportional to
533 533 its size, or O(1) relative to the number of revisions.
534 534
535 535 Both pieces of the revlog are written to in an append-only
536 536 fashion, which means we never need to rewrite a file to insert or
537 537 remove data, and can use some simple techniques to avoid the need
538 538 for locking while reading.
539 539
540 540     If checkambig, indexfile is opened with checkambig=True when writing,
541 541     to avoid file stat ambiguity.
542 542
543 543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 544 index will be mmapped rather than read if it is larger than the
545 545 configured threshold.
546 546 """
547 547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 548 mmaplargeindex=False):
549 549 """
550 550 create a revlog object
551 551
552 552 opener is a function that abstracts the file opening operation
553 553 and can be used to implement COW semantics or the like.
554 554 """
555 555 self.indexfile = indexfile
556 556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 557 self.opener = opener
558 558         # When True, indexfile is opened with checkambig=True when writing,
559 559         # to avoid file stat ambiguity.
560 560 self._checkambig = checkambig
561 561 # 3-tuple of (node, rev, text) for a raw revision.
562 562 self._cache = None
563 563 # Maps rev to chain base rev.
564 564 self._chainbasecache = util.lrucachedict(100)
565 565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 566 self._chunkcache = (0, '')
567 567 # How much data to read and cache into the raw revlog data cache.
568 568 self._chunkcachesize = 65536
569 569 self._maxchainlen = None
570 570 self._aggressivemergedeltas = False
571 571 self.index = []
572 572 # Mapping of partial identifiers to full nodes.
573 573 self._pcache = {}
574 574 # Mapping of revision integer to full node.
575 575 self._nodecache = {nullid: nullrev}
576 576 self._nodepos = None
577 577 self._compengine = 'zlib'
578 578 self._maxdeltachainspan = -1
579 579 self._withsparseread = False
580 580 self._srdensitythreshold = 0.25
581 581 self._srmingapsize = 262144
582 582
583 583 mmapindexthreshold = None
584 584 v = REVLOG_DEFAULT_VERSION
585 585 opts = getattr(opener, 'options', None)
586 586 if opts is not None:
587 587 if 'revlogv2' in opts:
588 588 # version 2 revlogs always use generaldelta.
589 589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 590 elif 'revlogv1' in opts:
591 591 if 'generaldelta' in opts:
592 592 v |= FLAG_GENERALDELTA
593 593 else:
594 594 v = 0
595 595 if 'chunkcachesize' in opts:
596 596 self._chunkcachesize = opts['chunkcachesize']
597 597 if 'maxchainlen' in opts:
598 598 self._maxchainlen = opts['maxchainlen']
599 599 if 'aggressivemergedeltas' in opts:
600 600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 602 if 'compengine' in opts:
603 603 self._compengine = opts['compengine']
604 604 if 'maxdeltachainspan' in opts:
605 605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 607 mmapindexthreshold = opts['mmapindexthreshold']
608 608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 609 if 'sparse-read-density-threshold' in opts:
610 610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 611 if 'sparse-read-min-gap-size' in opts:
612 612 self._srmingapsize = opts['sparse-read-min-gap-size']
613 613
614 614 if self._chunkcachesize <= 0:
615 615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 616 'than 0') % self._chunkcachesize)
617 617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 619 'of 2') % self._chunkcachesize)
620 620
621 621 indexdata = ''
622 622 self._initempty = True
623 623 try:
624 624 with self._indexfp() as f:
625 625 if (mmapindexthreshold is not None and
626 626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 627 indexdata = util.buffer(util.mmapread(f))
628 628 else:
629 629 indexdata = f.read()
630 630 if len(indexdata) > 0:
631 631 v = versionformat_unpack(indexdata[:4])[0]
632 632 self._initempty = False
633 633 except IOError as inst:
634 634 if inst.errno != errno.ENOENT:
635 635 raise
636 636
637 637 self.version = v
638 638 self._inline = v & FLAG_INLINE_DATA
639 639 self._generaldelta = v & FLAG_GENERALDELTA
640 640 flags = v & ~0xFFFF
641 641 fmt = v & 0xFFFF
642 642 if fmt == REVLOGV0:
643 643 if flags:
644 644 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 645 'revlog %s') %
646 646 (flags >> 16, fmt, self.indexfile))
647 647 elif fmt == REVLOGV1:
648 648 if flags & ~REVLOGV1_FLAGS:
649 649 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 650 'revlog %s') %
651 651 (flags >> 16, fmt, self.indexfile))
652 652 elif fmt == REVLOGV2:
653 653 if flags & ~REVLOGV2_FLAGS:
654 654 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 655 'revlog %s') %
656 656 (flags >> 16, fmt, self.indexfile))
657 657 else:
658 658 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 659 (fmt, self.indexfile))
660 660
661 661 self.storedeltachains = True
662 662
663 663 self._io = revlogio()
664 664 if self.version == REVLOGV0:
665 665 self._io = revlogoldio()
666 666 try:
667 667 d = self._io.parseindex(indexdata, self._inline)
668 668 except (ValueError, IndexError):
669 669 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 670 self.index, nodemap, self._chunkcache = d
671 671 if nodemap is not None:
672 672 self.nodemap = self._nodecache = nodemap
673 673 if not self._chunkcache:
674 674 self._chunkclear()
675 675 # revnum -> (chain-length, sum-delta-length)
676 676 self._chaininfocache = {}
677 677 # revlog header -> revlog compressor
678 678 self._decompressors = {}
679 679
680 680 @util.propertycache
681 681 def _compressor(self):
682 682 return util.compengines[self._compengine].revlogcompressor()
683 683
684 684 def _indexfp(self, mode='r'):
685 685 """file object for the revlog's index file"""
686 686 args = {r'mode': mode}
687 687 if mode != 'r':
688 688 args[r'checkambig'] = self._checkambig
689 689 if mode == 'w':
690 690 args[r'atomictemp'] = True
691 691 return self.opener(self.indexfile, **args)
692 692
693 693 def _datafp(self, mode='r'):
694 694 """file object for the revlog's data file"""
695 695 return self.opener(self.datafile, mode=mode)
696 696
697 697 @contextlib.contextmanager
698 698 def _datareadfp(self, existingfp=None):
699 699 """file object suitable to read data"""
700 700 if existingfp is not None:
701 701 yield existingfp
702 702 else:
703 703 if self._inline:
704 704 func = self._indexfp
705 705 else:
706 706 func = self._datafp
707 707 with func() as fp:
708 708 yield fp
709 709
710 710 def tip(self):
711 711 return self.node(len(self.index) - 2)
712 712 def __contains__(self, rev):
713 713 return 0 <= rev < len(self)
714 714 def __len__(self):
715 715 return len(self.index) - 1
716 716 def __iter__(self):
717 717 return iter(xrange(len(self)))
718 718 def revs(self, start=0, stop=None):
719 719         """iterate over all revs in this revlog (from start to stop)"""
720 720 step = 1
721 721 if stop is not None:
722 722 if start > stop:
723 723 step = -1
724 724 stop += step
725 725 else:
726 726 stop = len(self)
727 727 return xrange(start, stop, step)
728 728
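The bounds handling in ``revs()`` is inclusive of ``stop`` and supports descending iteration; a standalone mirror of that logic, for illustration:

    def _revrange(nrevs, start=0, stop=None):
        # mirrors revlog.revs(): stop is inclusive, step follows direction
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = nrevs
        return range(start, stop, step)

    assert list(_revrange(5)) == [0, 1, 2, 3, 4]
    assert list(_revrange(5, 3, 1)) == [3, 2, 1]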
729 729 @util.propertycache
730 730 def nodemap(self):
731 731 self.rev(self.node(0))
732 732 return self._nodecache
733 733
734 734 def hasnode(self, node):
735 735 try:
736 736 self.rev(node)
737 737 return True
738 738 except KeyError:
739 739 return False
740 740
741 741 def clearcaches(self):
742 742 self._cache = None
743 743 self._chainbasecache.clear()
744 744 self._chunkcache = (0, '')
745 745 self._pcache = {}
746 746
747 747 try:
748 748 self._nodecache.clearcaches()
749 749 except AttributeError:
750 750 self._nodecache = {nullid: nullrev}
751 751 self._nodepos = None
752 752
753 753 def rev(self, node):
754 754 try:
755 755 return self._nodecache[node]
756 756 except TypeError:
757 757 raise
758 758 except RevlogError:
759 759 # parsers.c radix tree lookup failed
760 760 if node == wdirid:
761 761 raise error.WdirUnsupported
762 762 raise LookupError(node, self.indexfile, _('no node'))
763 763 except KeyError:
764 764 # pure python cache lookup failed
765 765 n = self._nodecache
766 766 i = self.index
767 767 p = self._nodepos
768 768 if p is None:
769 769 p = len(i) - 2
770 770 for r in xrange(p, -1, -1):
771 771 v = i[r][7]
772 772 n[v] = r
773 773 if v == node:
774 774 self._nodepos = r - 1
775 775 return r
776 776 if node == wdirid:
777 777 raise error.WdirUnsupported
778 778 raise LookupError(node, self.indexfile, _('no node'))
779 779
780 780 # Accessors for index entries.
781 781
782 782 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
783 783 # are flags.
784 784 def start(self, rev):
785 785 return int(self.index[rev][0] >> 16)
786 786
787 787 def flags(self, rev):
788 788 return self.index[rev][0] & 0xFFFF
789 789
790 790 def length(self, rev):
791 791 return self.index[rev][1]
792 792
793 793 def rawsize(self, rev):
794 794 """return the length of the uncompressed text for a given revision"""
795 795 l = self.index[rev][2]
796 796 if l >= 0:
797 797 return l
798 798
799 799 t = self.revision(rev, raw=True)
800 800 return len(t)
801 801
802 802 def size(self, rev):
803 803 """length of non-raw text (processed by a "read" flag processor)"""
804 804 # fast path: if no "read" flag processor could change the content,
805 805 # size is rawsize. note: ELLIPSIS is known to not change the content.
806 806 flags = self.flags(rev)
807 807 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
808 808 return self.rawsize(rev)
809 809
810 810 return len(self.revision(rev, raw=False))
811 811
812 812 def chainbase(self, rev):
813 813 base = self._chainbasecache.get(rev)
814 814 if base is not None:
815 815 return base
816 816
817 817 index = self.index
818 818 base = index[rev][3]
819 819 while base != rev:
820 820 rev = base
821 821 base = index[rev][3]
822 822
823 823 self._chainbasecache[rev] = base
824 824 return base
825 825
826 826 def linkrev(self, rev):
827 827 return self.index[rev][4]
828 828
829 829 def parentrevs(self, rev):
830 830 try:
831 831 entry = self.index[rev]
832 832 except IndexError:
833 833 if rev == wdirrev:
834 834 raise error.WdirUnsupported
835 835 raise
836 836
837 837 return entry[5], entry[6]
838 838
839 839 def node(self, rev):
840 840 try:
841 841 return self.index[rev][7]
842 842 except IndexError:
843 843 if rev == wdirrev:
844 844 raise error.WdirUnsupported
845 845 raise
846 846
847 847 # Derived from index values.
848 848
849 849 def end(self, rev):
850 850 return self.start(rev) + self.length(rev)
851 851
852 852 def parents(self, node):
853 853 i = self.index
854 854 d = i[self.rev(node)]
855 855 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
856 856
857 857 def chainlen(self, rev):
858 858 return self._chaininfo(rev)[0]
859 859
860 860 def _chaininfo(self, rev):
861 861 chaininfocache = self._chaininfocache
862 862 if rev in chaininfocache:
863 863 return chaininfocache[rev]
864 864 index = self.index
865 865 generaldelta = self._generaldelta
866 866 iterrev = rev
867 867 e = index[iterrev]
868 868 clen = 0
869 869 compresseddeltalen = 0
870 870 while iterrev != e[3]:
871 871 clen += 1
872 872 compresseddeltalen += e[1]
873 873 if generaldelta:
874 874 iterrev = e[3]
875 875 else:
876 876 iterrev -= 1
877 877 if iterrev in chaininfocache:
878 878 t = chaininfocache[iterrev]
879 879 clen += t[0]
880 880 compresseddeltalen += t[1]
881 881 break
882 882 e = index[iterrev]
883 883 else:
884 884 # Add text length of base since decompressing that also takes
885 885 # work. For cache hits the length is already included.
886 886 compresseddeltalen += e[1]
887 887 r = (clen, compresseddeltalen)
888 888 chaininfocache[rev] = r
889 889 return r
890 890
891 891 def _deltachain(self, rev, stoprev=None):
892 892 """Obtain the delta chain for a revision.
893 893
894 894 ``stoprev`` specifies a revision to stop at. If not specified, we
895 895 stop at the base of the chain.
896 896
897 897 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
898 898 revs in ascending order and ``stopped`` is a bool indicating whether
899 899 ``stoprev`` was hit.
900 900 """
901 901 # Try C implementation.
902 902 try:
903 903 return self.index.deltachain(rev, stoprev, self._generaldelta)
904 904 except AttributeError:
905 905 pass
906 906
907 907 chain = []
908 908
909 909 # Alias to prevent attribute lookup in tight loop.
910 910 index = self.index
911 911 generaldelta = self._generaldelta
912 912
913 913 iterrev = rev
914 914 e = index[iterrev]
915 915 while iterrev != e[3] and iterrev != stoprev:
916 916 chain.append(iterrev)
917 917 if generaldelta:
918 918 iterrev = e[3]
919 919 else:
920 920 iterrev -= 1
921 921 e = index[iterrev]
922 922
923 923 if iterrev == stoprev:
924 924 stopped = True
925 925 else:
926 926 chain.append(iterrev)
927 927 stopped = False
928 928
929 929 chain.reverse()
930 930 return chain, stopped
931 931
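A standalone sketch of the pure-python walk above (generaldelta case, no stoprev): follow each entry's base field until a revision is its own base.

    def _toychain(index, rev):
        chain = []
        e = index[rev]
        while rev != e[3]:
            chain.append(rev)
            rev = e[3]             # follow the stored delta base
            e = index[rev]
        chain.append(rev)          # include the self-based chain root
        chain.reverse()
        return chain

    # minimal 4-field entries; e[3] is the base rev, base == rev is a root
    toyindex = [(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 1), (0, 0, 0, 2)]
    assert _toychain(toyindex, 3) == [0, 1, 2, 3]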
932 932 def ancestors(self, revs, stoprev=0, inclusive=False):
933 933 """Generate the ancestors of 'revs' in reverse topological order.
934 934 Does not generate revs lower than stoprev.
935 935
936 936 See the documentation for ancestor.lazyancestors for more details."""
937 937
938 938 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
939 939 inclusive=inclusive)
940 940
941 941 def descendants(self, revs):
942 942 """Generate the descendants of 'revs' in revision order.
943 943
944 944 Yield a sequence of revision numbers starting with a child of
945 945 some rev in revs, i.e., each revision is *not* considered a
946 946 descendant of itself. Results are ordered by revision number (a
947 947 topological sort)."""
948 948 first = min(revs)
949 949 if first == nullrev:
950 950 for i in self:
951 951 yield i
952 952 return
953 953
954 954 seen = set(revs)
955 955 for i in self.revs(start=first + 1):
956 956 for x in self.parentrevs(i):
957 957 if x != nullrev and x in seen:
958 958 seen.add(i)
959 959 yield i
960 960 break
961 961
962 962 def findcommonmissing(self, common=None, heads=None):
963 963 """Return a tuple of the ancestors of common and the ancestors of heads
964 964 that are not ancestors of common. In revset terminology, we return the
965 965 tuple:
966 966
967 967 ::common, (::heads) - (::common)
968 968
969 969 The list is sorted by revision number, meaning it is
970 970 topologically sorted.
971 971
972 972 'heads' and 'common' are both lists of node IDs. If heads is
973 973 not supplied, uses all of the revlog's heads. If common is not
974 974 supplied, uses nullid."""
975 975 if common is None:
976 976 common = [nullid]
977 977 if heads is None:
978 978 heads = self.heads()
979 979
980 980 common = [self.rev(n) for n in common]
981 981 heads = [self.rev(n) for n in heads]
982 982
983 983 # we want the ancestors, but inclusive
984 984 class lazyset(object):
985 985 def __init__(self, lazyvalues):
986 986 self.addedvalues = set()
987 987 self.lazyvalues = lazyvalues
988 988
989 989 def __contains__(self, value):
990 990 return value in self.addedvalues or value in self.lazyvalues
991 991
992 992 def __iter__(self):
993 993 added = self.addedvalues
994 994 for r in added:
995 995 yield r
996 996 for r in self.lazyvalues:
997 997 if not r in added:
998 998 yield r
999 999
1000 1000 def add(self, value):
1001 1001 self.addedvalues.add(value)
1002 1002
1003 1003 def update(self, values):
1004 1004 self.addedvalues.update(values)
1005 1005
1006 1006 has = lazyset(self.ancestors(common))
1007 1007 has.add(nullrev)
1008 1008 has.update(common)
1009 1009
1010 1010 # take all ancestors from heads that aren't in has
1011 1011 missing = set()
1012 1012 visit = collections.deque(r for r in heads if r not in has)
1013 1013 while visit:
1014 1014 r = visit.popleft()
1015 1015 if r in missing:
1016 1016 continue
1017 1017 else:
1018 1018 missing.add(r)
1019 1019 for p in self.parentrevs(r):
1020 1020 if p not in has:
1021 1021 visit.append(p)
1022 1022 missing = list(missing)
1023 1023 missing.sort()
1024 1024 return has, [self.node(miss) for miss in missing]
1025 1025
1026 1026 def incrementalmissingrevs(self, common=None):
1027 1027 """Return an object that can be used to incrementally compute the
1028 1028 revision numbers of the ancestors of arbitrary sets that are not
1029 1029 ancestors of common. This is an ancestor.incrementalmissingancestors
1030 1030 object.
1031 1031
1032 1032 'common' is a list of revision numbers. If common is not supplied, uses
1033 1033 nullrev.
1034 1034 """
1035 1035 if common is None:
1036 1036 common = [nullrev]
1037 1037
1038 1038 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1039 1039
1040 1040 def findmissingrevs(self, common=None, heads=None):
1041 1041 """Return the revision numbers of the ancestors of heads that
1042 1042 are not ancestors of common.
1043 1043
1044 1044 More specifically, return a list of revision numbers corresponding to
1045 1045 nodes N such that every N satisfies the following constraints:
1046 1046
1047 1047 1. N is an ancestor of some node in 'heads'
1048 1048 2. N is not an ancestor of any node in 'common'
1049 1049
1050 1050 The list is sorted by revision number, meaning it is
1051 1051 topologically sorted.
1052 1052
1053 1053 'heads' and 'common' are both lists of revision numbers. If heads is
1054 1054 not supplied, uses all of the revlog's heads. If common is not
1055 1055         supplied, uses nullrev."""
1056 1056 if common is None:
1057 1057 common = [nullrev]
1058 1058 if heads is None:
1059 1059 heads = self.headrevs()
1060 1060
1061 1061 inc = self.incrementalmissingrevs(common=common)
1062 1062 return inc.missingancestors(heads)
1063 1063
1064 1064 def findmissing(self, common=None, heads=None):
1065 1065 """Return the ancestors of heads that are not ancestors of common.
1066 1066
1067 1067 More specifically, return a list of nodes N such that every N
1068 1068 satisfies the following constraints:
1069 1069
1070 1070 1. N is an ancestor of some node in 'heads'
1071 1071 2. N is not an ancestor of any node in 'common'
1072 1072
1073 1073 The list is sorted by revision number, meaning it is
1074 1074 topologically sorted.
1075 1075
1076 1076 'heads' and 'common' are both lists of node IDs. If heads is
1077 1077 not supplied, uses all of the revlog's heads. If common is not
1078 1078 supplied, uses nullid."""
1079 1079 if common is None:
1080 1080 common = [nullid]
1081 1081 if heads is None:
1082 1082 heads = self.heads()
1083 1083
1084 1084 common = [self.rev(n) for n in common]
1085 1085 heads = [self.rev(n) for n in heads]
1086 1086
1087 1087 inc = self.incrementalmissingrevs(common=common)
1088 1088 return [self.node(r) for r in inc.missingancestors(heads)]
1089 1089
1090 1090 def nodesbetween(self, roots=None, heads=None):
1091 1091 """Return a topological path from 'roots' to 'heads'.
1092 1092
1093 1093 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1094 1094 topologically sorted list of all nodes N that satisfy both of
1095 1095 these constraints:
1096 1096
1097 1097 1. N is a descendant of some node in 'roots'
1098 1098 2. N is an ancestor of some node in 'heads'
1099 1099
1100 1100 Every node is considered to be both a descendant and an ancestor
1101 1101 of itself, so every reachable node in 'roots' and 'heads' will be
1102 1102 included in 'nodes'.
1103 1103
1104 1104 'outroots' is the list of reachable nodes in 'roots', i.e., the
1105 1105 subset of 'roots' that is returned in 'nodes'. Likewise,
1106 1106 'outheads' is the subset of 'heads' that is also in 'nodes'.
1107 1107
1108 1108 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1109 1109 unspecified, uses nullid as the only root. If 'heads' is
1110 1110 unspecified, uses list of all of the revlog's heads."""
1111 1111 nonodes = ([], [], [])
1112 1112 if roots is not None:
1113 1113 roots = list(roots)
1114 1114 if not roots:
1115 1115 return nonodes
1116 1116 lowestrev = min([self.rev(n) for n in roots])
1117 1117 else:
1118 1118 roots = [nullid] # Everybody's a descendant of nullid
1119 1119 lowestrev = nullrev
1120 1120 if (lowestrev == nullrev) and (heads is None):
1121 1121 # We want _all_ the nodes!
1122 1122 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1123 1123 if heads is None:
1124 1124 # All nodes are ancestors, so the latest ancestor is the last
1125 1125 # node.
1126 1126 highestrev = len(self) - 1
1127 1127 # Set ancestors to None to signal that every node is an ancestor.
1128 1128 ancestors = None
1129 1129 # Set heads to an empty dictionary for later discovery of heads
1130 1130 heads = {}
1131 1131 else:
1132 1132 heads = list(heads)
1133 1133 if not heads:
1134 1134 return nonodes
1135 1135 ancestors = set()
1136 1136 # Turn heads into a dictionary so we can remove 'fake' heads.
1137 1137 # Also, later we will be using it to filter out the heads we can't
1138 1138 # find from roots.
1139 1139 heads = dict.fromkeys(heads, False)
1140 1140 # Start at the top and keep marking parents until we're done.
1141 1141 nodestotag = set(heads)
1142 1142 # Remember where the top was so we can use it as a limit later.
1143 1143 highestrev = max([self.rev(n) for n in nodestotag])
1144 1144 while nodestotag:
1145 1145 # grab a node to tag
1146 1146 n = nodestotag.pop()
1147 1147 # Never tag nullid
1148 1148 if n == nullid:
1149 1149 continue
1150 1150 # A node's revision number represents its place in a
1151 1151 # topologically sorted list of nodes.
1152 1152 r = self.rev(n)
1153 1153 if r >= lowestrev:
1154 1154 if n not in ancestors:
1155 1155 # If we are possibly a descendant of one of the roots
1156 1156 # and we haven't already been marked as an ancestor
1157 1157 ancestors.add(n) # Mark as ancestor
1158 1158 # Add non-nullid parents to list of nodes to tag.
1159 1159 nodestotag.update([p for p in self.parents(n) if
1160 1160 p != nullid])
1161 1161 elif n in heads: # We've seen it before, is it a fake head?
1162 1162 # So it is, real heads should not be the ancestors of
1163 1163 # any other heads.
1164 1164 heads.pop(n)
1165 1165 if not ancestors:
1166 1166 return nonodes
1167 1167 # Now that we have our set of ancestors, we want to remove any
1168 1168 # roots that are not ancestors.
1169 1169
1170 1170 # If one of the roots was nullid, everything is included anyway.
1171 1171 if lowestrev > nullrev:
1172 1172 # But, since we weren't, let's recompute the lowest rev to not
1173 1173 # include roots that aren't ancestors.
1174 1174
1175 1175 # Filter out roots that aren't ancestors of heads
1176 1176 roots = [root for root in roots if root in ancestors]
1177 1177 # Recompute the lowest revision
1178 1178 if roots:
1179 1179 lowestrev = min([self.rev(root) for root in roots])
1180 1180 else:
1181 1181 # No more roots? Return empty list
1182 1182 return nonodes
1183 1183 else:
1184 1184 # We are descending from nullid, and don't need to care about
1185 1185 # any other roots.
1186 1186 lowestrev = nullrev
1187 1187 roots = [nullid]
1188 1188 # Transform our roots list into a set.
1189 1189 descendants = set(roots)
1190 1190 # Also, keep the original roots so we can filter out roots that aren't
1191 1191 # 'real' roots (i.e. are descended from other roots).
1192 1192 roots = descendants.copy()
1193 1193 # Our topologically sorted list of output nodes.
1194 1194 orderedout = []
1195 1195 # Don't start at nullid since we don't want nullid in our output list,
1196 1196 # and if nullid shows up in descendants, empty parents will look like
1197 1197 # they're descendants.
1198 1198 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1199 1199 n = self.node(r)
1200 1200 isdescendant = False
1201 1201 if lowestrev == nullrev: # Everybody is a descendant of nullid
1202 1202 isdescendant = True
1203 1203 elif n in descendants:
1204 1204 # n is already a descendant
1205 1205 isdescendant = True
1206 1206 # This check only needs to be done here because all the roots
1207 1207                 # will start being marked as descendants before the loop.
1208 1208 if n in roots:
1209 1209 # If n was a root, check if it's a 'real' root.
1210 1210 p = tuple(self.parents(n))
1211 1211 # If any of its parents are descendants, it's not a root.
1212 1212 if (p[0] in descendants) or (p[1] in descendants):
1213 1213 roots.remove(n)
1214 1214 else:
1215 1215 p = tuple(self.parents(n))
1216 1216 # A node is a descendant if either of its parents are
1217 1217                 # descendants. (We seeded the descendants set with the roots
1218 1218                 # up there, remember?)
1219 1219 if (p[0] in descendants) or (p[1] in descendants):
1220 1220 descendants.add(n)
1221 1221 isdescendant = True
1222 1222 if isdescendant and ((ancestors is None) or (n in ancestors)):
1223 1223 # Only include nodes that are both descendants and ancestors.
1224 1224 orderedout.append(n)
1225 1225 if (ancestors is not None) and (n in heads):
1226 1226 # We're trying to figure out which heads are reachable
1227 1227 # from roots.
1228 1228 # Mark this head as having been reached
1229 1229 heads[n] = True
1230 1230 elif ancestors is None:
1231 1231 # Otherwise, we're trying to discover the heads.
1232 1232 # Assume this is a head because if it isn't, the next step
1233 1233 # will eventually remove it.
1234 1234 heads[n] = True
1235 1235 # But, obviously its parents aren't.
1236 1236 for p in self.parents(n):
1237 1237 heads.pop(p, None)
1238 1238 heads = [head for head, flag in heads.iteritems() if flag]
1239 1239 roots = list(roots)
1240 1240 assert orderedout
1241 1241 assert roots
1242 1242 assert heads
1243 1243 return (orderedout, roots, heads)
1244 1244
1245 1245 def headrevs(self):
1246 1246 try:
1247 1247 return self.index.headrevs()
1248 1248 except AttributeError:
1249 1249 return self._headrevs()
1250 1250
1251 1251 def computephases(self, roots):
1252 1252 return self.index.computephasesmapsets(roots)
1253 1253
1254 1254 def _headrevs(self):
1255 1255 count = len(self)
1256 1256 if not count:
1257 1257 return [nullrev]
1258 1258 # we won't iter over filtered rev so nobody is a head at start
1259 1259 ishead = [0] * (count + 1)
1260 1260 index = self.index
1261 1261 for r in self:
1262 1262             ishead[r] = 1 # I may be a head
1263 1263 e = index[r]
1264 1264             ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1265 1265 return [r for r, val in enumerate(ishead) if val]
1266 1266
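The counting trick above in miniature: the extra slot at index ``count`` absorbs writes for nullrev (-1) parents, so only revs never seen as a parent survive.

    parentpairs = [(-1, -1), (0, -1), (0, -1), (1, 2)]
    ishead = [0] * (len(parentpairs) + 1)
    for r, ps in enumerate(parentpairs):
        ishead[r] = 1              # I may be a head
        for p in ps:
            ishead[p] = 0          # my parents are not
    assert [r for r, v in enumerate(ishead) if v] == [3]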
1267 1267 def heads(self, start=None, stop=None):
1268 1268 """return the list of all nodes that have no children
1269 1269
1270 1270 if start is specified, only heads that are descendants of
1271 1271 start will be returned
1272 1272 if stop is specified, it will consider all the revs from stop
1273 1273 as if they had no children
1274 1274 """
1275 1275 if start is None and stop is None:
1276 1276 if not len(self):
1277 1277 return [nullid]
1278 1278 return [self.node(r) for r in self.headrevs()]
1279 1279
1280 1280 if start is None:
1281 1281 start = nullid
1282 1282 if stop is None:
1283 1283 stop = []
1284 1284 stoprevs = set([self.rev(n) for n in stop])
1285 1285 startrev = self.rev(start)
1286 1286 reachable = {startrev}
1287 1287 heads = {startrev}
1288 1288
1289 1289 parentrevs = self.parentrevs
1290 1290 for r in self.revs(start=startrev + 1):
1291 1291 for p in parentrevs(r):
1292 1292 if p in reachable:
1293 1293 if r not in stoprevs:
1294 1294 reachable.add(r)
1295 1295 heads.add(r)
1296 1296 if p in heads and p not in stoprevs:
1297 1297 heads.remove(p)
1298 1298
1299 1299 return [self.node(r) for r in heads]
1300 1300
1301 1301 def children(self, node):
1302 1302 """find the children of a given node"""
1303 1303 c = []
1304 1304 p = self.rev(node)
1305 1305 for r in self.revs(start=p + 1):
1306 1306 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1307 1307 if prevs:
1308 1308 for pr in prevs:
1309 1309 if pr == p:
1310 1310 c.append(self.node(r))
1311 1311 elif p == nullrev:
1312 1312 c.append(self.node(r))
1313 1313 return c
1314 1314
1315 1315 def descendant(self, start, end):
1316 1316 if start == nullrev:
1317 1317 return True
1318 1318 for i in self.descendants([start]):
1319 1319 if i == end:
1320 1320 return True
1321 1321 elif i > end:
1322 1322 break
1323 1323 return False
1324 1324
1325 1325 def commonancestorsheads(self, a, b):
1326 1326 """calculate all the heads of the common ancestors of nodes a and b"""
1327 1327 a, b = self.rev(a), self.rev(b)
1328 1328 try:
1329 1329 ancs = self.index.commonancestorsheads(a, b)
1330 1330 except (AttributeError, OverflowError): # C implementation failed
1331 1331 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1332 1332 return pycompat.maplist(self.node, ancs)
1333 1333
1334 1334 def isancestor(self, a, b):
1335 1335 """return True if node a is an ancestor of node b
1336 1336
1337 1337 The implementation of this is trivial but the use of
1338 1338 commonancestorsheads is not."""
1339 1339 return a in self.commonancestorsheads(a, b)
1340 1340
1341 1341 def ancestor(self, a, b):
1342 1342 """calculate the "best" common ancestor of nodes a and b"""
1343 1343
1344 1344 a, b = self.rev(a), self.rev(b)
1345 1345 try:
1346 1346 ancs = self.index.ancestors(a, b)
1347 1347 except (AttributeError, OverflowError):
1348 1348 ancs = ancestor.ancestors(self.parentrevs, a, b)
1349 1349 if ancs:
1350 1350 # choose a consistent winner when there's a tie
1351 1351 return min(map(self.node, ancs))
1352 1352 return nullid
1353 1353
1354 1354 def _match(self, id):
1355 1355 if isinstance(id, int):
1356 1356 # rev
1357 1357 return self.node(id)
1358 1358 if len(id) == 20:
1359 1359 # possibly a binary node
1360 1360 # odds of a binary node being all hex in ASCII are 1 in 10**25
1361 1361 try:
1362 1362 node = id
1363 1363 self.rev(node) # quick search the index
1364 1364 return node
1365 1365 except LookupError:
1366 1366 pass # may be partial hex id
1367 1367 try:
1368 1368 # str(rev)
1369 1369 rev = int(id)
1370 1370 if "%d" % rev != id:
1371 1371 raise ValueError
1372 1372 if rev < 0:
1373 1373 rev = len(self) + rev
1374 1374 if rev < 0 or rev >= len(self):
1375 1375 raise ValueError
1376 1376 return self.node(rev)
1377 1377 except (ValueError, OverflowError):
1378 1378 pass
1379 1379 if len(id) == 40:
1380 1380 try:
1381 1381 # a full hex nodeid?
1382 1382 node = bin(id)
1383 1383 self.rev(node)
1384 1384 return node
1385 1385 except (TypeError, LookupError):
1386 1386 pass
1387 1387
1388 1388 def _partialmatch(self, id):
1389 1389 maybewdir = wdirhex.startswith(id)
1390 1390 try:
1391 1391 partial = self.index.partialmatch(id)
1392 1392 if partial and self.hasnode(partial):
1393 1393 if maybewdir:
1394 1394 # single 'ff...' match in radix tree, ambiguous with wdir
1395 1395 raise RevlogError
1396 1396 return partial
1397 1397 if maybewdir:
1398 1398 # no 'ff...' match in radix tree, wdir identified
1399 1399 raise error.WdirUnsupported
1400 1400 return None
1401 1401 except RevlogError:
1402 1402 # parsers.c radix tree lookup gave multiple matches
1403 1403 # fast path: for unfiltered changelog, radix tree is accurate
1404 1404 if not getattr(self, 'filteredrevs', None):
1405 1405 raise LookupError(id, self.indexfile,
1406 1406 _('ambiguous identifier'))
1407 1407 # fall through to slow path that filters hidden revisions
1408 1408 except (AttributeError, ValueError):
1409 1409 # we are pure python, or key was too short to search radix tree
1410 1410 pass
1411 1411
1412 1412 if id in self._pcache:
1413 1413 return self._pcache[id]
1414 1414
1415 1415 if len(id) < 40:
1416 1416 try:
1417 1417 # hex(node)[:...]
1418 1418 l = len(id) // 2 # grab an even number of digits
1419 1419 prefix = bin(id[:l * 2])
1420 1420 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1421 1421 nl = [n for n in nl if hex(n).startswith(id) and
1422 1422 self.hasnode(n)]
1423 1423 if len(nl) > 0:
1424 1424 if len(nl) == 1 and not maybewdir:
1425 1425 self._pcache[id] = nl[0]
1426 1426 return nl[0]
1427 1427 raise LookupError(id, self.indexfile,
1428 1428 _('ambiguous identifier'))
1429 1429 if maybewdir:
1430 1430 raise error.WdirUnsupported
1431 1431 return None
1432 1432 except TypeError:
1433 1433 pass
1434 1434
1435 1435 def lookup(self, id):
1436 1436 """locate a node based on:
1437 1437 - revision number or str(revision number)
1438 1438 - nodeid or subset of hex nodeid
1439 1439 """
1440 1440 n = self._match(id)
1441 1441 if n is not None:
1442 1442 return n
1443 1443 n = self._partialmatch(id)
1444 1444 if n:
1445 1445 return n
1446 1446
1447 1447 raise LookupError(id, self.indexfile, _('no match found'))
1448 1448
1449 1449 def shortest(self, hexnode, minlength=1):
1450 1450 """Find the shortest unambiguous prefix that matches hexnode."""
1451 1451 def isvalid(test):
1452 1452 try:
1453 1453 if self._partialmatch(test) is None:
1454 1454 return False
1455 1455
1456 1456 try:
1457 1457 i = int(test)
1458 1458                     # if we are a pure int, then starting with zero will not
1459 1459                     # be confused with a rev; nor, obviously, will an int
1460 1460                     # larger than the tip rev
1461 1461 if test[0] == '0' or i > len(self):
1462 1462 return True
1463 1463 return False
1464 1464 except ValueError:
1465 1465 return True
1466 1466 except error.RevlogError:
1467 1467 return False
1468 1468 except error.WdirUnsupported:
1469 1469 # single 'ff...' match
1470 1470 return True
1471 1471
1472 1472 shortest = hexnode
1473 1473 startlength = max(6, minlength)
1474 1474 length = startlength
1475 1475 while True:
1476 1476 test = hexnode[:length]
1477 1477 if isvalid(test):
1478 1478 shortest = test
1479 1479 if length == minlength or length > startlength:
1480 1480 return shortest
1481 1481 length -= 1
1482 1482 else:
1483 1483 length += 1
1484 1484 if len(shortest) <= length:
1485 1485 return shortest
1486 1486
1487 1487 def cmp(self, node, text):
1488 1488 """compare text with a given file revision
1489 1489
1490 1490         returns True if text is different from what is stored.
1491 1491 """
1492 1492 p1, p2 = self.parents(node)
1493 1493 return hash(text, p1, p2) != node
1494 1494
1495 1495 def _cachesegment(self, offset, data):
1496 1496 """Add a segment to the revlog cache.
1497 1497
1498 1498 Accepts an absolute offset and the data that is at that location.
1499 1499 """
1500 1500 o, d = self._chunkcache
1501 1501 # try to add to existing cache
1502 1502 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1503 1503 self._chunkcache = o, d + data
1504 1504 else:
1505 1505 self._chunkcache = offset, data
1506 1506
1507 1507 def _readsegment(self, offset, length, df=None):
1508 1508 """Load a segment of raw data from the revlog.
1509 1509
1510 1510 Accepts an absolute offset, length to read, and an optional existing
1511 1511 file handle to read from.
1512 1512
1513 1513 If an existing file handle is passed, it will be seeked and the
1514 1514 original seek position will NOT be restored.
1515 1515
1516 1516 Returns a str or buffer of raw byte data.
1517 1517 """
1518 1518 # Cache data both forward and backward around the requested
1519 1519 # data, in a fixed size window. This helps speed up operations
1520 1520 # involving reading the revlog backwards.
1521 1521 cachesize = self._chunkcachesize
1522 1522 realoffset = offset & ~(cachesize - 1)
1523 1523 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1524 1524 - realoffset)
1525 1525 with self._datareadfp(df) as df:
1526 1526 df.seek(realoffset)
1527 1527 d = df.read(reallength)
1528 1528 self._cachesegment(realoffset, d)
1529 1529 if offset != realoffset or reallength != length:
1530 1530 return util.buffer(d, offset - realoffset, length)
1531 1531 return d
1532 1532
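The window arithmetic above rounds the requested range out to boundaries aligned to the cache size (validated as a power of two in __init__); for example:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (65536, 65536)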
1533 1533 def _getsegment(self, offset, length, df=None):
1534 1534 """Obtain a segment of raw data from the revlog.
1535 1535
1536 1536 Accepts an absolute offset, length of bytes to obtain, and an
1537 1537 optional file handle to the already-opened revlog. If the file
1538 1538         handle is used, its original seek position will not be preserved.
1539 1539
1540 1540 Requests for data may be returned from a cache.
1541 1541
1542 1542 Returns a str or a buffer instance of raw byte data.
1543 1543 """
1544 1544 o, d = self._chunkcache
1545 1545 l = len(d)
1546 1546
1547 1547 # is it in the cache?
1548 1548 cachestart = offset - o
1549 1549 cacheend = cachestart + length
1550 1550 if cachestart >= 0 and cacheend <= l:
1551 1551 if cachestart == 0 and cacheend == l:
1552 1552 return d # avoid a copy
1553 1553 return util.buffer(d, cachestart, cacheend - cachestart)
1554 1554
1555 1555 return self._readsegment(offset, length, df=df)
1556 1556
1557 1557 def _getsegmentforrevs(self, startrev, endrev, df=None):
1558 1558 """Obtain a segment of raw data corresponding to a range of revisions.
1559 1559
1560 1560 Accepts the start and end revisions and an optional already-open
1561 1561         file handle to be used for reading. If the file handle is used, its
1562 1562 seek position will not be preserved.
1563 1563
1564 1564 Requests for data may be satisfied by a cache.
1565 1565
1566 1566 Returns a 2-tuple of (offset, data) for the requested range of
1567 1567 revisions. Offset is the integer offset from the beginning of the
1568 1568 revlog and data is a str or buffer of the raw byte data.
1569 1569
1570 1570 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1571 1571 to determine where each revision's data begins and ends.
1572 1572 """
1573 1573 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1574 1574 # (functions are expensive).
1575 1575 index = self.index
1576 1576 istart = index[startrev]
1577 1577 start = int(istart[0] >> 16)
1578 1578 if startrev == endrev:
1579 1579 end = start + istart[1]
1580 1580 else:
1581 1581 iend = index[endrev]
1582 1582 end = int(iend[0] >> 16) + iend[1]
1583 1583
1584 1584 if self._inline:
1585 1585 start += (startrev + 1) * self._io.size
1586 1586 end += (endrev + 1) * self._io.size
1587 1587 length = end - start
1588 1588
1589 1589 return start, self._getsegment(start, length, df=df)
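# --- editor's illustration, not part of revlog.py ---
# Why the inline branch above shifts both offsets: in an inline revlog the
# index entries and data chunks share one file, so revision r's chunk sits
# after the (r + 1) index entries that precede it. A sketch with an
# assumed 64-byte index entry size:
def _inlineoffset(dataoffset, rev, iosize=64):
    return dataoffset + (rev + 1) * iosize

# revision 0's chunk starts immediately after its own index entry
assert _inlineoffset(0, 0) == 64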
1590 1590
1591 1591 def _chunk(self, rev, df=None):
1592 1592 """Obtain a single decompressed chunk for a revision.
1593 1593
1594 1594 Accepts an integer revision and an optional already-open file handle
1595 1595 to be used for reading. If used, the seek position of the file will not
1596 1596 be preserved.
1597 1597
1598 1598 Returns a str holding uncompressed data for the requested revision.
1599 1599 """
1600 1600 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1601 1601
1602 1602 def _chunks(self, revs, df=None):
1603 1603 """Obtain decompressed chunks for the specified revisions.
1604 1604
1605 1605 Accepts an iterable of numeric revisions that are assumed to be in
1606 1606 ascending order. Also accepts an optional already-open file handle
1607 1607 to be used for reading. If used, the seek position of the file will
1608 1608 not be preserved.
1609 1609
1610 1610 This function is similar to calling ``self._chunk()`` multiple times,
1611 1611 but is faster.
1612 1612
1613 1613 Returns a list with decompressed data for each requested revision.
1614 1614 """
1615 1615 if not revs:
1616 1616 return []
1617 1617 start = self.start
1618 1618 length = self.length
1619 1619 inline = self._inline
1620 1620 iosize = self._io.size
1621 1621 buffer = util.buffer
1622 1622
1623 1623 l = []
1624 1624 ladd = l.append
1625 1625
1626 1626 if not self._withsparseread:
1627 1627 slicedchunks = (revs,)
1628 1628 else:
1629 1629 slicedchunks = _slicechunk(self, revs)
1630 1630
1631 1631 for revschunk in slicedchunks:
1632 1632 firstrev = revschunk[0]
1633 1633 # Skip trailing revisions with empty diff
1634 1634 for lastrev in revschunk[::-1]:
1635 1635 if length(lastrev) != 0:
1636 1636 break
1637 1637
1638 1638 try:
1639 1639 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1640 1640 except OverflowError:
1641 1641 # issue4215 - we can't cache a run of chunks greater than
1642 1642 # 2G on Windows
1643 1643 return [self._chunk(rev, df=df) for rev in revschunk]
1644 1644
1645 1645 decomp = self.decompress
1646 1646 for rev in revschunk:
1647 1647 chunkstart = start(rev)
1648 1648 if inline:
1649 1649 chunkstart += (rev + 1) * iosize
1650 1650 chunklength = length(rev)
1651 1651 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1652 1652
1653 1653 return l
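# --- editor's illustration, not part of revlog.py ---
# The loop above turns one contiguous read into per-revision chunks by
# offset arithmetic instead of issuing one file read per revision. A
# standalone sketch of that slicing (names and data illustrative only):
def _slicebyrev(data, offset, starts, lengths):
    # `data` was read starting at absolute file offset `offset`; chunk i
    # begins at absolute offset starts[i] and spans lengths[i] bytes
    return [data[s - offset:s - offset + l]
            for s, l in zip(starts, lengths)]

segment = b'AAABBCCCC'  # three compressed chunks fetched in one read
assert _slicebyrev(segment, 10, [10, 13, 15], [3, 2, 4]) == \
    [b'AAA', b'BB', b'CCCC']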
1654 1654
1655 1655 def _chunkclear(self):
1656 1656 """Clear the raw chunk cache."""
1657 1657 self._chunkcache = (0, '')
1658 1658
1659 1659 def deltaparent(self, rev):
1660 1660 """return deltaparent of the given revision"""
1661 1661 base = self.index[rev][3]
1662 1662 if base == rev:
1663 1663 return nullrev
1664 1664 elif self._generaldelta:
1665 1665 return base
1666 1666 else:
1667 1667 return rev - 1
1668 1668
1669 1669 def revdiff(self, rev1, rev2):
1670 1670 """return or calculate a delta between two revisions
1671 1671
1672 1672 The delta calculated is in binary form and is intended to be written to
1673 1673 revlog data directly. So this function needs raw revision data.
1674 1674 """
1675 1675 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1676 1676 return bytes(self._chunk(rev2))
1677 1677
1678 1678 return mdiff.textdiff(self.revision(rev1, raw=True),
1679 1679 self.revision(rev2, raw=True))
1680 1680
1681 1681 def revision(self, nodeorrev, _df=None, raw=False):
1682 1682 """return an uncompressed revision of a given node or revision
1683 1683 number.
1684 1684
1685 1685 _df - an existing file handle to read from. (internal-only)
1686 1686 raw - an optional argument specifying if the revision data is to be
1687 1687 treated as raw data when applying flag transforms. 'raw' should be set
1688 1688 to True when generating changegroups or in debug commands.
1689 1689 """
1690 1690 if isinstance(nodeorrev, int):
1691 1691 rev = nodeorrev
1692 1692 node = self.node(rev)
1693 1693 else:
1694 1694 node = nodeorrev
1695 1695 rev = None
1696 1696
1697 1697 cachedrev = None
1698 1698 flags = None
1699 1699 rawtext = None
1700 1700 if node == nullid:
1701 1701 return ""
1702 1702 if self._cache:
1703 1703 if self._cache[0] == node:
1704 1704 # _cache only stores rawtext
1705 1705 if raw:
1706 1706 return self._cache[2]
1707 1707 # duplicated, but good for perf
1708 1708 if rev is None:
1709 1709 rev = self.rev(node)
1710 1710 if flags is None:
1711 1711 flags = self.flags(rev)
1712 1712 # no extra flags set, no flag processor runs, text = rawtext
1713 1713 if flags == REVIDX_DEFAULT_FLAGS:
1714 1714 return self._cache[2]
1715 1715 # rawtext is reusable. need to run flag processor
1716 1716 rawtext = self._cache[2]
1717 1717
1718 1718 cachedrev = self._cache[1]
1719 1719
1720 1720 # look up what we need to read
1721 1721 if rawtext is None:
1722 1722 if rev is None:
1723 1723 rev = self.rev(node)
1724 1724
1725 1725 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1726 1726 if stopped:
1727 1727 rawtext = self._cache[2]
1728 1728
1729 1729 # drop cache to save memory
1730 1730 self._cache = None
1731 1731
1732 1732 bins = self._chunks(chain, df=_df)
1733 1733 if rawtext is None:
1734 1734 rawtext = bytes(bins[0])
1735 1735 bins = bins[1:]
1736 1736
1737 1737 rawtext = mdiff.patches(rawtext, bins)
1738 1738 self._cache = (node, rev, rawtext)
1739 1739
1740 1740 if flags is None:
1741 1741 if rev is None:
1742 1742 rev = self.rev(node)
1743 1743 flags = self.flags(rev)
1744 1744
1745 1745 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1746 1746 if validatehash:
1747 1747 self.checkhash(text, node, rev=rev)
1748 1748
1749 1749 return text
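# --- editor's illustration, not part of revlog.py ---
# The heart of revision(): the base text from the delta chain is folded
# under each successive delta (mdiff.patches() does this in one pass over
# real bdiff deltas). A runnable sketch using a toy (start, end, data)
# delta format instead of the binary one:
def _applydelta(text, delta):
    start, end, data = delta
    return text[:start] + data + text[end:]

def _foldpatches(base, deltas):
    text = base
    for d in deltas:
        text = _applydelta(text, d)
    return text

assert _foldpatches(b'abcdef', [(1, 3, b'XY'), (0, 1, b'')]) == b'XYdef'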
1750 1750
1751 1751 def hash(self, text, p1, p2):
1752 1752 """Compute a node hash.
1753 1753
1754 1754 Available as a function so that subclasses can replace the hash
1755 1755 as needed.
1756 1756 """
1757 1757 return hash(text, p1, p2)
1758 1758
1759 1759 def _processflags(self, text, flags, operation, raw=False):
1760 1760 """Inspect revision data flags and applies transforms defined by
1761 1761 registered flag processors.
1762 1762
1763 1763 ``text`` - the revision data to process
1764 1764 ``flags`` - the revision flags
1765 1765 ``operation`` - the operation being performed (read or write)
1766 1766 ``raw`` - an optional argument describing if the raw transform should be
1767 1767 applied.
1768 1768
1769 1769 This method processes the flags in the order (or reverse order if
1770 1770 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1771 1771 flag processors registered for present flags. The order of flags defined
1772 1772         in REVIDX_FLAGS_ORDER must be stable: the transforms are not commutative.
1773 1773
1774 1774 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1775 1775 processed text and ``validatehash`` is a bool indicating whether the
1776 1776 returned text should be checked for hash integrity.
1777 1777
1778 1778 Note: If the ``raw`` argument is set, it has precedence over the
1779 1779 operation and will only update the value of ``validatehash``.
1780 1780 """
1781 1781 # fast path: no flag processors will run
1782 1782 if flags == 0:
1783 1783 return text, True
1784 1784         if operation not in ('read', 'write'):
1785 1785             raise ProgrammingError(_("invalid '%s' operation") % operation)
1786 1786 # Check all flags are known.
1787 1787 if flags & ~REVIDX_KNOWN_FLAGS:
1788 1788 raise RevlogError(_("incompatible revision flag '%#x'") %
1789 1789 (flags & ~REVIDX_KNOWN_FLAGS))
1790 1790 validatehash = True
1791 1791 # Depending on the operation (read or write), the order might be
1792 1792 # reversed due to non-commutative transforms.
1793 1793 orderedflags = REVIDX_FLAGS_ORDER
1794 1794 if operation == 'write':
1795 1795 orderedflags = reversed(orderedflags)
1796 1796
1797 1797 for flag in orderedflags:
1798 1798 # If a flagprocessor has been registered for a known flag, apply the
1799 1799 # related operation transform and update result tuple.
1800 1800 if flag & flags:
1801 1801 vhash = True
1802 1802
1803 1803 if flag not in _flagprocessors:
1804 1804 message = _("missing processor for flag '%#x'") % (flag)
1805 1805 raise RevlogError(message)
1806 1806
1807 1807 processor = _flagprocessors[flag]
1808 1808 if processor is not None:
1809 1809 readtransform, writetransform, rawtransform = processor
1810 1810
1811 1811 if raw:
1812 1812 vhash = rawtransform(self, text)
1813 1813 elif operation == 'read':
1814 1814 text, vhash = readtransform(self, text)
1815 1815 else: # write operation
1816 1816 text, vhash = writetransform(self, text)
1817 1817 validatehash = validatehash and vhash
1818 1818
1819 1819 return text, validatehash
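# --- editor's illustration, not part of revlog.py ---
# The shape of a processor consumed above: a (read, write, raw) triple
# whose booleans say whether the result may still be hash-checked. This
# hypothetical processor base64-encodes the stored rawtext; the flag it
# would be registered under is an assumption, not a real revlog flag.
import base64

def _b64read(self, rawtext):
    return base64.b64decode(rawtext), False    # text != rawtext

def _b64write(self, text):
    return base64.b64encode(text), False       # rawtext != text

def _b64raw(self, rawtext):
    return False    # stored rawtext never matches the text hash

_b64processor = (_b64read, _b64write, _b64raw)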
1820 1820
1821 1821 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1822 1822 """Check node hash integrity.
1823 1823
1824 1824 Available as a function so that subclasses can extend hash mismatch
1825 1825 behaviors as needed.
1826 1826 """
1827 1827 if p1 is None and p2 is None:
1828 1828 p1, p2 = self.parents(node)
1829 1829 if node != self.hash(text, p1, p2):
1830 1830 revornode = rev
1831 1831 if revornode is None:
1832 1832 revornode = templatefilters.short(hex(node))
1833 1833 raise RevlogError(_("integrity check failed on %s:%s")
1834 1834 % (self.indexfile, pycompat.bytestr(revornode)))
1835 1835
1836 1836 def _enforceinlinesize(self, tr, fp=None):
1837 1837 """Check if the revlog is too big for inline and convert if so.
1838 1838
1839 1839 This should be called after revisions are added to the revlog. If the
1840 1840 revlog has grown too large to be an inline revlog, it will convert it
1841 1841 to use multiple index and data files.
1842 1842 """
1843 1843 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1844 1844 return
1845 1845
1846 1846 trinfo = tr.find(self.indexfile)
1847 1847 if trinfo is None:
1848 1848 raise RevlogError(_("%s not found in the transaction")
1849 1849 % self.indexfile)
1850 1850
1851 1851 trindex = trinfo[2]
1852 1852 if trindex is not None:
1853 1853 dataoff = self.start(trindex)
1854 1854 else:
1855 1855 # revlog was stripped at start of transaction, use all leftover data
1856 1856 trindex = len(self) - 1
1857 1857 dataoff = self.end(-2)
1858 1858
1859 1859 tr.add(self.datafile, dataoff)
1860 1860
1861 1861 if fp:
1862 1862 fp.flush()
1863 1863 fp.close()
1864 1864
1865 1865 with self._datafp('w') as df:
1866 1866 for r in self:
1867 1867 df.write(self._getsegmentforrevs(r, r)[1])
1868 1868
1869 1869 with self._indexfp('w') as fp:
1870 1870 self.version &= ~FLAG_INLINE_DATA
1871 1871 self._inline = False
1872 1872 io = self._io
1873 1873 for i in self:
1874 1874 e = io.packentry(self.index[i], self.node, self.version, i)
1875 1875 fp.write(e)
1876 1876
1877 1877         # the temp file replaces the real index when we exit the context
1878 1878 # manager
1879 1879
1880 1880 tr.replace(self.indexfile, trindex * self._io.size)
1881 1881 self._chunkclear()
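# --- editor's illustration, not part of revlog.py ---
# File layouts before and after the conversion above (sizes illustrative):
#
#   inline:  foo.i = [entry0][data0][entry1][data1]...   index + data mixed
#   split:   foo.i = [entry0][entry1]...                 index only
#            foo.d = [data0][data1]...                   data only
#
# This interleaving is also why _getsegmentforrevs() adds the extra
# (rev + 1) * iosize term when the revlog is inline.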
1882 1882
1883 1883 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1884 1884 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1885 1885 """add a revision to the log
1886 1886
1887 1887 text - the revision data to add
1888 1888 transaction - the transaction object used for rollback
1889 1889 link - the linkrev data to add
1890 1890 p1, p2 - the parent nodeids of the revision
1891 1891 cachedelta - an optional precomputed delta
1892 1892 node - nodeid of revision; typically node is not specified, and it is
1893 1893 computed by default as hash(text, p1, p2), however subclasses might
1894 1894 use different hashing method (and override checkhash() in such case)
1895 1895 flags - the known flags to set on the revision
1896 1896 deltacomputer - an optional _deltacomputer instance shared between
1897 1897 multiple calls
1898 1898 """
1899 1899 if link == nullrev:
1900 1900 raise RevlogError(_("attempted to add linkrev -1 to %s")
1901 1901 % self.indexfile)
1902 1902
1903 1903 if flags:
1904 1904 node = node or self.hash(text, p1, p2)
1905 1905
1906 1906 rawtext, validatehash = self._processflags(text, flags, 'write')
1907 1907
1908 1908 # If the flag processor modifies the revision data, ignore any provided
1909 1909 # cachedelta.
1910 1910 if rawtext != text:
1911 1911 cachedelta = None
1912 1912
1913 1913 if len(rawtext) > _maxentrysize:
1914 1914 raise RevlogError(
1915 1915 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1916 1916 % (self.indexfile, len(rawtext)))
1917 1917
1918 1918 node = node or self.hash(rawtext, p1, p2)
1919 1919 if node in self.nodemap:
1920 1920 return node
1921 1921
1922 1922 if validatehash:
1923 1923 self.checkhash(rawtext, node, p1=p1, p2=p2)
1924 1924
1925 1925 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1926 1926 flags, cachedelta=cachedelta,
1927 1927 deltacomputer=deltacomputer)
1928 1928
1929 1929 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1930 1930 cachedelta=None, deltacomputer=None):
1931 1931 """add a raw revision with known flags, node and parents
1932 1932 useful when reusing a revision not stored in this revlog (ex: received
1933 1933 over wire, or read from an external bundle).
1934 1934 """
1935 1935 dfh = None
1936 1936 if not self._inline:
1937 1937 dfh = self._datafp("a+")
1938 1938 ifh = self._indexfp("a+")
1939 1939 try:
1940 1940 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1941 1941 flags, cachedelta, ifh, dfh,
1942 1942 deltacomputer=deltacomputer)
1943 1943 finally:
1944 1944 if dfh:
1945 1945 dfh.close()
1946 1946 ifh.close()
1947 1947
1948 1948 def compress(self, data):
1949 1949 """Generate a possibly-compressed representation of data."""
1950 1950 if not data:
1951 1951 return '', data
1952 1952
1953 1953 compressed = self._compressor.compress(data)
1954 1954
1955 1955 if compressed:
1956 1956 # The revlog compressor added the header in the returned data.
1957 1957 return '', compressed
1958 1958
1959 1959 if data[0:1] == '\0':
1960 1960 return '', data
1961 1961 return 'u', data
1962 1962
1963 1963 def decompress(self, data):
1964 1964 """Decompress a revlog chunk.
1965 1965
1966 1966 The chunk is expected to begin with a header identifying the
1967 1967 format type so it can be routed to an appropriate decompressor.
1968 1968 """
1969 1969 if not data:
1970 1970 return data
1971 1971
1972 1972 # Revlogs are read much more frequently than they are written and many
1973 1973 # chunks only take microseconds to decompress, so performance is
1974 1974 # important here.
1975 1975 #
1976 1976 # We can make a few assumptions about revlogs:
1977 1977 #
1978 1978 # 1) the majority of chunks will be compressed (as opposed to inline
1979 1979 # raw data).
1980 1980         # 2) decompressing *any* data will likely be at least 10x slower than
1981 1981 # returning raw inline data.
1982 1982 # 3) we want to prioritize common and officially supported compression
1983 1983 # engines
1984 1984 #
1985 1985 # It follows that we want to optimize for "decompress compressed data
1986 1986 # when encoded with common and officially supported compression engines"
1987 1987 # case over "raw data" and "data encoded by less common or non-official
1988 1988 # compression engines." That is why we have the inline lookup first
1989 1989 # followed by the compengines lookup.
1990 1990 #
1991 1991 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1992 1992 # compressed chunks. And this matters for changelog and manifest reads.
1993 1993 t = data[0:1]
1994 1994
1995 1995 if t == 'x':
1996 1996 try:
1997 1997 return _zlibdecompress(data)
1998 1998 except zlib.error as e:
1999 1999 raise RevlogError(_('revlog decompress error: %s') %
2000 2000 util.forcebytestr(e))
2001 2001 # '\0' is more common than 'u' so it goes first.
2002 2002 elif t == '\0':
2003 2003 return data
2004 2004 elif t == 'u':
2005 2005 return util.buffer(data, 1)
2006 2006
2007 2007 try:
2008 2008 compressor = self._decompressors[t]
2009 2009 except KeyError:
2010 2010 try:
2011 2011 engine = util.compengines.forrevlogheader(t)
2012 2012 compressor = engine.revlogcompressor()
2013 2013 self._decompressors[t] = compressor
2014 2014 except KeyError:
2015 2015 raise RevlogError(_('unknown compression type %r') % t)
2016 2016
2017 2017 return compressor.decompress(data)
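# --- editor's illustration, not part of revlog.py ---
# The header-byte routing above, exercised with plain zlib. Standard zlib
# output begins with 'x' (e.g. 'x\x9c'), which is what the fast path keys
# on; 'u' marks data stored uncompressed, and a chunk starting with '\0'
# is returned as-is.
import zlib

chunk = zlib.compress(b'some revision text' * 10)
assert chunk[0:1] == b'x'                      # routed to _zlibdecompress
assert zlib.decompress(chunk) == b'some revision text' * 10

stored = b'u' + b'did not shrink'              # 'u' header stripped on read
assert stored[1:] == b'did not shrink'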
2018 2018
2019 2019 def _isgooddeltainfo(self, d, textlen):
2020 2020 """Returns True if the given delta is good. Good means that it is within
2021 2021 the disk span, disk size, and chain length bounds that we know to be
2022 2022 performant."""
2023 2023 if d is None:
2024 2024 return False
2025 2025
2026 2026 # - 'd.distance' is the distance from the base revision -- bounding it
2027 2027 # limits the amount of I/O we need to do.
2028 2028 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2029 2029 # need to apply -- bounding it limits the amount of CPU we consume.
2030 2030
2031 2031 defaultmax = textlen * 4
2032 2032 maxdist = self._maxdeltachainspan
2033 2033 if not maxdist:
2034 2034             maxdist = d.distance # ensure the conditional passes
2035 2035 maxdist = max(maxdist, defaultmax)
2036 2036 if (d.distance > maxdist or d.deltalen > textlen or
2037 2037 d.compresseddeltalen > textlen * 2 or
2038 2038 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2039 2039 return False
2040 2040
2041 2041 return True
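# --- editor's illustration, not part of revlog.py ---
# The acceptance test above, restated standalone. A delta survives only if
# its disk span, cumulative delta size and chain length stay within bounds
# derived from the new revision's text length. Parameter names mirror the
# deltainfo fields and config knobs used by the method.
def _isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                 textlen, maxdeltachainspan=0, maxchainlen=None):
    maxdist = maxdeltachainspan or distance    # 0 disables the span check
    maxdist = max(maxdist, textlen * 4)
    return not (distance > maxdist or
                deltalen > textlen or
                compresseddeltalen > textlen * 2 or
                (maxchainlen and chainlen > maxchainlen))

assert _isgooddelta(100, 10, 50, 3, textlen=40)
assert not _isgooddelta(100, 50, 50, 3, textlen=40)   # delta > full text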
2042 2042
2043 2043 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2044 2044 cachedelta, ifh, dfh, alwayscache=False,
2045 2045 deltacomputer=None):
2046 2046 """internal function to add revisions to the log
2047 2047
2048 2048 see addrevision for argument descriptions.
2049 2049
2050 2050 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2051 2051
2052 2052 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2053 2053 be used.
2054 2054
2055 2055 invariants:
2056 2056 - rawtext is optional (can be None); if not set, cachedelta must be set.
2057 2057 if both are set, they must correspond to each other.
2058 2058 """
2059 2059 if node == nullid:
2060 2060 raise RevlogError(_("%s: attempt to add null revision") %
2061 2061 (self.indexfile))
2062 2062 if node == wdirid:
2063 2063 raise RevlogError(_("%s: attempt to add wdir revision") %
2064 2064 (self.indexfile))
2065 2065
2066 2066 if self._inline:
2067 2067 fh = ifh
2068 2068 else:
2069 2069 fh = dfh
2070 2070
2071 2071 btext = [rawtext]
2072 2072
2073 2073 curr = len(self)
2074 2074 prev = curr - 1
2075 2075 offset = self.end(prev)
2076 2076 p1r, p2r = self.rev(p1), self.rev(p2)
2077 2077
2078 2078 # full versions are inserted when the needed deltas
2079 2079 # become comparable to the uncompressed text
2080 2080 if rawtext is None:
2081 2081 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2082 2082 cachedelta[1])
2083 2083 else:
2084 2084 textlen = len(rawtext)
2085 2085
2086 2086 if deltacomputer is None:
2087 2087 deltacomputer = _deltacomputer(self)
2088 2088
2089 2089 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2090 2090 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2091 2091
2092 2092 if deltainfo is not None:
2093 2093 base = deltainfo.base
2094 2094 chainbase = deltainfo.chainbase
2095 2095 data = deltainfo.data
2096 2096 l = deltainfo.deltalen
2097 2097 else:
2098 2098 rawtext = deltacomputer.buildtext(revinfo, fh)
2099 2099 data = self.compress(rawtext)
2100 2100 l = len(data[1]) + len(data[0])
2101 2101 base = chainbase = curr
2102 2102
2103 2103 e = (offset_type(offset, flags), l, textlen,
2104 2104 base, link, p1r, p2r, node)
2105 2105 self.index.insert(-1, e)
2106 2106 self.nodemap[node] = curr
2107 2107
2108 2108 entry = self._io.packentry(e, self.node, self.version, curr)
2109 2109 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2110 2110
2111 2111 if alwayscache and rawtext is None:
2112 2112             rawtext = deltacomputer.buildtext(revinfo, fh)
2113 2113
2114 2114 if type(rawtext) == bytes: # only accept immutable objects
2115 2115 self._cache = (node, curr, rawtext)
2116 2116 self._chainbasecache[curr] = chainbase
2117 2117 return node
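# --- editor's illustration, not part of revlog.py ---
# entry[0] above packs the data offset and the revision flags into one
# integer; the reads elsewhere in this file (`istart[0] >> 16`,
# `entry[0] & 0xffff`) rely on exactly this split:
def _offset_type(offset, flags):
    return (offset << 16) | flags    # mirrors revlog's offset_type()

packed = _offset_type(4096, 1 << 15)    # offset 4096, censored flag bit
assert packed >> 16 == 4096
assert packed & 0xffff == 1 << 15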
2118 2118
2119 2119 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2120 2120 # Files opened in a+ mode have inconsistent behavior on various
2121 2121 # platforms. Windows requires that a file positioning call be made
2122 2122 # when the file handle transitions between reads and writes. See
2123 2123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2124 2124 # platforms, Python or the platform itself can be buggy. Some versions
2125 2125 # of Solaris have been observed to not append at the end of the file
2126 2126 # if the file was seeked to before the end. See issue4943 for more.
2127 2127 #
2128 2128 # We work around this issue by inserting a seek() before writing.
2129 2129 # Note: This is likely not necessary on Python 3.
2130 2130 ifh.seek(0, os.SEEK_END)
2131 2131 if dfh:
2132 2132 dfh.seek(0, os.SEEK_END)
2133 2133
2134 2134 curr = len(self) - 1
2135 2135 if not self._inline:
2136 2136 transaction.add(self.datafile, offset)
2137 2137 transaction.add(self.indexfile, curr * len(entry))
2138 2138 if data[0]:
2139 2139 dfh.write(data[0])
2140 2140 dfh.write(data[1])
2141 2141 ifh.write(entry)
2142 2142 else:
2143 2143 offset += curr * self._io.size
2144 2144 transaction.add(self.indexfile, offset, curr)
2145 2145 ifh.write(entry)
2146 2146 ifh.write(data[0])
2147 2147 ifh.write(data[1])
2148 2148 self._enforceinlinesize(transaction, ifh)
2149 2149
2150 2150 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2151 2151 """
2152 2152 add a delta group
2153 2153
2154 2154 given a set of deltas, add them to the revision log. the
2155 2155 first delta is against its parent, which should be in our
2156 2156 log, the rest are against the previous delta.
2157 2157
2158 2158 If ``addrevisioncb`` is defined, it will be called with arguments of
2159 2159 this revlog and the node that was added.
2160 2160 """
2161 2161
2162 2162 nodes = []
2163 2163
2164 2164 r = len(self)
2165 2165 end = 0
2166 2166 if r:
2167 2167 end = self.end(r - 1)
2168 2168 ifh = self._indexfp("a+")
2169 2169 isize = r * self._io.size
2170 2170 if self._inline:
2171 2171 transaction.add(self.indexfile, end + isize, r)
2172 2172 dfh = None
2173 2173 else:
2174 2174 transaction.add(self.indexfile, isize, r)
2175 2175 transaction.add(self.datafile, end)
2176 2176 dfh = self._datafp("a+")
2177 2177 def flush():
2178 2178 if dfh:
2179 2179 dfh.flush()
2180 2180 ifh.flush()
2181 2181 try:
2182 2182 deltacomputer = _deltacomputer(self)
2183 2183 # loop through our set of deltas
2184 2184 for data in deltas:
2185 2185 node, p1, p2, linknode, deltabase, delta, flags = data
2186 2186 link = linkmapper(linknode)
2187 2187 flags = flags or REVIDX_DEFAULT_FLAGS
2188 2188
2189 2189 nodes.append(node)
2190 2190
2191 2191 if node in self.nodemap:
2192 2192 # this can happen if two branches make the same change
2193 2193 continue
2194 2194
2195 2195 for p in (p1, p2):
2196 2196 if p not in self.nodemap:
2197 2197 raise LookupError(p, self.indexfile,
2198 2198 _('unknown parent'))
2199 2199
2200 2200 if deltabase not in self.nodemap:
2201 2201 raise LookupError(deltabase, self.indexfile,
2202 2202 _('unknown delta base'))
2203 2203
2204 2204 baserev = self.rev(deltabase)
2205 2205
2206 2206 if baserev != nullrev and self.iscensored(baserev):
2207 2207 # if base is censored, delta must be full replacement in a
2208 2208 # single patch operation
2209 2209 hlen = struct.calcsize(">lll")
2210 2210 oldlen = self.rawsize(baserev)
2211 2211 newlen = len(delta) - hlen
2212 2212 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2213 2213 raise error.CensoredBaseError(self.indexfile,
2214 2214 self.node(baserev))
2215 2215
2216 2216 if not flags and self._peek_iscensored(baserev, delta, flush):
2217 2217 flags |= REVIDX_ISCENSORED
2218 2218
2219 2219 # We assume consumers of addrevisioncb will want to retrieve
2220 2220 # the added revision, which will require a call to
2221 2221 # revision(). revision() will fast path if there is a cache
2222 2222 # hit. So, we tell _addrevision() to always cache in this case.
2223 2223 # We're only using addgroup() in the context of changegroup
2224 2224 # generation so the revision data can always be handled as raw
2225 2225 # by the flagprocessor.
2226 2226 self._addrevision(node, None, transaction, link,
2227 2227 p1, p2, flags, (baserev, delta),
2228 2228 ifh, dfh,
2229 2229 alwayscache=bool(addrevisioncb),
2230 2230 deltacomputer=deltacomputer)
2231 2231
2232 2232 if addrevisioncb:
2233 2233 addrevisioncb(self, node)
2234 2234
2235 2235 if not dfh and not self._inline:
2236 2236 # addrevision switched from inline to conventional
2237 2237 # reopen the index
2238 2238 ifh.close()
2239 2239 dfh = self._datafp("a+")
2240 2240 ifh = self._indexfp("a+")
2241 2241 finally:
2242 2242 if dfh:
2243 2243 dfh.close()
2244 2244 ifh.close()
2245 2245
2246 2246 return nodes
2247 2247
2248 2248 def iscensored(self, rev):
2249 2249 """Check if a file revision is censored."""
2250 2250 return False
2251 2251
2252 2252 def _peek_iscensored(self, baserev, delta, flush):
2253 2253 """Quickly check if a delta produces a censored revision."""
2254 2254 return False
2255 2255
2256 2256 def getstrippoint(self, minlink):
2257 2257 """find the minimum rev that must be stripped to strip the linkrev
2258 2258
2259 2259 Returns a tuple containing the minimum rev and a set of all revs that
2260 2260 have linkrevs that will be broken by this strip.
2261 2261 """
2262 2262 brokenrevs = set()
2263 2263 strippoint = len(self)
2264 2264
2265 2265 heads = {}
2266 2266 futurelargelinkrevs = set()
2267 2267 for head in self.headrevs():
2268 2268 headlinkrev = self.linkrev(head)
2269 2269 heads[head] = headlinkrev
2270 2270 if headlinkrev >= minlink:
2271 2271 futurelargelinkrevs.add(headlinkrev)
2272 2272
2273 2273 # This algorithm involves walking down the rev graph, starting at the
2274 2274 # heads. Since the revs are topologically sorted according to linkrev,
2275 2275 # once all head linkrevs are below the minlink, we know there are
2276 2276 # no more revs that could have a linkrev greater than minlink.
2277 2277 # So we can stop walking.
2278 2278 while futurelargelinkrevs:
2279 2279 strippoint -= 1
2280 2280 linkrev = heads.pop(strippoint)
2281 2281
2282 2282 if linkrev < minlink:
2283 2283 brokenrevs.add(strippoint)
2284 2284 else:
2285 2285 futurelargelinkrevs.remove(linkrev)
2286 2286
2287 2287 for p in self.parentrevs(strippoint):
2288 2288 if p != nullrev:
2289 2289 plinkrev = self.linkrev(p)
2290 2290 heads[p] = plinkrev
2291 2291 if plinkrev >= minlink:
2292 2292 futurelargelinkrevs.add(plinkrev)
2293 2293
2294 2294 return strippoint, brokenrevs
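# --- editor's illustration, not part of revlog.py ---
# A runnable miniature of the walk above on a toy linear history where
# linkrev == rev and 3 is the only head. Stripping minlink=2 must report
# strippoint 2 and no broken linkrevs.
def _strippoint(parentrevs, linkrevs, headrevs, minlink, nullrev=-1):
    brokenrevs, strippoint = set(), len(linkrevs)
    heads, futurelargelinkrevs = {}, set()
    for head in headrevs:
        heads[head] = linkrevs[head]
        if linkrevs[head] >= minlink:
            futurelargelinkrevs.add(linkrevs[head])
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)
        for p in parentrevs[strippoint]:
            if p != nullrev:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    futurelargelinkrevs.add(linkrevs[p])
    return strippoint, brokenrevs

parentrevs = {1: (0, -1), 2: (1, -1), 3: (2, -1)}   # 0 <- 1 <- 2 <- 3
linkrevs = {0: 0, 1: 1, 2: 2, 3: 3}
assert _strippoint(parentrevs, linkrevs, [3], minlink=2) == (2, set())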
2295 2295
2296 2296 def strip(self, minlink, transaction):
2297 2297 """truncate the revlog on the first revision with a linkrev >= minlink
2298 2298
2299 2299 This function is called when we're stripping revision minlink and
2300 2300 its descendants from the repository.
2301 2301
2302 2302 We have to remove all revisions with linkrev >= minlink, because
2303 2303 the equivalent changelog revisions will be renumbered after the
2304 2304 strip.
2305 2305
2306 2306 So we truncate the revlog on the first of these revisions, and
2307 2307 trust that the caller has saved the revisions that shouldn't be
2308 2308 removed and that it'll re-add them after this truncation.
2309 2309 """
2310 2310 if len(self) == 0:
2311 2311 return
2312 2312
2313 2313 rev, _ = self.getstrippoint(minlink)
2314 2314 if rev == len(self):
2315 2315 return
2316 2316
2317 2317 # first truncate the files on disk
2318 2318 end = self.start(rev)
2319 2319 if not self._inline:
2320 2320 transaction.add(self.datafile, end)
2321 2321 end = rev * self._io.size
2322 2322 else:
2323 2323 end += rev * self._io.size
2324 2324
2325 2325 transaction.add(self.indexfile, end)
2326 2326
2327 2327 # then reset internal state in memory to forget those revisions
2328 2328 self._cache = None
2329 2329 self._chaininfocache = {}
2330 2330 self._chunkclear()
2331 2331 for x in xrange(rev, len(self)):
2332 2332 del self.nodemap[self.node(x)]
2333 2333
2334 2334 del self.index[rev:-1]
2335 2335
2336 2336 def checksize(self):
2337 2337 expected = 0
2338 2338 if len(self):
2339 2339 expected = max(0, self.end(len(self) - 1))
2340 2340
2341 2341 try:
2342 2342 with self._datafp() as f:
2343 2343 f.seek(0, 2)
2344 2344 actual = f.tell()
2345 2345 dd = actual - expected
2346 2346 except IOError as inst:
2347 2347 if inst.errno != errno.ENOENT:
2348 2348 raise
2349 2349 dd = 0
2350 2350
2351 2351 try:
2352 2352 f = self.opener(self.indexfile)
2353 2353 f.seek(0, 2)
2354 2354 actual = f.tell()
2355 2355 f.close()
2356 2356 s = self._io.size
2357 2357 i = max(0, actual // s)
2358 2358 di = actual - (i * s)
2359 2359 if self._inline:
2360 2360 databytes = 0
2361 2361 for r in self:
2362 2362 databytes += max(0, self.length(r))
2363 2363 dd = 0
2364 2364 di = actual - len(self) * s - databytes
2365 2365 except IOError as inst:
2366 2366 if inst.errno != errno.ENOENT:
2367 2367 raise
2368 2368 di = 0
2369 2369
2370 2370 return (dd, di)
2371 2371
2372 2372 def files(self):
2373 2373 res = [self.indexfile]
2374 2374 if not self._inline:
2375 2375 res.append(self.datafile)
2376 2376 return res
2377 2377
2378 2378 DELTAREUSEALWAYS = 'always'
2379 2379 DELTAREUSESAMEREVS = 'samerevs'
2380 2380 DELTAREUSENEVER = 'never'
2381 2381
2382 2382 DELTAREUSEFULLADD = 'fulladd'
2383 2383
2384 2384 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2385 2385
2386 2386 def clone(self, tr, destrevlog, addrevisioncb=None,
2387 2387 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2388 2388 """Copy this revlog to another, possibly with format changes.
2389 2389
2390 2390 The destination revlog will contain the same revisions and nodes.
2391 2391 However, it may not be bit-for-bit identical due to e.g. delta encoding
2392 2392 differences.
2393 2393
2394 2394         The ``deltareuse`` argument controls how deltas from the existing revlog
2395 2395 are preserved in the destination revlog. The argument can have the
2396 2396 following values:
2397 2397
2398 2398 DELTAREUSEALWAYS
2399 2399 Deltas will always be reused (if possible), even if the destination
2400 2400 revlog would not select the same revisions for the delta. This is the
2401 2401 fastest mode of operation.
2402 2402 DELTAREUSESAMEREVS
2403 2403 Deltas will be reused if the destination revlog would pick the same
2404 2404 revisions for the delta. This mode strikes a balance between speed
2405 2405 and optimization.
2406 2406 DELTAREUSENEVER
2407 2407 Deltas will never be reused. This is the slowest mode of execution.
2408 2408 This mode can be used to recompute deltas (e.g. if the diff/delta
2409 2409 algorithm changes).
2410 2410
2411 2411 Delta computation can be slow, so the choice of delta reuse policy can
2412 2412 significantly affect run time.
2413 2413
2414 2414 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2415 2415 two extremes. Deltas will be reused if they are appropriate. But if the
2416 2416 delta could choose a better revision, it will do so. This means if you
2417 2417 are converting a non-generaldelta revlog to a generaldelta revlog,
2418 2418 deltas will be recomputed if the delta's parent isn't a parent of the
2419 2419 revision.
2420 2420
2421 2421 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2422 2422 controls whether to compute deltas against both parents for merges.
2423 2423         If it is None, the destination revlog's current setting is kept.
2424 2424 """
2425 2425 if deltareuse not in self.DELTAREUSEALL:
2426 2426 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2427 2427
2428 2428 if len(destrevlog):
2429 2429 raise ValueError(_('destination revlog is not empty'))
2430 2430
2431 2431 if getattr(self, 'filteredrevs', None):
2432 2432 raise ValueError(_('source revlog has filtered revisions'))
2433 2433 if getattr(destrevlog, 'filteredrevs', None):
2434 2434 raise ValueError(_('destination revlog has filtered revisions'))
2435 2435
2436 2436 # lazydeltabase controls whether to reuse a cached delta, if possible.
2437 2437 oldlazydeltabase = destrevlog._lazydeltabase
2438 2438 oldamd = destrevlog._aggressivemergedeltas
2439 2439
2440 2440 try:
2441 2441 if deltareuse == self.DELTAREUSEALWAYS:
2442 2442 destrevlog._lazydeltabase = True
2443 2443 elif deltareuse == self.DELTAREUSESAMEREVS:
2444 2444 destrevlog._lazydeltabase = False
2445 2445
2446 2446 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2447 2447
2448 2448 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2449 2449 self.DELTAREUSESAMEREVS)
2450 2450
2451 2451 deltacomputer = _deltacomputer(destrevlog)
2452 2452 index = self.index
2453 2453 for rev in self:
2454 2454 entry = index[rev]
2455 2455
2456 2456 # Some classes override linkrev to take filtered revs into
2457 2457 # account. Use raw entry from index.
2458 2458 flags = entry[0] & 0xffff
2459 2459 linkrev = entry[4]
2460 2460 p1 = index[entry[5]][7]
2461 2461 p2 = index[entry[6]][7]
2462 2462 node = entry[7]
2463 2463
2464 2464 # (Possibly) reuse the delta from the revlog if allowed and
2465 2465 # the revlog chunk is a delta.
2466 2466 cachedelta = None
2467 2467 rawtext = None
2468 2468 if populatecachedelta:
2469 2469 dp = self.deltaparent(rev)
2470 2470 if dp != nullrev:
2471 cachedelta = (dp, str(self._chunk(rev)))
2471 cachedelta = (dp, bytes(self._chunk(rev)))
2472 2472
2473 2473 if not cachedelta:
2474 2474 rawtext = self.revision(rev, raw=True)
2475 2475
2476 2476
2477 2477 if deltareuse == self.DELTAREUSEFULLADD:
2478 2478 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2479 2479 cachedelta=cachedelta,
2480 2480 node=node, flags=flags,
2481 2481 deltacomputer=deltacomputer)
2482 2482 else:
2483 2483 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2484 2484 checkambig=False)
2485 2485 dfh = None
2486 2486 if not destrevlog._inline:
2487 2487 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2488 2488 try:
2489 2489 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2490 2490 p2, flags, cachedelta, ifh, dfh,
2491 2491 deltacomputer=deltacomputer)
2492 2492 finally:
2493 2493 if dfh:
2494 2494 dfh.close()
2495 2495 ifh.close()
2496 2496
2497 2497 if addrevisioncb:
2498 2498 addrevisioncb(self, rev, node)
2499 2499 finally:
2500 2500 destrevlog._lazydeltabase = oldlazydeltabase
2501 2501 destrevlog._aggressivemergedeltas = oldamd
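# --- editor's illustration, not part of revlog.py ---
# Hedged usage sketch for clone(): recomputing every delta while copying
# into an empty destination revlog, e.g. after a delta-algorithm change.
# `repo`, `srclog` and `dstlog` are assumed to exist; the transaction name
# is arbitrary.
#
#   with repo.transaction('clonerevlog') as tr:
#       srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)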