revlog: use the method to check if the revlog is being written to...
Raphaël Gomès
r52751:2fd44b3d default
@@ -1,4126 +1,4126 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import functools
20 20 import io
21 21 import os
22 22 import struct
23 23 import typing
24 24 import weakref
25 25 import zlib
26 26
27 27 from typing import (
28 28 Iterable,
29 29 Iterator,
30 30 Optional,
31 31 Tuple,
32 32 )
33 33
34 34 # import stuff from node for others to import from revlog
35 35 from .node import (
36 36 bin,
37 37 hex,
38 38 nullrev,
39 39 sha1nodeconstants,
40 40 short,
41 41 wdirrev,
42 42 )
43 43 from .i18n import _
44 44 from .revlogutils.constants import (
45 45 ALL_KINDS,
46 46 CHANGELOGV2,
47 47 COMP_MODE_DEFAULT,
48 48 COMP_MODE_INLINE,
49 49 COMP_MODE_PLAIN,
50 50 DELTA_BASE_REUSE_NO,
51 51 DELTA_BASE_REUSE_TRY,
52 52 ENTRY_RANK,
53 53 FEATURES_BY_VERSION,
54 54 FLAG_GENERALDELTA,
55 55 FLAG_INLINE_DATA,
56 56 INDEX_HEADER,
57 57 KIND_CHANGELOG,
58 58 KIND_FILELOG,
59 59 RANK_UNKNOWN,
60 60 REVLOGV0,
61 61 REVLOGV1,
62 62 REVLOGV1_FLAGS,
63 63 REVLOGV2,
64 64 REVLOGV2_FLAGS,
65 65 REVLOG_DEFAULT_FLAGS,
66 66 REVLOG_DEFAULT_FORMAT,
67 67 REVLOG_DEFAULT_VERSION,
68 68 SUPPORTED_FLAGS,
69 69 )
70 70 from .revlogutils.flagutil import (
71 71 REVIDX_DEFAULT_FLAGS,
72 72 REVIDX_ELLIPSIS,
73 73 REVIDX_EXTSTORED,
74 74 REVIDX_FLAGS_ORDER,
75 75 REVIDX_HASCOPIESINFO,
76 76 REVIDX_ISCENSORED,
77 77 REVIDX_RAWTEXT_CHANGING_FLAGS,
78 78 )
79 79 from .thirdparty import attr
80 80
81 81 # Force pytype to use the non-vendored package
82 82 if typing.TYPE_CHECKING:
83 83 # noinspection PyPackageRequirements
84 84 import attr
85 85
86 86 from . import (
87 87 ancestor,
88 88 dagop,
89 89 error,
90 90 mdiff,
91 91 policy,
92 92 pycompat,
93 93 revlogutils,
94 94 templatefilters,
95 95 util,
96 96 vfs as vfsmod,
97 97 )
98 98 from .interfaces import (
99 99 repository,
100 100 util as interfaceutil,
101 101 )
102 102 from .revlogutils import (
103 103 deltas as deltautil,
104 104 docket as docketutil,
105 105 flagutil,
106 106 nodemap as nodemaputil,
107 107 randomaccessfile,
108 108 revlogv0,
109 109 rewrite,
110 110 sidedata as sidedatautil,
111 111 )
112 112 from .utils import (
113 113 storageutil,
114 114 stringutil,
115 115 )
116 116
117 117 # blanket usage of all the names to silence pyflakes warnings
118 118 # We need these names available in the module for extensions.
119 119
120 120 REVLOGV0
121 121 REVLOGV1
122 122 REVLOGV2
123 123 CHANGELOGV2
124 124 FLAG_INLINE_DATA
125 125 FLAG_GENERALDELTA
126 126 REVLOG_DEFAULT_FLAGS
127 127 REVLOG_DEFAULT_FORMAT
128 128 REVLOG_DEFAULT_VERSION
129 129 REVLOGV1_FLAGS
130 130 REVLOGV2_FLAGS
131 131 REVIDX_ISCENSORED
132 132 REVIDX_ELLIPSIS
133 133 REVIDX_HASCOPIESINFO
134 134 REVIDX_EXTSTORED
135 135 REVIDX_DEFAULT_FLAGS
136 136 REVIDX_FLAGS_ORDER
137 137 REVIDX_RAWTEXT_CHANGING_FLAGS
138 138
139 139 parsers = policy.importmod('parsers')
140 140 rustancestor = policy.importrust('ancestor')
141 141 rustdagop = policy.importrust('dagop')
142 142 rustrevlog = policy.importrust('revlog')
143 143
144 144 # Aliased for performance.
145 145 _zlibdecompress = zlib.decompress
146 146
147 147 # max size of inline data embedded into a revlog
148 148 _maxinline = 131072
149 149
150 150
151 151 # Flag processors for REVIDX_ELLIPSIS.
152 152 def ellipsisreadprocessor(rl, text):
153 153 return text, False
154 154
155 155
156 156 def ellipsiswriteprocessor(rl, text):
157 157 return text, False
158 158
159 159
160 160 def ellipsisrawprocessor(rl, text):
161 161 return False
162 162
163 163
164 164 ellipsisprocessor = (
165 165 ellipsisreadprocessor,
166 166 ellipsiswriteprocessor,
167 167 ellipsisrawprocessor,
168 168 )
169 169
170 170
171 171 def _verify_revision(rl, skipflags, state, node):
172 172 """Verify the integrity of the given revlog ``node`` while providing a hook
173 173 point for extensions to influence the operation."""
174 174 if skipflags:
175 175 state[b'skipread'].add(node)
176 176 else:
177 177 # Side-effect: read content and verify hash.
178 178 rl.revision(node)
179 179
180 180
181 181 # True if a fast implementation for persistent-nodemap is available
182 182 #
183 183 # We also consider the "pure" Python implementation "fast", because
184 184 # people using pure Python don't really have performance considerations
185 185 # (and face a wheelbarrow of other slowness sources)
186 186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
187 187 parsers, 'BaseIndexObject'
188 188 )
189 189
190 190
191 191 @attr.s(slots=True)
192 192 class RevLogRevisionDelta:
193 193 node = attr.ib()
194 194 p1node = attr.ib()
195 195 p2node = attr.ib()
196 196 basenode = attr.ib()
197 197 flags = attr.ib()
198 198 baserevisionsize = attr.ib()
199 199 revision = attr.ib()
200 200 delta = attr.ib()
201 201 sidedata = attr.ib()
202 202 protocol_flags = attr.ib()
203 203 linknode = attr.ib(default=None)
204 204
205 205
206 206 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
207 207 RevLogRevisionDelta
208 208 )
209 209
210 210 if typing.TYPE_CHECKING:
211 211 revlogrevisiondelta = RevLogRevisionDelta
212 212
213 213
214 214 @attr.s(frozen=True)
215 215 class RevLogProblem:
216 216 warning = attr.ib(default=None, type=Optional[bytes])
217 217 error = attr.ib(default=None, type=Optional[bytes])
218 218 node = attr.ib(default=None, type=Optional[bytes])
219 219
220 220
221 221 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
222 222 RevLogProblem
223 223 )
224 224
225 225 if typing.TYPE_CHECKING:
226 226 revlogproblem = RevLogProblem
227 227
228 228
229 229 def parse_index_v1(data, inline):
230 230 # call the C implementation to parse the index data
231 231 index, cache = parsers.parse_index2(data, inline)
232 232 return index, cache
233 233
234 234
235 235 def parse_index_v2(data, inline):
236 236 # call the C implementation to parse the index data
237 237 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
238 238 return index, cache
239 239
240 240
241 241 def parse_index_cl_v2(data, inline):
242 242 # call the C implementation to parse the index data
243 243 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
244 244 return index, cache
245 245
246 246
247 247 if hasattr(parsers, 'parse_index_devel_nodemap'):
248 248
249 249 def parse_index_v1_nodemap(data, inline):
250 250 index, cache = parsers.parse_index_devel_nodemap(data, inline)
251 251 return index, cache
252 252
253 253 else:
254 254 parse_index_v1_nodemap = None
255 255
256 256
257 257 def parse_index_v1_rust(data, inline, default_header):
258 258 cache = (0, data) if inline else None
259 259 return rustrevlog.Index(data, default_header), cache
260 260
261 261
262 262 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
263 263 # signed integer)
264 264 _maxentrysize = 0x7FFFFFFF
265 265
266 266 FILE_TOO_SHORT_MSG = _(
267 267 b'cannot read from revlog %s;'
268 268 b' expected %d bytes from offset %d, data size is %d'
269 269 )
270 270
271 271 hexdigits = b'0123456789abcdefABCDEF'
272 272
273 273
274 274 class _Config:
275 275 def copy(self):
276 276 return self.__class__(**self.__dict__)
277 277
278 278
279 279 @attr.s()
280 280 class FeatureConfig(_Config):
281 281 """Hold configuration values about the available revlog features"""
282 282
283 283 # the default compression engine
284 284 compression_engine = attr.ib(default=b'zlib')
285 285 # compression engines options
286 286 compression_engine_options = attr.ib(default=attr.Factory(dict))
287 287
288 288 # can we use censor on this revlog
289 289 censorable = attr.ib(default=False)
290 290 # does this revlog use the "side data" feature
291 291 has_side_data = attr.ib(default=False)
292 292 # might remove rank configuration once the computation has no impact
293 293 compute_rank = attr.ib(default=False)
294 294 # parent order is supposed to be semantically irrelevant, so we
295 295 # normally resort parents to ensure that the first parent is non-null,
296 296 # if there is a non-null parent at all.
297 297 # filelog abuses the parent order as a flag to mark some instances of
298 298 # meta-encoded files, so allow it to disable this behavior.
299 299 canonical_parent_order = attr.ib(default=False)
300 300 # can ellipsis commit be used
301 301 enable_ellipsis = attr.ib(default=False)
302 302
303 303 def copy(self):
304 304 new = super().copy()
305 305 new.compression_engine_options = self.compression_engine_options.copy()
306 306 return new
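
A minimal illustration of why `FeatureConfig` overrides `copy()` (editor's sketch, not part of the file): the base `_Config.copy()` rebuilds the object from `__dict__`, which would leave the mutable `compression_engine_options` dict shared between the original and the copy.

    base = FeatureConfig(compression_engine=b'zlib')
    clone = base.copy()
    clone.compression_engine_options[b'zlib.level'] = 9
    # the override re-copies the dict, so the original is unaffected
    assert b'zlib.level' not in base.compression_engine_options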
307 307
308 308
309 309 @attr.s()
310 310 class DataConfig(_Config):
311 311 """Hold configuration value about how the revlog data are read"""
312 312
313 313 # should we try to open the "pending" version of the revlog
314 314 try_pending = attr.ib(default=False)
315 315 # should we try to open the "split" version of the revlog
316 316 try_split = attr.ib(default=False)
317 317 # When True, indexfile should be opened with checkambig=True at writing,
318 318 # to avoid file stat ambiguity.
319 319 check_ambig = attr.ib(default=False)
320 320
321 321 # If true, use mmap instead of reading to deal with large index
322 322 mmap_large_index = attr.ib(default=False)
323 323 # above what size an index is considered large
324 324 mmap_index_threshold = attr.ib(default=None)
325 325 # How much data to read and cache into the raw revlog data cache.
326 326 chunk_cache_size = attr.ib(default=65536)
327 327
328 328 # The size of the uncompressed cache compared to the largest revision seen.
329 329 uncompressed_cache_factor = attr.ib(default=None)
330 330
331 331 # The number of chunks cached
332 332 uncompressed_cache_count = attr.ib(default=None)
333 333
334 334 # Allow sparse reading of the revlog data
335 335 with_sparse_read = attr.ib(default=False)
336 336 # minimal density of a sparse read chunk
337 337 sr_density_threshold = attr.ib(default=0.50)
338 338 # minimal size of data we skip when performing sparse read
339 339 sr_min_gap_size = attr.ib(default=262144)
340 340
341 341 # are deltas encoded against arbitrary bases.
342 342 generaldelta = attr.ib(default=False)
343 343
344 344
345 345 @attr.s()
346 346 class DeltaConfig(_Config):
347 347 """Hold configuration value about how new delta are computed
348 348
349 349 Some attributes are duplicated from DataConfig to help keep each object
350 350 self-contained.
351 351 """
352 352
353 353 # can deltas be encoded against arbitrary bases.
354 354 general_delta = attr.ib(default=False)
355 355 # Allow sparse writing of the revlog data
356 356 sparse_revlog = attr.ib(default=False)
357 357 # maximum length of a delta chain
358 358 max_chain_len = attr.ib(default=None)
359 359 # Maximum distance between delta chain base start and end
360 360 max_deltachain_span = attr.ib(default=-1)
361 361 # If `upper_bound_comp` is not None, this is the expected maximal gain from
362 362 # compression for the data content.
363 363 upper_bound_comp = attr.ib(default=None)
364 364 # Should we try a delta against both parents
365 365 delta_both_parents = attr.ib(default=True)
366 366 # Test delta base candidate groups by chunks of this maximal size.
367 367 candidate_group_chunk_size = attr.ib(default=0)
368 368 # Should we display debug information about delta computation
369 369 debug_delta = attr.ib(default=False)
370 370 # trust incoming delta by default
371 371 lazy_delta = attr.ib(default=True)
372 372 # trust the base of incoming delta by default
373 373 lazy_delta_base = attr.ib(default=False)
374 374
375 375
376 376 class _InnerRevlog:
377 377 """An inner layer of the revlog object
378 378
379 379 That layer exists to be able to delegate some operations to Rust; its
380 380 boundaries are arbitrary and based on what we can delegate to Rust.
381 381 """
382 382
383 383 opener: vfsmod.vfs
384 384
385 385 def __init__(
386 386 self,
387 387 opener: vfsmod.vfs,
388 388 index,
389 389 index_file,
390 390 data_file,
391 391 sidedata_file,
392 392 inline,
393 393 data_config,
394 394 delta_config,
395 395 feature_config,
396 396 chunk_cache,
397 397 default_compression_header,
398 398 ):
399 399 self.opener = opener
400 400 self.index = index
401 401
402 402 self.index_file = index_file
403 403 self.data_file = data_file
404 404 self.sidedata_file = sidedata_file
405 405 self.inline = inline
406 406 self.data_config = data_config
407 407 self.delta_config = delta_config
408 408 self.feature_config = feature_config
409 409
410 410 # used during diverted write.
411 411 self._orig_index_file = None
412 412
413 413 self._default_compression_header = default_compression_header
414 414
415 415 # index
416 416
417 417 # 3-tuple of file handles being used for active writing.
418 418 self._writinghandles = None
419 419
420 420 self._segmentfile = randomaccessfile.randomaccessfile(
421 421 self.opener,
422 422 (self.index_file if self.inline else self.data_file),
423 423 self.data_config.chunk_cache_size,
424 424 chunk_cache,
425 425 )
426 426 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
427 427 self.opener,
428 428 self.sidedata_file,
429 429 self.data_config.chunk_cache_size,
430 430 )
431 431
432 432 # revlog header -> revlog compressor
433 433 self._decompressors = {}
434 434 # 3-tuple of (node, rev, text) for a raw revision.
435 435 self._revisioncache = None
436 436
437 437 # cache some uncompressed chunks
438 438 # rev → uncompressed_chunk
439 439 #
440 440 # the max cost is dynamically updated to be proportional to the
441 441 # size of the revisions we actually encounter.
442 442 self._uncompressed_chunk_cache = None
443 443 if self.data_config.uncompressed_cache_factor is not None:
444 444 self._uncompressed_chunk_cache = util.lrucachedict(
445 445 self.data_config.uncompressed_cache_count,
446 446 maxcost=65536, # some arbitrary initial value
447 447 )
448 448
449 449 self._delay_buffer = None
450 450
451 451 def __len__(self):
452 452 return len(self.index)
453 453
454 454 def clear_cache(self):
455 455 assert not self.is_delaying
456 456 self._revisioncache = None
457 457 if self._uncompressed_chunk_cache is not None:
458 458 self._uncompressed_chunk_cache.clear()
459 459 self._segmentfile.clear_cache()
460 460 self._segmentfile_sidedata.clear_cache()
461 461
462 462 @property
463 463 def canonical_index_file(self):
464 464 if self._orig_index_file is not None:
465 465 return self._orig_index_file
466 466 return self.index_file
467 467
468 468 @property
469 469 def is_delaying(self):
470 470 """is the revlog is currently delaying the visibility of written data?
471 471
472 472 The delaying mechanism can be either in-memory or written on disk in a
473 473 side-file."""
474 474 return (self._delay_buffer is not None) or (
475 475 self._orig_index_file is not None
476 476 )
477 477
478 478 # Derived from index values.
479 479
480 480 def start(self, rev):
481 481 """the offset of the data chunk for this revision"""
482 482 return int(self.index[rev][0] >> 16)
483 483
484 484 def length(self, rev):
485 485 """the length of the data chunk for this revision"""
486 486 return self.index[rev][1]
487 487
488 488 def end(self, rev):
489 489 """the end of the data chunk for this revision"""
490 490 return self.start(rev) + self.length(rev)
491 491
492 492 def deltaparent(self, rev):
493 493 """return deltaparent of the given revision"""
494 494 base = self.index[rev][3]
495 495 if base == rev:
496 496 return nullrev
497 497 elif self.delta_config.general_delta:
498 498 return base
499 499 else:
500 500 return rev - 1
501 501
502 502 def issnapshot(self, rev):
503 503 """tells whether rev is a snapshot"""
504 504 if not self.delta_config.sparse_revlog:
505 505 return self.deltaparent(rev) == nullrev
506 506 elif hasattr(self.index, 'issnapshot'):
507 507 # directly assign the method to cache the testing and access
508 508 self.issnapshot = self.index.issnapshot
509 509 return self.issnapshot(rev)
510 510 if rev == nullrev:
511 511 return True
512 512 entry = self.index[rev]
513 513 base = entry[3]
514 514 if base == rev:
515 515 return True
516 516 if base == nullrev:
517 517 return True
518 518 p1 = entry[5]
519 519 while self.length(p1) == 0:
520 520 b = self.deltaparent(p1)
521 521 if b == p1:
522 522 break
523 523 p1 = b
524 524 p2 = entry[6]
525 525 while self.length(p2) == 0:
526 526 b = self.deltaparent(p2)
527 527 if b == p2:
528 528 break
529 529 p2 = b
530 530 if base == p1 or base == p2:
531 531 return False
532 532 return self.issnapshot(base)
533 533
534 534 def _deltachain(self, rev, stoprev=None):
535 535 """Obtain the delta chain for a revision.
536 536
537 537 ``stoprev`` specifies a revision to stop at. If not specified, we
538 538 stop at the base of the chain.
539 539
540 540 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
541 541 revs in ascending order and ``stopped`` is a bool indicating whether
542 542 ``stoprev`` was hit.
543 543 """
544 544 generaldelta = self.delta_config.general_delta
545 545 # Try C implementation.
546 546 try:
547 547 return self.index.deltachain(rev, stoprev, generaldelta)
548 548 except AttributeError:
549 549 pass
550 550
551 551 chain = []
552 552
553 553 # Alias to prevent attribute lookup in tight loop.
554 554 index = self.index
555 555
556 556 iterrev = rev
557 557 e = index[iterrev]
558 558 while iterrev != e[3] and iterrev != stoprev:
559 559 chain.append(iterrev)
560 560 if generaldelta:
561 561 iterrev = e[3]
562 562 else:
563 563 iterrev -= 1
564 564 e = index[iterrev]
565 565
566 566 if iterrev == stoprev:
567 567 stopped = True
568 568 else:
569 569 chain.append(iterrev)
570 570 stopped = False
571 571
572 572 chain.reverse()
573 573 return chain, stopped
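
As a worked illustration of the pure-Python fallback above (a toy index only, not mercurial code): with general delta, slot 3 of each index entry holds the delta base, and a revision whose base is itself is a full snapshot that terminates the chain.

    def toy_deltachain(index, rev):
        chain = []
        while index[rev][3] != rev:  # a rev that is its own base is a snapshot
            chain.append(rev)
            rev = index[rev][3]      # follow the delta base
        chain.append(rev)
        chain.reverse()              # oldest first: full text, then deltas
        return chain

    # bases: 0 -> itself, 1 -> 0, 2 -> 1
    toy_index = {0: (0, 0, 0, 0), 1: (0, 0, 0, 0), 2: (0, 0, 0, 1)}
    assert toy_deltachain(toy_index, 2) == [0, 1, 2]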
574 574
575 575 @util.propertycache
576 576 def _compressor(self):
577 577 engine = util.compengines[self.feature_config.compression_engine]
578 578 return engine.revlogcompressor(
579 579 self.feature_config.compression_engine_options
580 580 )
581 581
582 582 @util.propertycache
583 583 def _decompressor(self):
584 584 """the default decompressor"""
585 585 if self._default_compression_header is None:
586 586 return None
587 587 t = self._default_compression_header
588 588 c = self._get_decompressor(t)
589 589 return c.decompress
590 590
591 591 def _get_decompressor(self, t: bytes):
592 592 try:
593 593 compressor = self._decompressors[t]
594 594 except KeyError:
595 595 try:
596 596 engine = util.compengines.forrevlogheader(t)
597 597 compressor = engine.revlogcompressor(
598 598 self.feature_config.compression_engine_options
599 599 )
600 600 self._decompressors[t] = compressor
601 601 except KeyError:
602 602 raise error.RevlogError(
603 603 _(b'unknown compression type %s') % binascii.hexlify(t)
604 604 )
605 605 return compressor
606 606
607 607 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
608 608 """Generate a possibly-compressed representation of data."""
609 609 if not data:
610 610 return b'', data
611 611
612 612 compressed = self._compressor.compress(data)
613 613
614 614 if compressed:
615 615 # The revlog compressor added the header in the returned data.
616 616 return b'', compressed
617 617
618 618 if data[0:1] == b'\0':
619 619 return b'', data
620 620 return b'u', data
621 621
622 622 def decompress(self, data: bytes):
623 623 """Decompress a revlog chunk.
624 624
625 625 The chunk is expected to begin with a header identifying the
626 626 format type so it can be routed to an appropriate decompressor.
627 627 """
628 628 if not data:
629 629 return data
630 630
631 631 # Revlogs are read much more frequently than they are written and many
632 632 # chunks only take microseconds to decompress, so performance is
633 633 # important here.
634 634 #
635 635 # We can make a few assumptions about revlogs:
636 636 #
637 637 # 1) the majority of chunks will be compressed (as opposed to inline
638 638 # raw data).
639 639 # 2) decompressing *any* data will likely be at least 10x slower than
640 640 # returning raw inline data.
641 641 # 3) we want to prioritize common and officially supported compression
642 642 # engines
643 643 #
644 644 # It follows that we want to optimize for "decompress compressed data
645 645 # when encoded with common and officially supported compression engines"
646 646 # case over "raw data" and "data encoded by less common or non-official
647 647 # compression engines." That is why we have the inline lookup first
648 648 # followed by the compengines lookup.
649 649 #
650 650 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
651 651 # compressed chunks. And this matters for changelog and manifest reads.
652 652 t = data[0:1]
653 653
654 654 if t == b'x':
655 655 try:
656 656 return _zlibdecompress(data)
657 657 except zlib.error as e:
658 658 raise error.RevlogError(
659 659 _(b'revlog decompress error: %s')
660 660 % stringutil.forcebytestr(e)
661 661 )
662 662 # '\0' is more common than 'u' so it goes first.
663 663 elif t == b'\0':
664 664 return data
665 665 elif t == b'u':
666 666 return util.buffer(data, 1)
667 667
668 668 compressor = self._get_decompressor(t)
669 669
670 670 return compressor.decompress(data)
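
A simplified round-trip sketch of the chunk header convention shared by compress() and decompress() above (editor's illustration; the real compress() returns the header separately): zlib output starts with 'x', literal chunks get a 'u' marker, and data beginning with a null byte is stored as-is, since '\0' can start neither valid zlib output nor a 'u'-marked chunk.

    import zlib

    def toy_compress(data):
        comp = zlib.compress(data)
        if len(comp) < len(data):
            return comp              # the 'x' header comes from zlib itself
        if data[:1] == b'\0':
            return data              # already unambiguous, store raw
        return b'u' + data           # mark as uncompressed

    def toy_decompress(chunk):
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression header %r' % t)

    text = b'some revision text' * 10
    assert toy_decompress(toy_compress(text)) == text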
671 671
672 672 @contextlib.contextmanager
673 673 def reading(self):
674 674 """Context manager that keeps data and sidedata files open for reading"""
675 675 if len(self.index) == 0:
676 676 yield # nothing to be read
677 677 elif self._delay_buffer is not None and self.inline:
678 678 msg = "revlog with delayed write should not be inline"
679 679 raise error.ProgrammingError(msg)
680 680 else:
681 681 with self._segmentfile.reading():
682 682 with self._segmentfile_sidedata.reading():
683 683 yield
684 684
685 685 @property
686 686 def is_writing(self):
687 687 """True is a writing context is open"""
688 688 return self._writinghandles is not None
689 689
690 690 @property
691 691 def is_open(self):
692 692 """True if any file handle is being held
693 693
694 694 Used for asserts and debugging in the Python code"""
695 695 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
696 696
697 697 @contextlib.contextmanager
698 698 def writing(self, transaction, data_end=None, sidedata_end=None):
699 699 """Open the revlog files for writing
700 700
701 701 Adding content to a revlog should be done within such a context.
702 702 """
703 703 if self.is_writing:
704 704 yield
705 705 else:
706 706 ifh = dfh = sdfh = None
707 707 try:
708 708 r = len(self.index)
709 709 # opening the data file.
710 710 dsize = 0
711 711 if r:
712 712 dsize = self.end(r - 1)
713 713 dfh = None
714 714 if not self.inline:
715 715 try:
716 716 dfh = self.opener(self.data_file, mode=b"r+")
717 717 if data_end is None:
718 718 dfh.seek(0, os.SEEK_END)
719 719 else:
720 720 dfh.seek(data_end, os.SEEK_SET)
721 721 except FileNotFoundError:
722 722 dfh = self.opener(self.data_file, mode=b"w+")
723 723 transaction.add(self.data_file, dsize)
724 724 if self.sidedata_file is not None:
725 725 assert sidedata_end is not None
726 726 # revlog-v2 does not inline, help Pytype
727 727 assert dfh is not None
728 728 try:
729 729 sdfh = self.opener(self.sidedata_file, mode=b"r+")
730 730 dfh.seek(sidedata_end, os.SEEK_SET)
731 731 except FileNotFoundError:
732 732 sdfh = self.opener(self.sidedata_file, mode=b"w+")
733 733 transaction.add(self.sidedata_file, sidedata_end)
734 734
735 735 # opening the index file.
736 736 isize = r * self.index.entry_size
737 737 ifh = self.__index_write_fp()
738 738 if self.inline:
739 739 transaction.add(self.index_file, dsize + isize)
740 740 else:
741 741 transaction.add(self.index_file, isize)
742 742 # exposing all file handles for writing.
743 743 self._writinghandles = (ifh, dfh, sdfh)
744 744 self._segmentfile.writing_handle = ifh if self.inline else dfh
745 745 self._segmentfile_sidedata.writing_handle = sdfh
746 746 yield
747 747 finally:
748 748 self._writinghandles = None
749 749 self._segmentfile.writing_handle = None
750 750 self._segmentfile_sidedata.writing_handle = None
751 751 if dfh is not None:
752 752 dfh.close()
753 753 if sdfh is not None:
754 754 sdfh.close()
755 755 # closing the index file last to avoid exposing references to
756 756 # potentially unflushed data content.
757 757 if ifh is not None:
758 758 ifh.close()
759 759
760 760 def __index_write_fp(self, index_end=None):
761 761 """internal method to open the index file for writing
762 762
763 763 You should not use this directly; use `_writing` instead
764 764 """
765 765 try:
766 766 if self._delay_buffer is None:
767 767 f = self.opener(
768 768 self.index_file,
769 769 mode=b"r+",
770 770 checkambig=self.data_config.check_ambig,
771 771 )
772 772 else:
773 773 # check_ambig affects the way we open a file for writing; however,
774 774 # here we do not actually open a file for writing, as writes will
775 775 # be appended to a delay_buffer. So check_ambig is not
776 776 # meaningful and is unneeded here.
777 777 f = randomaccessfile.appender(
778 778 self.opener, self.index_file, b"r+", self._delay_buffer
779 779 )
780 780 if index_end is None:
781 781 f.seek(0, os.SEEK_END)
782 782 else:
783 783 f.seek(index_end, os.SEEK_SET)
784 784 return f
785 785 except FileNotFoundError:
786 786 if self._delay_buffer is None:
787 787 return self.opener(
788 788 self.index_file,
789 789 mode=b"w+",
790 790 checkambig=self.data_config.check_ambig,
791 791 )
792 792 else:
793 793 return randomaccessfile.appender(
794 794 self.opener, self.index_file, b"w+", self._delay_buffer
795 795 )
796 796
797 797 def __index_new_fp(self):
798 798 """internal method to create a new index file for writing
799 799
800 800 You should not use this unless you are upgrading from inline revlog
801 801 """
802 802 return self.opener(
803 803 self.index_file,
804 804 mode=b"w",
805 805 checkambig=self.data_config.check_ambig,
806 806 )
807 807
808 808 def split_inline(self, tr, header, new_index_file_path=None):
809 809 """split the data of an inline revlog into an index and a data file"""
810 810 assert self._delay_buffer is None
811 811 existing_handles = False
812 812 if self._writinghandles is not None:
813 813 existing_handles = True
814 814 fp = self._writinghandles[0]
815 815 fp.flush()
816 816 fp.close()
817 817 # We can't use the cached file handle after close(). So prevent
818 818 # its usage.
819 819 self._writinghandles = None
820 820 self._segmentfile.writing_handle = None
821 821 # No need to deal with sidedata writing handle as it is only
822 822 # relevant with revlog-v2 which is never inline, not reaching
823 823 # this code
824 824
825 825 new_dfh = self.opener(self.data_file, mode=b"w+")
826 826 new_dfh.truncate(0) # drop any potentially existing data
827 827 try:
828 828 with self.reading():
829 829 for r in range(len(self.index)):
830 830 new_dfh.write(self.get_segment_for_revs(r, r)[1])
831 831 new_dfh.flush()
832 832
833 833 if new_index_file_path is not None:
834 834 self.index_file = new_index_file_path
835 835 with self.__index_new_fp() as fp:
836 836 self.inline = False
837 837 for i in range(len(self.index)):
838 838 e = self.index.entry_binary(i)
839 839 if i == 0:
840 840 packed_header = self.index.pack_header(header)
841 841 e = packed_header + e
842 842 fp.write(e)
843 843
844 844 # If we don't use side-write, the temp file replaces the real
845 845 # index when we exit the context manager
846 846
847 847 self._segmentfile = randomaccessfile.randomaccessfile(
848 848 self.opener,
849 849 self.data_file,
850 850 self.data_config.chunk_cache_size,
851 851 )
852 852
853 853 if existing_handles:
854 854 # switched from inline to conventional; reopen the index
855 855 ifh = self.__index_write_fp()
856 856 self._writinghandles = (ifh, new_dfh, None)
857 857 self._segmentfile.writing_handle = new_dfh
858 858 new_dfh = None
859 859 # No need to deal with sidedata writing handle as it is only
860 860 # relevant with revlog-v2 which is never inline, not reaching
861 861 # this code
862 862 finally:
863 863 if new_dfh is not None:
864 864 new_dfh.close()
865 865 return self.index_file
866 866
867 867 def get_segment_for_revs(self, startrev, endrev):
868 868 """Obtain a segment of raw data corresponding to a range of revisions.
869 869
870 870 Accepts the start and end revisions; reads go through the revlog's
871 871 internal segment file, so no file handle needs to be passed in.
873 873
874 874 Requests for data may be satisfied by a cache.
875 875
876 876 Returns a 2-tuple of (offset, data) for the requested range of
877 877 revisions. Offset is the integer offset from the beginning of the
878 878 revlog and data is a str or buffer of the raw byte data.
879 879
880 880 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
881 881 to determine where each revision's data begins and ends.
882 882
883 883 API: we should consider making this a private part of the InnerRevlog
884 884 at some point.
885 885 """
886 886 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
887 887 # (functions are expensive).
888 888 index = self.index
889 889 istart = index[startrev]
890 890 start = int(istart[0] >> 16)
891 891 if startrev == endrev:
892 892 end = start + istart[1]
893 893 else:
894 894 iend = index[endrev]
895 895 end = int(iend[0] >> 16) + iend[1]
896 896
897 897 if self.inline:
898 898 start += (startrev + 1) * self.index.entry_size
899 899 end += (endrev + 1) * self.index.entry_size
900 900 length = end - start
901 901
902 902 return start, self._segmentfile.read_chunk(start, length)
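
A worked example of the inline adjustment above (hypothetical numbers): in an inline revlog each revision's data chunk is stored right after its index entry, so a flat data offset taken from the index must be shifted by every entry written up to and including revision `rev`, i.e. `(rev + 1)` entries.

    entry_size = 64      # size of a revlog-v1 index entry, in bytes
    data_offset = 120    # offset recorded in the index (hypothetical)
    rev = 2
    # entries for revs 0, 1 and 2 all precede this chunk on disk
    physical_start = data_offset + (rev + 1) * entry_size
    assert physical_start == 312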
903 903
904 904 def _chunk(self, rev):
905 905 """Obtain a single decompressed chunk for a revision.
906 906
907 907 Accepts an integer revision and an optional already-open file handle
908 908 to be used for reading. If used, the seek position of the file will not
909 909 be preserved.
910 910
911 911 Returns a str holding uncompressed data for the requested revision.
912 912 """
913 913 if self._uncompressed_chunk_cache is not None:
914 914 uncomp = self._uncompressed_chunk_cache.get(rev)
915 915 if uncomp is not None:
916 916 return uncomp
917 917
918 918 compression_mode = self.index[rev][10]
919 919 data = self.get_segment_for_revs(rev, rev)[1]
920 920 if compression_mode == COMP_MODE_PLAIN:
921 921 uncomp = data
922 922 elif compression_mode == COMP_MODE_DEFAULT:
923 923 uncomp = self._decompressor(data)
924 924 elif compression_mode == COMP_MODE_INLINE:
925 925 uncomp = self.decompress(data)
926 926 else:
927 927 msg = b'unknown compression mode %d'
928 928 msg %= compression_mode
929 929 raise error.RevlogError(msg)
930 930 if self._uncompressed_chunk_cache is not None:
931 931 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
932 932 return uncomp
933 933
934 934 def _chunks(self, revs, targetsize=None):
935 935 """Obtain decompressed chunks for the specified revisions.
936 936
937 937 Accepts an iterable of numeric revisions that are assumed to be in
938 938 ascending order.
939 939
940 940 This function is similar to calling ``self._chunk()`` multiple times,
941 941 but is faster.
942 942
943 943 Returns a list with decompressed data for each requested revision.
944 944 """
945 945 if not revs:
946 946 return []
947 947 start = self.start
948 948 length = self.length
949 949 inline = self.inline
950 950 iosize = self.index.entry_size
951 951 buffer = util.buffer
952 952
953 953 fetched_revs = []
954 954 fadd = fetched_revs.append
955 955
956 956 chunks = []
957 957 ladd = chunks.append
958 958
959 959 if self._uncompressed_chunk_cache is None:
960 960 fetched_revs = revs
961 961 else:
962 962 for rev in revs:
963 963 cached_value = self._uncompressed_chunk_cache.get(rev)
964 964 if cached_value is None:
965 965 fadd(rev)
966 966 else:
967 967 ladd((rev, cached_value))
968 968
969 969 if not fetched_revs:
970 970 slicedchunks = ()
971 971 elif not self.data_config.with_sparse_read:
972 972 slicedchunks = (fetched_revs,)
973 973 else:
974 974 slicedchunks = deltautil.slicechunk(
975 975 self,
976 976 fetched_revs,
977 977 targetsize=targetsize,
978 978 )
979 979
980 980 for revschunk in slicedchunks:
981 981 firstrev = revschunk[0]
982 982 # Skip trailing revisions with empty diff
983 983 for lastrev in revschunk[::-1]:
984 984 if length(lastrev) != 0:
985 985 break
986 986
987 987 try:
988 988 offset, data = self.get_segment_for_revs(firstrev, lastrev)
989 989 except OverflowError:
990 990 # issue4215 - we can't cache a run of chunks greater than
991 991 # 2G on Windows
992 992 for rev in revschunk:
993 993 ladd((rev, self._chunk(rev)))
continue  # offset/data are unset here; skip the shared path below
994 994
995 995 decomp = self.decompress
996 996 # self._decompressor might be None, but will not be used in that case
997 997 def_decomp = self._decompressor
998 998 for rev in revschunk:
999 999 chunkstart = start(rev)
1000 1000 if inline:
1001 1001 chunkstart += (rev + 1) * iosize
1002 1002 chunklength = length(rev)
1003 1003 comp_mode = self.index[rev][10]
1004 1004 c = buffer(data, chunkstart - offset, chunklength)
1005 1005 if comp_mode == COMP_MODE_PLAIN:
1006 1006 c = c
1007 1007 elif comp_mode == COMP_MODE_INLINE:
1008 1008 c = decomp(c)
1009 1009 elif comp_mode == COMP_MODE_DEFAULT:
1010 1010 c = def_decomp(c)
1011 1011 else:
1012 1012 msg = b'unknown compression mode %d'
1013 1013 msg %= comp_mode
1014 1014 raise error.RevlogError(msg)
1015 1015 ladd((rev, c))
1016 1016 if self._uncompressed_chunk_cache is not None:
1017 1017 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1018 1018
1019 1019 chunks.sort()
1020 1020 return [x[1] for x in chunks]
1021 1021
1022 1022 def raw_text(self, node, rev):
1023 1023 """return the possibly unvalidated rawtext for a revision
1024 1024
1025 1025 returns rawtext
1026 1026 """
1027 1027
1028 1028 # revision in the cache (could be useful to apply delta)
1029 1029 cachedrev = None
1030 1030 # An intermediate text to apply deltas to
1031 1031 basetext = None
1032 1032
1033 1033 # Check if we have the entry in cache
1034 1034 # The cache entry looks like (node, rev, rawtext)
1035 1035 if self._revisioncache:
1036 1036 cachedrev = self._revisioncache[1]
1037 1037
1038 1038 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1039 1039 if stopped:
1040 1040 basetext = self._revisioncache[2]
1041 1041
1042 1042 # drop cache to save memory, the caller is expected to
1043 1043 # update self._inner._revisioncache after validating the text
1044 1044 self._revisioncache = None
1045 1045
1046 1046 targetsize = None
1047 1047 rawsize = self.index[rev][2]
1048 1048 if 0 <= rawsize:
1049 1049 targetsize = 4 * rawsize
1050 1050
1051 1051 if self._uncompressed_chunk_cache is not None:
1052 1052 # dynamically update the uncompressed_chunk_cache size to the
1053 1053 # largest revision we saw in this revlog.
1054 1054 factor = self.data_config.uncompressed_cache_factor
1055 1055 candidate_size = rawsize * factor
1056 1056 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1057 1057 self._uncompressed_chunk_cache.maxcost = candidate_size
1058 1058
1059 1059 bins = self._chunks(chain, targetsize=targetsize)
1060 1060 if basetext is None:
1061 1061 basetext = bytes(bins[0])
1062 1062 bins = bins[1:]
1063 1063
1064 1064 rawtext = mdiff.patches(basetext, bins)
1065 1065 del basetext # let us have a chance to free memory early
1066 1066 return rawtext
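
A conceptual stand-in for the `mdiff.patches(basetext, bins)` call above (editor's sketch): the first chunk of a delta chain is the full base text and every later chunk patches the running result. A toy patch format of `(start, end, replacement)` tuples is used here instead of mercurial's real bdiff encoding.

    def toy_patches(base, patches):
        text = base
        for start, end, replacement in patches:
            text = text[:start] + replacement + text[end:]
        return text

    base = b'line one\nline two\n'
    deltas = [(5, 8, b'1'), (12, 15, b'2')]  # each applies to the previous result
    assert toy_patches(base, deltas) == b'line 1\nline 2\n'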
1067 1067
1068 1068 def sidedata(self, rev, sidedata_end):
1069 1069 """Return the sidedata for a given revision number."""
1070 1070 index_entry = self.index[rev]
1071 1071 sidedata_offset = index_entry[8]
1072 1072 sidedata_size = index_entry[9]
1073 1073
1074 1074 if self.inline:
1075 1075 sidedata_offset += self.index.entry_size * (1 + rev)
1076 1076 if sidedata_size == 0:
1077 1077 return {}
1078 1078
1079 1079 if sidedata_end < sidedata_offset + sidedata_size:
1080 1080 filename = self.sidedata_file
1081 1081 end = sidedata_end
1082 1082 offset = sidedata_offset
1083 1083 length = sidedata_size
1084 1084 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1085 1085 raise error.RevlogError(m)
1086 1086
1087 1087 comp_segment = self._segmentfile_sidedata.read_chunk(
1088 1088 sidedata_offset, sidedata_size
1089 1089 )
1090 1090
1091 1091 comp = self.index[rev][11]
1092 1092 if comp == COMP_MODE_PLAIN:
1093 1093 segment = comp_segment
1094 1094 elif comp == COMP_MODE_DEFAULT:
1095 1095 segment = self._decompressor(comp_segment)
1096 1096 elif comp == COMP_MODE_INLINE:
1097 1097 segment = self.decompress(comp_segment)
1098 1098 else:
1099 1099 msg = b'unknown compression mode %d'
1100 1100 msg %= comp
1101 1101 raise error.RevlogError(msg)
1102 1102
1103 1103 sidedata = sidedatautil.deserialize_sidedata(segment)
1104 1104 return sidedata
1105 1105
1106 1106 def write_entry(
1107 1107 self,
1108 1108 transaction,
1109 1109 entry,
1110 1110 data,
1111 1111 link,
1112 1112 offset,
1113 1113 sidedata,
1114 1114 sidedata_offset,
1115 1115 index_end,
1116 1116 data_end,
1117 1117 sidedata_end,
1118 1118 ):
1119 1119 # Files opened in a+ mode have inconsistent behavior on various
1120 1120 # platforms. Windows requires that a file positioning call be made
1121 1121 # when the file handle transitions between reads and writes. See
1122 1122 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1123 1123 # platforms, Python or the platform itself can be buggy. Some versions
1124 1124 # of Solaris have been observed to not append at the end of the file
1125 1125 # if the file was seeked to before the end. See issue4943 for more.
1126 1126 #
1127 1127 # We work around this issue by inserting a seek() before writing.
1128 1128 # Note: This is likely not necessary on Python 3. However, because
1129 1129 # the file handle is reused for reads and may be seeked there, we need
1130 1130 # to be careful before changing this.
1131 1131 if self._writinghandles is None:
1132 1132 msg = b'adding revision outside `revlog._writing` context'
1133 1133 raise error.ProgrammingError(msg)
1134 1134 ifh, dfh, sdfh = self._writinghandles
1135 1135 if index_end is None:
1136 1136 ifh.seek(0, os.SEEK_END)
1137 1137 else:
1138 1138 ifh.seek(index_end, os.SEEK_SET)
1139 1139 if dfh:
1140 1140 if data_end is None:
1141 1141 dfh.seek(0, os.SEEK_END)
1142 1142 else:
1143 1143 dfh.seek(data_end, os.SEEK_SET)
1144 1144 if sdfh:
1145 1145 sdfh.seek(sidedata_end, os.SEEK_SET)
1146 1146
1147 1147 curr = len(self.index) - 1
1148 1148 if not self.inline:
1149 1149 transaction.add(self.data_file, offset)
1150 1150 if self.sidedata_file:
1151 1151 transaction.add(self.sidedata_file, sidedata_offset)
1152 1152 transaction.add(self.canonical_index_file, curr * len(entry))
1153 1153 if data[0]:
1154 1154 dfh.write(data[0])
1155 1155 dfh.write(data[1])
1156 1156 if sidedata:
1157 1157 sdfh.write(sidedata)
1158 1158 if self._delay_buffer is None:
1159 1159 ifh.write(entry)
1160 1160 else:
1161 1161 self._delay_buffer.append(entry)
1162 1162 elif self._delay_buffer is not None:
1163 1163 msg = b'invalid delayed write on inline revlog'
1164 1164 raise error.ProgrammingError(msg)
1165 1165 else:
1166 1166 offset += curr * self.index.entry_size
1167 1167 transaction.add(self.canonical_index_file, offset)
1168 1168 assert not sidedata
1169 1169 ifh.write(entry)
1170 1170 ifh.write(data[0])
1171 1171 ifh.write(data[1])
1172 1172 return (
1173 1173 ifh.tell(),
1174 1174 dfh.tell() if dfh else None,
1175 1175 sdfh.tell() if sdfh else None,
1176 1176 )
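
A minimal sketch of the seek-before-write workaround described in the comment above (hypothetical helper, not revlog API): a handle shared between reads and writes may not append where expected on some platforms, so the position is set explicitly before every write.

    import os

    def append_record(fh, record, known_end=None):
        if known_end is None:
            fh.seek(0, os.SEEK_END)          # trust the on-disk end of file
        else:
            fh.seek(known_end, os.SEEK_SET)  # trust the caller-tracked offset
        fh.write(record)
        return fh.tell()                     # new end offset for the caller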
1177 1177
1178 1178 def _divert_index(self):
1179 1179 index_file = self.index_file
1180 1180 # when we encounter a legacy inline-changelog, split it. However it is
1181 1181 # important to use the expected filename for pending content
1182 1182 # (<radix>.a), otherwise hooks won't see the content of the
1183 1183 # pending transaction.
1184 1184 if index_file.endswith(b'.s'):
1185 1185 index_file = self.index_file[:-2]
1186 1186 return index_file + b'.a'
1187 1187
1188 1188 def delay(self):
1189 1189 assert not self.is_open
1190 1190 if self.inline:
1191 1191 msg = "revlog with delayed write should not be inline"
1192 1192 raise error.ProgrammingError(msg)
1193 1193 if self._delay_buffer is not None or self._orig_index_file is not None:
1194 1194 # delay or divert already in place
1195 1195 return None
1196 1196 elif len(self.index) == 0:
1197 1197 self._orig_index_file = self.index_file
1198 1198 self.index_file = self._divert_index()
1199 1199 assert self._orig_index_file is not None
1200 1200 assert self.index_file is not None
1201 1201 if self.opener.exists(self.index_file):
1202 1202 self.opener.unlink(self.index_file)
1203 1203 return self.index_file
1204 1204 else:
1205 1205 self._delay_buffer = []
1206 1206 return None
1207 1207
1208 1208 def write_pending(self):
1209 1209 assert not self.is_open
1210 1210 if self.inline:
1211 1211 msg = "revlog with delayed write should not be inline"
1212 1212 raise error.ProgrammingError(msg)
1213 1213 if self._orig_index_file is not None:
1214 1214 return None, True
1215 1215 any_pending = False
1216 1216 pending_index_file = self._divert_index()
1217 1217 if self.opener.exists(pending_index_file):
1218 1218 self.opener.unlink(pending_index_file)
1219 1219 util.copyfile(
1220 1220 self.opener.join(self.index_file),
1221 1221 self.opener.join(pending_index_file),
1222 1222 )
1223 1223 if self._delay_buffer:
1224 1224 with self.opener(pending_index_file, b'r+') as ifh:
1225 1225 ifh.seek(0, os.SEEK_END)
1226 1226 ifh.write(b"".join(self._delay_buffer))
1227 1227 any_pending = True
1228 1228 self._delay_buffer = None
1229 1229 self._orig_index_file = self.index_file
1230 1230 self.index_file = pending_index_file
1231 1231 return self.index_file, any_pending
1232 1232
1233 1233 def finalize_pending(self):
1234 1234 assert not self.is_open
1235 1235 if self.inline:
1236 1236 msg = "revlog with delayed write should not be inline"
1237 1237 raise error.ProgrammingError(msg)
1238 1238
1239 1239 delay = self._delay_buffer is not None
1240 1240 divert = self._orig_index_file is not None
1241 1241
1242 1242 if delay and divert:
1243 1243 assert False, "unreachable"
1244 1244 elif delay:
1245 1245 if self._delay_buffer:
1246 1246 with self.opener(self.index_file, b'r+') as ifh:
1247 1247 ifh.seek(0, os.SEEK_END)
1248 1248 ifh.write(b"".join(self._delay_buffer))
1249 1249 self._delay_buffer = None
1250 1250 elif divert:
1251 1251 if self.opener.exists(self.index_file):
1252 1252 self.opener.rename(
1253 1253 self.index_file,
1254 1254 self._orig_index_file,
1255 1255 checkambig=True,
1256 1256 )
1257 1257 self.index_file = self._orig_index_file
1258 1258 self._orig_index_file = None
1259 1259 else:
1260 1260 msg = b"not delay or divert found on this revlog"
1261 1261 raise error.ProgrammingError(msg)
1262 1262 return self.canonical_index_file
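
A schematic walk through the delay/divert lifecycle implemented by delay(), write_pending() and finalize_pending() above (editor's sketch of the call sequence; `inner` stands for an _InnerRevlog outside any writing context):

    def run_transaction_with_hooks(inner):
        # empty revlog: divert writes to '<radix>.i.a'; otherwise buffer in memory
        inner.delay()
        # ... new revisions are added through the usual writing() context ...
        pending_file, any_pending = inner.write_pending()
        if any_pending:
            pass  # hooks may read pending_file while the transaction is open
        # flush the buffer, or rename the '.a' file over the real index
        return inner.finalize_pending()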
1263 1263
1264 1264
1265 1265 class revlog:
1266 1266 """
1267 1267 the underlying revision storage object
1268 1268
1269 1269 A revlog consists of two parts, an index and the revision data.
1270 1270
1271 1271 The index is a file with a fixed record size containing
1272 1272 information on each revision, including its nodeid (hash), the
1273 1273 nodeids of its parents, the position and offset of its data within
1274 1274 the data file, and the revision it's based on. Finally, each entry
1275 1275 contains a linkrev entry that can serve as a pointer to external
1276 1276 data.
1277 1277
1278 1278 The revision data itself is a linear collection of data chunks.
1279 1279 Each chunk represents a revision and is usually represented as a
1280 1280 delta against the previous chunk. To bound lookup time, runs of
1281 1281 deltas are limited to about 2 times the length of the original
1282 1282 version data. This makes retrieval of a version proportional to
1283 1283 its size, or O(1) relative to the number of revisions.
1284 1284
1285 1285 Both pieces of the revlog are written to in an append-only
1286 1286 fashion, which means we never need to rewrite a file to insert or
1287 1287 remove data, and can use some simple techniques to avoid the need
1288 1288 for locking while reading.
1289 1289
1290 1290 If checkambig, indexfile is opened with checkambig=True at
1291 1291 writing, to avoid file stat ambiguity.
1292 1292
1293 1293 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1294 1294 index will be mmapped rather than read if it is larger than the
1295 1295 configured threshold.
1296 1296
1297 1297 If censorable is True, the revlog can have censored revisions.
1298 1298
1299 1299 If `upperboundcomp` is not None, this is the expected maximal gain from
1300 1300 compression for the data content.
1301 1301
1302 1302 `concurrencychecker` is an optional function that receives 3 arguments: a
1303 1303 file handle, a filename, and an expected position. It should check whether
1304 1304 the current position in the file handle is valid, and log/warn/fail (by
1305 1305 raising).
1306 1306
1307 1307 See mercurial/revlogutils/constants.py for details about the content of an
1308 1308 index entry.
1309 1309 """
1310 1310
1311 1311 _flagserrorclass = error.RevlogError
1312 1312
1313 1313 opener: vfsmod.vfs
1314 1314
1315 1315 @staticmethod
1316 1316 def is_inline_index(header_bytes):
1317 1317 """Determine if a revlog is inline from the initial bytes of the index"""
1318 1318 if len(header_bytes) == 0:
1319 1319 return True
1320 1320
1321 1321 header = INDEX_HEADER.unpack(header_bytes)[0]
1322 1322
1323 1323 _format_flags = header & ~0xFFFF
1324 1324 _format_version = header & 0xFFFF
1325 1325
1326 1326 features = FEATURES_BY_VERSION[_format_version]
1327 1327 return features[b'inline'](_format_flags)
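
A worked example of the header split used above: the first four index bytes form a big-endian integer whose low 16 bits carry the format version and whose high bits carry the feature flags, so a v1 inline revlog starts with 0x00010001 (FLAG_INLINE_DATA | REVLOGV1).

    import struct

    header = struct.unpack('>I', struct.pack('>I', 0x00010001))[0]
    format_flags = header & ~0xFFFF    # 0x00010000, i.e. FLAG_INLINE_DATA
    format_version = header & 0xFFFF   # 1, i.e. REVLOGV1
    assert format_version == 1 and format_flags == 1 << 16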
1328 1328
1329 1329 _docket_file: Optional[bytes]
1330 1330
1331 1331 def __init__(
1332 1332 self,
1333 1333 opener: vfsmod.vfs,
1334 1334 target,
1335 1335 radix,
1336 1336 postfix=None, # only exist for `tmpcensored` now
1337 1337 checkambig=False,
1338 1338 mmaplargeindex=False,
1339 1339 censorable=False,
1340 1340 upperboundcomp=None,
1341 1341 persistentnodemap=False,
1342 1342 concurrencychecker=None,
1343 1343 trypending=False,
1344 1344 try_split=False,
1345 1345 canonical_parent_order=True,
1346 1346 data_config=None,
1347 1347 delta_config=None,
1348 1348 feature_config=None,
1349 1349 may_inline=True, # may inline new revlog
1350 1350 ):
1351 1351 """
1352 1352 create a revlog object
1353 1353
1354 1354 opener is a function that abstracts the file opening operation
1355 1355 and can be used to implement COW semantics or the like.
1356 1356
1357 1357 `target`: a (KIND, ID) tuple that identifies the content stored in
1358 1358 this revlog. It helps the rest of the code understand what the revlog
1359 1359 is about without having to resort to heuristics and index filename
1360 1360 analysis. Note that this must reliably be set by normal code, but
1361 1361 test, debug, or performance measurement code might not set it to an
1362 1362 accurate value.
1363 1363 """
1364 1364
1365 1365 self.radix = radix
1366 1366
1367 1367 self._docket_file = None
1368 1368 self._indexfile = None
1369 1369 self._datafile = None
1370 1370 self._sidedatafile = None
1371 1371 self._nodemap_file = None
1372 1372 self.postfix = postfix
1373 1373 self._trypending = trypending
1374 1374 self._try_split = try_split
1375 1375 self._may_inline = may_inline
1376 1376 self.opener = opener
1377 1377 if persistentnodemap:
1378 1378 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1379 1379
1380 1380 assert target[0] in ALL_KINDS
1381 1381 assert len(target) == 2
1382 1382 self.target = target
1383 1383 if feature_config is not None:
1384 1384 self.feature_config = feature_config.copy()
1385 1385 elif b'feature-config' in self.opener.options:
1386 1386 self.feature_config = self.opener.options[b'feature-config'].copy()
1387 1387 else:
1388 1388 self.feature_config = FeatureConfig()
1389 1389 self.feature_config.censorable = censorable
1390 1390 self.feature_config.canonical_parent_order = canonical_parent_order
1391 1391 if data_config is not None:
1392 1392 self.data_config = data_config.copy()
1393 1393 elif b'data-config' in self.opener.options:
1394 1394 self.data_config = self.opener.options[b'data-config'].copy()
1395 1395 else:
1396 1396 self.data_config = DataConfig()
1397 1397 self.data_config.check_ambig = checkambig
1398 1398 self.data_config.mmap_large_index = mmaplargeindex
1399 1399 if delta_config is not None:
1400 1400 self.delta_config = delta_config.copy()
1401 1401 elif b'delta-config' in self.opener.options:
1402 1402 self.delta_config = self.opener.options[b'delta-config'].copy()
1403 1403 else:
1404 1404 self.delta_config = DeltaConfig()
1405 1405 self.delta_config.upper_bound_comp = upperboundcomp
1406 1406
1407 1407 # Maps rev to chain base rev.
1408 1408 self._chainbasecache = util.lrucachedict(100)
1409 1409
1410 1410 self.index = None
1411 1411 self._docket = None
1412 1412 self._nodemap_docket = None
1413 1413 # Mapping of partial identifiers to full nodes.
1414 1414 self._pcache = {}
1415 1415
1416 1416 # other optional features
1417 1417
1418 1418 # Make copy of flag processors so each revlog instance can support
1419 1419 # custom flags.
1420 1420 self._flagprocessors = dict(flagutil.flagprocessors)
1421 1421 # prevent nesting of addgroup
1422 1422 self._adding_group = None
1423 1423
1424 1424 chunk_cache = self._loadindex()
1425 1425 self._load_inner(chunk_cache)
1426 1426 self._concurrencychecker = concurrencychecker
1427 1427
1428 1428 def _init_opts(self):
1429 1429 """process options (from above/config) to setup associated default revlog mode
1430 1430
1431 1431 These values might be affected when actually reading on disk information.
1432 1432
1433 1433 The relevant values are returned for use in _loadindex().
1434 1434
1435 1435 * newversionflags:
1436 1436 version header to use if we need to create a new revlog
1437 1437
1438 1438 * mmapindexthreshold:
1439 1439 minimal index size at which to start using mmap
1440 1440
1441 1441 * force_nodemap:
1442 1442 force the usage of a "development" version of the nodemap code
1443 1443 """
1444 1444 opts = self.opener.options
1445 1445
1446 1446 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1447 1447 new_header = CHANGELOGV2
1448 1448 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1449 1449 self.feature_config.compute_rank = compute_rank
1450 1450 elif b'revlogv2' in opts:
1451 1451 new_header = REVLOGV2
1452 1452 elif b'revlogv1' in opts:
1453 1453 new_header = REVLOGV1
1454 1454 if self._may_inline:
1455 1455 new_header |= FLAG_INLINE_DATA
1456 1456 if b'generaldelta' in opts:
1457 1457 new_header |= FLAG_GENERALDELTA
1458 1458 elif b'revlogv0' in self.opener.options:
1459 1459 new_header = REVLOGV0
1460 1460 else:
1461 1461 new_header = REVLOG_DEFAULT_VERSION
1462 1462
1463 1463 mmapindexthreshold = None
1464 1464 if self.data_config.mmap_large_index:
1465 1465 mmapindexthreshold = self.data_config.mmap_index_threshold
1466 1466 if self.feature_config.enable_ellipsis:
1467 1467 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1468 1468
1469 1469 # revlog v0 doesn't have flag processors
1470 1470 for flag, processor in opts.get(b'flagprocessors', {}).items():
1471 1471 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1472 1472
1473 1473 chunk_cache_size = self.data_config.chunk_cache_size
1474 1474 if chunk_cache_size <= 0:
1475 1475 raise error.RevlogError(
1476 1476 _(b'revlog chunk cache size %r is not greater than 0')
1477 1477 % chunk_cache_size
1478 1478 )
1479 1479 elif chunk_cache_size & (chunk_cache_size - 1):
1480 1480 raise error.RevlogError(
1481 1481 _(b'revlog chunk cache size %r is not a power of 2')
1482 1482 % chunk_cache_size
1483 1483 )
1484 1484 force_nodemap = opts.get(b'devel-force-nodemap', False)
1485 1485 return new_header, mmapindexthreshold, force_nodemap
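
The `size & (size - 1)` test above is the standard power-of-two check: a power of two has a single bit set, and subtracting one turns that bit off and all lower bits on, so the AND is zero exactly for powers of two.

    for size in (1, 2, 65536):
        assert size & (size - 1) == 0   # powers of two pass
    for size in (3, 6, 65535):
        assert size & (size - 1) != 0   # everything else is rejected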
1486 1486
1487 1487 def _get_data(self, filepath, mmap_threshold, size=None):
1488 1488 """return a file content with or without mmap
1489 1489
1490 1490 If the file is missing return the empty string"""
1491 1491 try:
1492 1492 with self.opener(filepath) as fp:
1493 1493 if mmap_threshold is not None:
1494 1494 file_size = self.opener.fstat(fp).st_size
1495 1495 if (
1496 1496 file_size >= mmap_threshold
1497 1497 and self.opener.is_mmap_safe(filepath)
1498 1498 ):
1499 1499 if size is not None:
1500 1500 # avoid potential mmap crash
1501 1501 size = min(file_size, size)
1502 1502 # TODO: should .close() to release resources without
1503 1503 # relying on Python GC
1504 1504 if size is None:
1505 1505 return util.buffer(util.mmapread(fp))
1506 1506 else:
1507 1507 return util.buffer(util.mmapread(fp, size))
1508 1508 if size is None:
1509 1509 return fp.read()
1510 1510 else:
1511 1511 return fp.read(size)
1512 1512 except FileNotFoundError:
1513 1513 return b''
1514 1514
1515 1515 def get_streams(self, max_linkrev, force_inline=False):
1516 1516 """return a list of streams that represent this revlog
1517 1517
1518 1518 This is used by stream-clone to do bytes to bytes copies of a repository.
1519 1519
1520 1520 This streams data for all revisions that refer to a changelog revision up
1521 1521 to `max_linkrev`.
1522 1522
1523 1523 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1524 1524
1525 1525 It returns a list of three-tuples:
1526 1526
1527 1527 [
1528 1528 (filename, bytes_stream, stream_size),
1529 1529
1530 1530 ]
1531 1531 """
1532 1532 n = len(self)
1533 1533 index = self.index
1534 1534 while n > 0:
1535 1535 linkrev = index[n - 1][4]
1536 1536 if linkrev < max_linkrev:
1537 1537 break
1538 1538 # note: this loop will rarely go through multiple iterations, since
1539 1539 # it only traverses commits created during the current streaming
1540 1540 # pull operation.
1541 1541 #
1542 1542 # If this becomes a problem, using a binary search should cap the
1543 1543 # runtime of this.
1544 1544 n = n - 1
1545 1545 if n == 0:
1546 1546 # no data to send
1547 1547 return []
1548 1548 index_size = n * index.entry_size
1549 1549 data_size = self.end(n - 1)
1550 1550
1551 1551 # XXX we might have been split (or stripped) since the object
1552 1552 # initialization. We need to close this race too, perhaps by having a
1553 1553 # way to pre-open the files we feed to the revlog and never closing
1554 1554 # them before we are done streaming.
1555 1555
1556 1556 if self._inline:
1557 1557
1558 1558 def get_stream():
1559 1559 with self.opener(self._indexfile, mode=b"r") as fp:
1560 1560 yield None
1561 1561 size = index_size + data_size
1562 1562 if size <= 65536:
1563 1563 yield fp.read(size)
1564 1564 else:
1565 1565 yield from util.filechunkiter(fp, limit=size)
1566 1566
1567 1567 inline_stream = get_stream()
1568 1568 next(inline_stream)
1569 1569 return [
1570 1570 (self._indexfile, inline_stream, index_size + data_size),
1571 1571 ]
1572 1572 elif force_inline:
1573 1573
1574 1574 def get_stream():
1575 1575 with self.reading():
1576 1576 yield None
1577 1577
1578 1578 for rev in range(n):
1579 1579 idx = self.index.entry_binary(rev)
1580 1580 if rev == 0 and self._docket is None:
1581 1581 # re-inject the inline flag
1582 1582 header = self._format_flags
1583 1583 header |= self._format_version
1584 1584 header |= FLAG_INLINE_DATA
1585 1585 header = self.index.pack_header(header)
1586 1586 idx = header + idx
1587 1587 yield idx
1588 1588 yield self._inner.get_segment_for_revs(rev, rev)[1]
1589 1589
1590 1590 inline_stream = get_stream()
1591 1591 next(inline_stream)
1592 1592 return [
1593 1593 (self._indexfile, inline_stream, index_size + data_size),
1594 1594 ]
1595 1595 else:
1596 1596
1597 1597 def get_index_stream():
1598 1598 with self.opener(self._indexfile, mode=b"r") as fp:
1599 1599 yield None
1600 1600 if index_size <= 65536:
1601 1601 yield fp.read(index_size)
1602 1602 else:
1603 1603 yield from util.filechunkiter(fp, limit=index_size)
1604 1604
1605 1605 def get_data_stream():
1606 1606 with self._datafp() as fp:
1607 1607 yield None
1608 1608 if data_size <= 65536:
1609 1609 yield fp.read(data_size)
1610 1610 else:
1611 1611 yield from util.filechunkiter(fp, limit=data_size)
1612 1612
1613 1613 index_stream = get_index_stream()
1614 1614 next(index_stream)
1615 1615 data_stream = get_data_stream()
1616 1616 next(data_stream)
1617 1617 return [
1618 1618 (self._datafile, data_stream, data_size),
1619 1619 (self._indexfile, index_stream, index_size),
1620 1620 ]
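
A minimal sketch of how a stream-clone consumer might drain the three-tuples returned above (`out_vfs` is a hypothetical vfs-style writer; the real consumer lives in mercurial's streamclone module):

    def copy_revlog_streams(streams, out_vfs):
        for filename, bytes_stream, expected_size in streams:
            written = 0
            with out_vfs(filename, b'wb') as out:
                for chunk in bytes_stream:
                    out.write(chunk)
                    written += len(chunk)
            # streams advertise their exact size up front
            assert written == expected_size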
1621 1621
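# Usage sketch (illustrative, not part of this module): how a stream-clone
# producer might consume the value returned by get_streams() above. `rl`,
# `max_linkrev` and `sink` are hypothetical names.
#
#     for filename, stream, size in rl.get_streams(max_linkrev):
#         # each `stream` is a primed generator: the initial None was
#         # already consumed by next(), so iteration yields only bytes
#         written = 0
#         for chunk in stream:
#             sink.write(chunk)
#             written += len(chunk)
#         assert written == size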
1622 1622 def _loadindex(self, docket=None):
1623 1623 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1624 1624
1625 1625 if self.postfix is not None:
1626 1626 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1627 1627 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1628 1628 entry_point = b'%s.i.a' % self.radix
1629 1629 elif self._try_split and self.opener.exists(self._split_index_file):
1630 1630 entry_point = self._split_index_file
1631 1631 else:
1632 1632 entry_point = b'%s.i' % self.radix
1633 1633
1634 1634 if docket is not None:
1635 1635 self._docket = docket
1636 1636 self._docket_file = entry_point
1637 1637 else:
1638 1638 self._initempty = True
1639 1639 entry_data = self._get_data(entry_point, mmapindexthreshold)
1640 1640 if len(entry_data) > 0:
1641 1641 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1642 1642 self._initempty = False
1643 1643 else:
1644 1644 header = new_header
1645 1645
1646 1646 self._format_flags = header & ~0xFFFF
1647 1647 self._format_version = header & 0xFFFF
1648 1648
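# Illustration (not part of this module): how the two masks above split a
# 32-bit index header. For a version-1 inline revlog the header is
# REVLOGV1 | FLAG_INLINE_DATA, i.e. 0x00010001:
#
#     header = 0x00010001
#     format_flags = header & ~0xFFFF   # 0x00010000 (FLAG_INLINE_DATA)
#     format_version = header & 0xFFFF  # 0x0001 (REVLOGV1)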
1649 1649 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1650 1650 if supported_flags is None:
1651 1651 msg = _(b'unknown version (%d) in revlog %s')
1652 1652 msg %= (self._format_version, self.display_id)
1653 1653 raise error.RevlogError(msg)
1654 1654 elif self._format_flags & ~supported_flags:
1655 1655 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1656 1656 display_flag = self._format_flags >> 16
1657 1657 msg %= (display_flag, self._format_version, self.display_id)
1658 1658 raise error.RevlogError(msg)
1659 1659
1660 1660 features = FEATURES_BY_VERSION[self._format_version]
1661 1661 self._inline = features[b'inline'](self._format_flags)
1662 1662 self.delta_config.general_delta = features[b'generaldelta'](
1663 1663 self._format_flags
1664 1664 )
1665 1665 self.feature_config.has_side_data = features[b'sidedata']
1666 1666
1667 1667 if not features[b'docket']:
1668 1668 self._indexfile = entry_point
1669 1669 index_data = entry_data
1670 1670 else:
1671 1671 self._docket_file = entry_point
1672 1672 if self._initempty:
1673 1673 self._docket = docketutil.default_docket(self, header)
1674 1674 else:
1675 1675 self._docket = docketutil.parse_docket(
1676 1676 self, entry_data, use_pending=self._trypending
1677 1677 )
1678 1678
1679 1679 if self._docket is not None:
1680 1680 self._indexfile = self._docket.index_filepath()
1681 1681 index_data = b''
1682 1682 index_size = self._docket.index_end
1683 1683 if index_size > 0:
1684 1684 index_data = self._get_data(
1685 1685 self._indexfile, mmapindexthreshold, size=index_size
1686 1686 )
1687 1687 if len(index_data) < index_size:
1688 1688 msg = _(b'not enough index data for %s: got %d, expected %d')
1689 1689 msg %= (self.display_id, len(index_data), index_size)
1690 1690 raise error.RevlogError(msg)
1691 1691
1692 1692 self._inline = False
1693 1693 # generaldelta is implied by version 2 revlogs.
1694 1694 self.delta_config.general_delta = True
1695 1695 # the logic for persistent nodemap will be dealt with within the
1696 1696 # main docket, so disable it for now.
1697 1697 self._nodemap_file = None
1698 1698
1699 1699 if self._docket is not None:
1700 1700 self._datafile = self._docket.data_filepath()
1701 1701 self._sidedatafile = self._docket.sidedata_filepath()
1702 1702 elif self.postfix is None:
1703 1703 self._datafile = b'%s.d' % self.radix
1704 1704 else:
1705 1705 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1706 1706
1707 1707 self.nodeconstants = sha1nodeconstants
1708 1708 self.nullid = self.nodeconstants.nullid
1709 1709
1710 1710 # sparse-revlog can't be on without general-delta (issue6056)
1711 1711 if not self.delta_config.general_delta:
1712 1712 self.delta_config.sparse_revlog = False
1713 1713
1714 1714 self._storedeltachains = True
1715 1715
1716 1716 devel_nodemap = (
1717 1717 self._nodemap_file
1718 1718 and force_nodemap
1719 1719 and parse_index_v1_nodemap is not None
1720 1720 )
1721 1721
1722 1722 use_rust_index = False
1723 1723 if rustrevlog is not None and self._nodemap_file is not None:
1724 1724 # we would like to use the rust_index in all cases, especially
1725 1725 # because it is necessary for AncestorsIterator and LazyAncestors
1726 1726 # since the 6.7 cycle.
1727 1727 #
1728 1728 # However, the performance impact of unconditionally building the
1729 1729 # nodemap is currently a problem for repositories without a
1730 1730 # persistent nodemap.
1731 1731 use_rust_index = True
1732 1732
1733 1733 self._parse_index = parse_index_v1
1734 1734 if self._format_version == REVLOGV0:
1735 1735 self._parse_index = revlogv0.parse_index_v0
1736 1736 elif self._format_version == REVLOGV2:
1737 1737 self._parse_index = parse_index_v2
1738 1738 elif self._format_version == CHANGELOGV2:
1739 1739 self._parse_index = parse_index_cl_v2
1740 1740 elif devel_nodemap:
1741 1741 self._parse_index = parse_index_v1_nodemap
1742 1742 elif use_rust_index:
1743 1743 self._parse_index = functools.partial(
1744 1744 parse_index_v1_rust, default_header=new_header
1745 1745 )
1746 1746 try:
1747 1747 d = self._parse_index(index_data, self._inline)
1748 1748 index, chunkcache = d
1749 1749 use_nodemap = (
1750 1750 not self._inline
1751 1751 and self._nodemap_file is not None
1752 1752 and hasattr(index, 'update_nodemap_data')
1753 1753 )
1754 1754 if use_nodemap:
1755 1755 nodemap_data = nodemaputil.persisted_data(self)
1756 1756 if nodemap_data is not None:
1757 1757 docket = nodemap_data[0]
1758 1758 if (
1759 1759 len(d[0]) > docket.tip_rev
1760 1760 and d[0][docket.tip_rev][7] == docket.tip_node
1761 1761 ):
1762 1762 # no changelog tampering
1763 1763 self._nodemap_docket = docket
1764 1764 index.update_nodemap_data(*nodemap_data)
1765 1765 except (ValueError, IndexError):
1766 1766 raise error.RevlogError(
1767 1767 _(b"index %s is corrupted") % self.display_id
1768 1768 )
1769 1769 self.index = index
1770 1770 # revnum -> (chain-length, sum-delta-length)
1771 1771 self._chaininfocache = util.lrucachedict(500)
1772 1772
1773 1773 return chunkcache
1774 1774
1775 1775 def _load_inner(self, chunk_cache):
1776 1776 if self._docket is None:
1777 1777 default_compression_header = None
1778 1778 else:
1779 1779 default_compression_header = self._docket.default_compression_header
1780 1780
1781 1781 self._inner = _InnerRevlog(
1782 1782 opener=self.opener,
1783 1783 index=self.index,
1784 1784 index_file=self._indexfile,
1785 1785 data_file=self._datafile,
1786 1786 sidedata_file=self._sidedatafile,
1787 1787 inline=self._inline,
1788 1788 data_config=self.data_config,
1789 1789 delta_config=self.delta_config,
1790 1790 feature_config=self.feature_config,
1791 1791 chunk_cache=chunk_cache,
1792 1792 default_compression_header=default_compression_header,
1793 1793 )
1794 1794
1795 1795 def get_revlog(self):
1796 1796 """simple function to mirror API of other not-really-revlog API"""
1797 1797 return self
1798 1798
1799 1799 @util.propertycache
1800 1800 def revlog_kind(self):
1801 1801 return self.target[0]
1802 1802
1803 1803 @util.propertycache
1804 1804 def display_id(self):
1805 1805 """The public facing "ID" of the revlog that we use in message"""
1806 1806 if self.revlog_kind == KIND_FILELOG:
1807 1807 # Reference the file without the "data/" prefix, so it is familiar
1808 1808 # to the user.
1809 1809 return self.target[1]
1810 1810 else:
1811 1811 return self.radix
1812 1812
1813 1813 def _datafp(self, mode=b'r'):
1814 1814 """file object for the revlog's data file"""
1815 1815 return self.opener(self._datafile, mode=mode)
1816 1816
1817 1817 def tiprev(self):
1818 1818 return len(self.index) - 1
1819 1819
1820 1820 def tip(self):
1821 1821 return self.node(self.tiprev())
1822 1822
1823 1823 def __contains__(self, rev):
1824 1824 return 0 <= rev < len(self)
1825 1825
1826 1826 def __len__(self):
1827 1827 return len(self.index)
1828 1828
1829 1829 def __iter__(self) -> Iterator[int]:
1830 1830 return iter(range(len(self)))
1831 1831
1832 1832 def revs(self, start=0, stop=None):
1833 1833 """iterate over all rev in this revlog (from start to stop)"""
1834 1834 return storageutil.iterrevs(len(self), start=start, stop=stop)
1835 1835
1836 1836 def hasnode(self, node):
1837 1837 try:
1838 1838 self.rev(node)
1839 1839 return True
1840 1840 except KeyError:
1841 1841 return False
1842 1842
1843 1843 def _candelta(self, baserev, rev):
1844 1844 """whether two revisions (baserev, rev) can be delta-ed or not"""
1845 1845 # Disable delta if either rev requires a content-changing flag
1846 1846 # processor (ex. LFS). This is because such flag processor can alter
1847 1847 # the rawtext content that the delta will be based on, and two clients
1848 1848 # could have the same revlog node with different flags (i.e. different
1849 1849 # rawtext contents) and the delta could be incompatible.
1850 1850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1851 1851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1852 1852 ):
1853 1853 return False
1854 1854 return True
1855 1855
1856 1856 def update_caches(self, transaction):
1857 1857 """update on disk cache
1858 1858
1859 1859 If a transaction is passed, the update may be delayed to transaction
1860 1860 commit."""
1861 1861 if self._nodemap_file is not None:
1862 1862 if transaction is None:
1863 1863 nodemaputil.update_persistent_nodemap(self)
1864 1864 else:
1865 1865 nodemaputil.setup_persistent_nodemap(transaction, self)
1866 1866
1867 1867 def clearcaches(self):
1868 1868 """Clear in-memory caches"""
1869 1869 self._chainbasecache.clear()
1870 1870 self._inner.clear_cache()
1871 1871 self._pcache = {}
1872 1872 self._nodemap_docket = None
1873 1873 self.index.clearcaches()
1874 1874 # The Python code is the one responsible for validating the docket, so
1875 1875 # we end up having to refresh it here.
1876 1876 use_nodemap = (
1877 1877 not self._inline
1878 1878 and self._nodemap_file is not None
1879 1879 and hasattr(self.index, 'update_nodemap_data')
1880 1880 )
1881 1881 if use_nodemap:
1882 1882 nodemap_data = nodemaputil.persisted_data(self)
1883 1883 if nodemap_data is not None:
1884 1884 self._nodemap_docket = nodemap_data[0]
1885 1885 self.index.update_nodemap_data(*nodemap_data)
1886 1886
1887 1887 def rev(self, node):
1888 1888 """return the revision number associated with a <nodeid>"""
1889 1889 try:
1890 1890 return self.index.rev(node)
1891 1891 except TypeError:
1892 1892 raise
1893 1893 except error.RevlogError:
1894 1894 # parsers.c radix tree lookup failed
1895 1895 if (
1896 1896 node == self.nodeconstants.wdirid
1897 1897 or node in self.nodeconstants.wdirfilenodeids
1898 1898 ):
1899 1899 raise error.WdirUnsupported
1900 1900 raise error.LookupError(node, self.display_id, _(b'no node'))
1901 1901
1902 1902 # Accessors for index entries.
1903 1903
1904 1904 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1905 1905 # are flags.
1906 1906 def start(self, rev):
1907 1907 return int(self.index[rev][0] >> 16)
1908 1908
1909 1909 def sidedata_cut_off(self, rev):
1910 1910 sd_cut_off = self.index[rev][8]
1911 1911 if sd_cut_off != 0:
1912 1912 return sd_cut_off
1913 1913 # This is some annoying dance, because entries without sidedata
1914 1914 # currently use 0 as their offset (instead of previous-offset +
1915 1915 # previous-size).
1916 1916 #
1917 1917 # We should reconsider this sidedata → 0 sidedata_offset policy.
1918 1918 # In the meantime, we need this.
1919 1919 while 0 <= rev:
1920 1920 e = self.index[rev]
1921 1921 if e[9] != 0:
1922 1922 return e[8] + e[9]
1923 1923 rev -= 1
1924 1924 return 0
1925 1925
1926 1926 def flags(self, rev):
1927 1927 return self.index[rev][0] & 0xFFFF
1928 1928
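# Illustration (not part of this module): the first index-entry field packs
# the data offset and the flags into a single integer, which start() and
# flags() above unpack:
#
#     offset_flags = self.index[rev][0]
#     offset = offset_flags >> 16    # upper 6 bytes: position in the data
#     flags = offset_flags & 0xFFFF  # lower 2 bytes: REVIDX_* flags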
1929 1929 def length(self, rev):
1930 1930 return self.index[rev][1]
1931 1931
1932 1932 def sidedata_length(self, rev):
1933 1933 if not self.feature_config.has_side_data:
1934 1934 return 0
1935 1935 return self.index[rev][9]
1936 1936
1937 1937 def rawsize(self, rev):
1938 1938 """return the length of the uncompressed text for a given revision"""
1939 1939 l = self.index[rev][2]
1940 1940 if l >= 0:
1941 1941 return l
1942 1942
1943 1943 t = self.rawdata(rev)
1944 1944 return len(t)
1945 1945
1946 1946 def size(self, rev):
1947 1947 """length of non-raw text (processed by a "read" flag processor)"""
1948 1948 # fast path: if no "read" flag processor could change the content,
1949 1949 # size is rawsize. note: ELLIPSIS is known to not change the content.
1950 1950 flags = self.flags(rev)
1951 1951 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1952 1952 return self.rawsize(rev)
1953 1953
1954 1954 return len(self.revision(rev))
1955 1955
1956 1956 def fast_rank(self, rev):
1957 1957 """Return the rank of a revision if already known, or None otherwise.
1958 1958
1959 1959 The rank of a revision is the size of the sub-graph it defines as a
1960 1960 head. Equivalently, the rank of a revision `r` is the size of the set
1961 1961 `ancestors(r)`, `r` included.
1962 1962
1963 1963 This method returns the rank retrieved from the revlog in constant
1964 1964 time. It makes no attempt at computing unknown values for versions of
1965 1965 the revlog which do not persist the rank.
1966 1966 """
1967 1967 rank = self.index[rev][ENTRY_RANK]
1968 1968 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1969 1969 return None
1970 1970 if rev == nullrev:
1971 1971 return 0 # convention
1972 1972 return rank
1973 1973
1974 1974 def chainbase(self, rev):
1975 1975 base = self._chainbasecache.get(rev)
1976 1976 if base is not None:
1977 1977 return base
1978 1978
1979 1979 index = self.index
1980 1980 iterrev = rev
1981 1981 base = index[iterrev][3]
1982 1982 while base != iterrev:
1983 1983 iterrev = base
1984 1984 base = index[iterrev][3]
1985 1985
1986 1986 self._chainbasecache[rev] = base
1987 1987 return base
1988 1988
1989 1989 def linkrev(self, rev):
1990 1990 return self.index[rev][4]
1991 1991
1992 1992 def parentrevs(self, rev):
1993 1993 try:
1994 1994 entry = self.index[rev]
1995 1995 except IndexError:
1996 1996 if rev == wdirrev:
1997 1997 raise error.WdirUnsupported
1998 1998 raise
1999 1999
2000 2000 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2001 2001 return entry[6], entry[5]
2002 2002 else:
2003 2003 return entry[5], entry[6]
2004 2004
2005 2005 # fast parentrevs(rev) where rev isn't filtered
2006 2006 _uncheckedparentrevs = parentrevs
2007 2007
2008 2008 def node(self, rev):
2009 2009 try:
2010 2010 return self.index[rev][7]
2011 2011 except IndexError:
2012 2012 if rev == wdirrev:
2013 2013 raise error.WdirUnsupported
2014 2014 raise
2015 2015
2016 2016 # Derived from index values.
2017 2017
2018 2018 def end(self, rev):
2019 2019 return self.start(rev) + self.length(rev)
2020 2020
2021 2021 def parents(self, node):
2022 2022 i = self.index
2023 2023 d = i[self.rev(node)]
2024 2024 # inline node() to avoid function call overhead
2025 2025 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2026 2026 return i[d[6]][7], i[d[5]][7]
2027 2027 else:
2028 2028 return i[d[5]][7], i[d[6]][7]
2029 2029
2030 2030 def chainlen(self, rev):
2031 2031 return self._chaininfo(rev)[0]
2032 2032
2033 2033 def _chaininfo(self, rev):
2034 2034 chaininfocache = self._chaininfocache
2035 2035 if rev in chaininfocache:
2036 2036 return chaininfocache[rev]
2037 2037 index = self.index
2038 2038 generaldelta = self.delta_config.general_delta
2039 2039 iterrev = rev
2040 2040 e = index[iterrev]
2041 2041 clen = 0
2042 2042 compresseddeltalen = 0
2043 2043 while iterrev != e[3]:
2044 2044 clen += 1
2045 2045 compresseddeltalen += e[1]
2046 2046 if generaldelta:
2047 2047 iterrev = e[3]
2048 2048 else:
2049 2049 iterrev -= 1
2050 2050 if iterrev in chaininfocache:
2051 2051 t = chaininfocache[iterrev]
2052 2052 clen += t[0]
2053 2053 compresseddeltalen += t[1]
2054 2054 break
2055 2055 e = index[iterrev]
2056 2056 else:
2057 2057 # Add text length of base since decompressing that also takes
2058 2058 # work. For cache hits the length is already included.
2059 2059 compresseddeltalen += e[1]
2060 2060 r = (clen, compresseddeltalen)
2061 2061 chaininfocache[rev] = r
2062 2062 return r
2063 2063
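# Illustrative sketch (not part of this module) of the walk _chaininfo()
# performs in its general-delta form: entry slot 3 holds the delta base,
# and the chain ends at a full snapshot, i.e. an entry that is its own
# base:
#
#     def naive_chainlen(index, rev):
#         length = 0
#         while index[rev][3] != rev:
#             length += 1
#             rev = index[rev][3]
#         return length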
2064 2064 def _deltachain(self, rev, stoprev=None):
2065 2065 return self._inner._deltachain(rev, stoprev=stoprev)
2066 2066
2067 2067 def ancestors(self, revs, stoprev=0, inclusive=False):
2068 2068 """Generate the ancestors of 'revs' in reverse revision order.
2069 2069 Does not generate revs lower than stoprev.
2070 2070
2071 2071 See the documentation for ancestor.lazyancestors for more details."""
2072 2072
2073 2073 # first, make sure start revisions aren't filtered
2074 2074 revs = list(revs)
2075 2075 checkrev = self.node
2076 2076 for r in revs:
2077 2077 checkrev(r)
2078 2078 # and we're sure ancestors aren't filtered as well
2079 2079
2080 2080 if rustancestor is not None and self.index.rust_ext_compat:
2081 2081 lazyancestors = rustancestor.LazyAncestors
2082 2082 arg = self.index
2083 2083 else:
2084 2084 lazyancestors = ancestor.lazyancestors
2085 2085 arg = self._uncheckedparentrevs
2086 2086 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2087 2087
2088 2088 def descendants(self, revs):
2089 2089 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2090 2090
2091 2091 def findcommonmissing(self, common=None, heads=None):
2092 2092 """Return a tuple of the ancestors of common and the ancestors of heads
2093 2093 that are not ancestors of common. In revset terminology, we return the
2094 2094 tuple:
2095 2095
2096 2096 ::common, (::heads) - (::common)
2097 2097
2098 2098 The list is sorted by revision number, meaning it is
2099 2099 topologically sorted.
2100 2100
2101 2101 'heads' and 'common' are both lists of node IDs. If heads is
2102 2102 not supplied, uses all of the revlog's heads. If common is not
2103 2103 supplied, uses nullid."""
2104 2104 if common is None:
2105 2105 common = [self.nullid]
2106 2106 if heads is None:
2107 2107 heads = self.heads()
2108 2108
2109 2109 common = [self.rev(n) for n in common]
2110 2110 heads = [self.rev(n) for n in heads]
2111 2111
2112 2112 # we want the ancestors, but inclusive
2113 2113 class lazyset:
2114 2114 def __init__(self, lazyvalues):
2115 2115 self.addedvalues = set()
2116 2116 self.lazyvalues = lazyvalues
2117 2117
2118 2118 def __contains__(self, value):
2119 2119 return value in self.addedvalues or value in self.lazyvalues
2120 2120
2121 2121 def __iter__(self):
2122 2122 added = self.addedvalues
2123 2123 for r in added:
2124 2124 yield r
2125 2125 for r in self.lazyvalues:
2126 2126 if r not in added:
2127 2127 yield r
2128 2128
2129 2129 def add(self, value):
2130 2130 self.addedvalues.add(value)
2131 2131
2132 2132 def update(self, values):
2133 2133 self.addedvalues.update(values)
2134 2134
2135 2135 has = lazyset(self.ancestors(common))
2136 2136 has.add(nullrev)
2137 2137 has.update(common)
2138 2138
2139 2139 # take all ancestors from heads that aren't in has
2140 2140 missing = set()
2141 2141 visit = collections.deque(r for r in heads if r not in has)
2142 2142 while visit:
2143 2143 r = visit.popleft()
2144 2144 if r in missing:
2145 2145 continue
2146 2146 else:
2147 2147 missing.add(r)
2148 2148 for p in self.parentrevs(r):
2149 2149 if p not in has:
2150 2150 visit.append(p)
2151 2151 missing = list(missing)
2152 2152 missing.sort()
2153 2153 return has, [self.node(miss) for miss in missing]
2154 2154
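# Usage sketch (hypothetical linear revlog 0 -> 1 -> 2 -> 3): asking what
# a peer that already has revision 1 is missing from our head at
# revision 3:
#
#     has, missing = rl.findcommonmissing(
#         common=[rl.node(1)], heads=[rl.node(3)]
#     )
#     # missing == [rl.node(2), rl.node(3)], sorted by revision number;
#     # `has` answers membership lazily for ::common (plus nullrev)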
2155 2155 def incrementalmissingrevs(self, common=None):
2156 2156 """Return an object that can be used to incrementally compute the
2157 2157 revision numbers of the ancestors of arbitrary sets that are not
2158 2158 ancestors of common. This is an ancestor.incrementalmissingancestors
2159 2159 object.
2160 2160
2161 2161 'common' is a list of revision numbers. If common is not supplied, uses
2162 2162 nullrev.
2163 2163 """
2164 2164 if common is None:
2165 2165 common = [nullrev]
2166 2166
2167 2167 if rustancestor is not None and self.index.rust_ext_compat:
2168 2168 return rustancestor.MissingAncestors(self.index, common)
2169 2169 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2170 2170
2171 2171 def findmissingrevs(self, common=None, heads=None):
2172 2172 """Return the revision numbers of the ancestors of heads that
2173 2173 are not ancestors of common.
2174 2174
2175 2175 More specifically, return a list of revision numbers corresponding to
2176 2176 nodes N such that every N satisfies the following constraints:
2177 2177
2178 2178 1. N is an ancestor of some node in 'heads'
2179 2179 2. N is not an ancestor of any node in 'common'
2180 2180
2181 2181 The list is sorted by revision number, meaning it is
2182 2182 topologically sorted.
2183 2183
2184 2184 'heads' and 'common' are both lists of revision numbers. If heads is
2185 2185 not supplied, uses all of the revlog's heads. If common is not
2186 2186 supplied, uses nullid."""
2187 2187 if common is None:
2188 2188 common = [nullrev]
2189 2189 if heads is None:
2190 2190 heads = self.headrevs()
2191 2191
2192 2192 inc = self.incrementalmissingrevs(common=common)
2193 2193 return inc.missingancestors(heads)
2194 2194
2195 2195 def findmissing(self, common=None, heads=None):
2196 2196 """Return the ancestors of heads that are not ancestors of common.
2197 2197
2198 2198 More specifically, return a list of nodes N such that every N
2199 2199 satisfies the following constraints:
2200 2200
2201 2201 1. N is an ancestor of some node in 'heads'
2202 2202 2. N is not an ancestor of any node in 'common'
2203 2203
2204 2204 The list is sorted by revision number, meaning it is
2205 2205 topologically sorted.
2206 2206
2207 2207 'heads' and 'common' are both lists of node IDs. If heads is
2208 2208 not supplied, uses all of the revlog's heads. If common is not
2209 2209 supplied, uses nullid."""
2210 2210 if common is None:
2211 2211 common = [self.nullid]
2212 2212 if heads is None:
2213 2213 heads = self.heads()
2214 2214
2215 2215 common = [self.rev(n) for n in common]
2216 2216 heads = [self.rev(n) for n in heads]
2217 2217
2218 2218 inc = self.incrementalmissingrevs(common=common)
2219 2219 return [self.node(r) for r in inc.missingancestors(heads)]
2220 2220
2221 2221 def nodesbetween(self, roots=None, heads=None):
2222 2222 """Return a topological path from 'roots' to 'heads'.
2223 2223
2224 2224 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2225 2225 topologically sorted list of all nodes N that satisfy both of
2226 2226 these constraints:
2227 2227
2228 2228 1. N is a descendant of some node in 'roots'
2229 2229 2. N is an ancestor of some node in 'heads'
2230 2230
2231 2231 Every node is considered to be both a descendant and an ancestor
2232 2232 of itself, so every reachable node in 'roots' and 'heads' will be
2233 2233 included in 'nodes'.
2234 2234
2235 2235 'outroots' is the list of reachable nodes in 'roots', i.e., the
2236 2236 subset of 'roots' that is returned in 'nodes'. Likewise,
2237 2237 'outheads' is the subset of 'heads' that is also in 'nodes'.
2238 2238
2239 2239 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2240 2240 unspecified, uses nullid as the only root. If 'heads' is
2241 2241 unspecified, uses list of all of the revlog's heads."""
2242 2242 nonodes = ([], [], [])
2243 2243 if roots is not None:
2244 2244 roots = list(roots)
2245 2245 if not roots:
2246 2246 return nonodes
2247 2247 lowestrev = min([self.rev(n) for n in roots])
2248 2248 else:
2249 2249 roots = [self.nullid] # Everybody's a descendant of nullid
2250 2250 lowestrev = nullrev
2251 2251 if (lowestrev == nullrev) and (heads is None):
2252 2252 # We want _all_ the nodes!
2253 2253 return (
2254 2254 [self.node(r) for r in self],
2255 2255 [self.nullid],
2256 2256 list(self.heads()),
2257 2257 )
2258 2258 if heads is None:
2259 2259 # All nodes are ancestors, so the latest ancestor is the last
2260 2260 # node.
2261 2261 highestrev = len(self) - 1
2262 2262 # Set ancestors to None to signal that every node is an ancestor.
2263 2263 ancestors = None
2264 2264 # Set heads to an empty dictionary for later discovery of heads
2265 2265 heads = {}
2266 2266 else:
2267 2267 heads = list(heads)
2268 2268 if not heads:
2269 2269 return nonodes
2270 2270 ancestors = set()
2271 2271 # Turn heads into a dictionary so we can remove 'fake' heads.
2272 2272 # Also, later we will be using it to filter out the heads we can't
2273 2273 # find from roots.
2274 2274 heads = dict.fromkeys(heads, False)
2275 2275 # Start at the top and keep marking parents until we're done.
2276 2276 nodestotag = set(heads)
2277 2277 # Remember where the top was so we can use it as a limit later.
2278 2278 highestrev = max([self.rev(n) for n in nodestotag])
2279 2279 while nodestotag:
2280 2280 # grab a node to tag
2281 2281 n = nodestotag.pop()
2282 2282 # Never tag nullid
2283 2283 if n == self.nullid:
2284 2284 continue
2285 2285 # A node's revision number represents its place in a
2286 2286 # topologically sorted list of nodes.
2287 2287 r = self.rev(n)
2288 2288 if r >= lowestrev:
2289 2289 if n not in ancestors:
2290 2290 # If we are possibly a descendant of one of the roots
2291 2291 # and we haven't already been marked as an ancestor
2292 2292 ancestors.add(n) # Mark as ancestor
2293 2293 # Add non-nullid parents to list of nodes to tag.
2294 2294 nodestotag.update(
2295 2295 [p for p in self.parents(n) if p != self.nullid]
2296 2296 )
2297 2297 elif n in heads: # We've seen it before, is it a fake head?
2298 2298 # So it is, real heads should not be the ancestors of
2299 2299 # any other heads.
2300 2300 heads.pop(n)
2301 2301 if not ancestors:
2302 2302 return nonodes
2303 2303 # Now that we have our set of ancestors, we want to remove any
2304 2304 # roots that are not ancestors.
2305 2305
2306 2306 # If one of the roots was nullid, everything is included anyway.
2307 2307 if lowestrev > nullrev:
2308 2308 # But, since we weren't, let's recompute the lowest rev to not
2309 2309 # include roots that aren't ancestors.
2310 2310
2311 2311 # Filter out roots that aren't ancestors of heads
2312 2312 roots = [root for root in roots if root in ancestors]
2313 2313 # Recompute the lowest revision
2314 2314 if roots:
2315 2315 lowestrev = min([self.rev(root) for root in roots])
2316 2316 else:
2317 2317 # No more roots? Return empty list
2318 2318 return nonodes
2319 2319 else:
2320 2320 # We are descending from nullid, and don't need to care about
2321 2321 # any other roots.
2322 2322 lowestrev = nullrev
2323 2323 roots = [self.nullid]
2324 2324 # Transform our roots list into a set.
2325 2325 descendants = set(roots)
2326 2326 # Also, keep the original roots so we can filter out roots that aren't
2327 2327 # 'real' roots (i.e. are descended from other roots).
2328 2328 roots = descendants.copy()
2329 2329 # Our topologically sorted list of output nodes.
2330 2330 orderedout = []
2331 2331 # Don't start at nullid since we don't want nullid in our output list,
2332 2332 # and if nullid shows up in descendants, empty parents will look like
2333 2333 # they're descendants.
2334 2334 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2335 2335 n = self.node(r)
2336 2336 isdescendant = False
2337 2337 if lowestrev == nullrev: # Everybody is a descendant of nullid
2338 2338 isdescendant = True
2339 2339 elif n in descendants:
2340 2340 # n is already a descendant
2341 2341 isdescendant = True
2342 2342 # This check only needs to be done here because all the roots
2343 2343 # will start being marked as descendants before the loop.
2344 2344 if n in roots:
2345 2345 # If n was a root, check if it's a 'real' root.
2346 2346 p = tuple(self.parents(n))
2347 2347 # If any of its parents are descendants, it's not a root.
2348 2348 if (p[0] in descendants) or (p[1] in descendants):
2349 2349 roots.remove(n)
2350 2350 else:
2351 2351 p = tuple(self.parents(n))
2352 2352 # A node is a descendant if either of its parents is a
2353 2353 # descendant. (We seeded the descendants set with the roots
2354 2354 # up there, remember?)
2355 2355 if (p[0] in descendants) or (p[1] in descendants):
2356 2356 descendants.add(n)
2357 2357 isdescendant = True
2358 2358 if isdescendant and ((ancestors is None) or (n in ancestors)):
2359 2359 # Only include nodes that are both descendants and ancestors.
2360 2360 orderedout.append(n)
2361 2361 if (ancestors is not None) and (n in heads):
2362 2362 # We're trying to figure out which heads are reachable
2363 2363 # from roots.
2364 2364 # Mark this head as having been reached
2365 2365 heads[n] = True
2366 2366 elif ancestors is None:
2367 2367 # Otherwise, we're trying to discover the heads.
2368 2368 # Assume this is a head because if it isn't, the next step
2369 2369 # will eventually remove it.
2370 2370 heads[n] = True
2371 2371 # But, obviously its parents aren't.
2372 2372 for p in self.parents(n):
2373 2373 heads.pop(p, None)
2374 2374 heads = [head for head, flag in heads.items() if flag]
2375 2375 roots = list(roots)
2376 2376 assert orderedout
2377 2377 assert roots
2378 2378 assert heads
2379 2379 return (orderedout, roots, heads)
2380 2380
2381 2381 def headrevs(self, revs=None):
2382 2382 if revs is None:
2383 2383 try:
2384 2384 return self.index.headrevs()
2385 2385 except AttributeError:
2386 2386 return self._headrevs()
2387 2387 if rustdagop is not None and self.index.rust_ext_compat:
2388 2388 return rustdagop.headrevs(self.index, revs)
2389 2389 return dagop.headrevs(revs, self._uncheckedparentrevs)
2390 2390
2391 2391 def headrevsdiff(self, start, stop):
2392 2392 try:
2393 2393 return self.index.headrevsdiff(start, stop)
2394 2394 except AttributeError:
2395 2395 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2396 2396
2397 2397 def computephases(self, roots):
2398 2398 return self.index.computephasesmapsets(roots)
2399 2399
2400 2400 def _headrevs(self):
2401 2401 count = len(self)
2402 2402 if not count:
2403 2403 return [nullrev]
2404 2404 # we won't iterate over filtered revs, so nobody is a head at the start
2405 2405 ishead = [0] * (count + 1)
2406 2406 index = self.index
2407 2407 for r in self:
2408 2408 ishead[r] = 1 # I may be a head
2409 2409 e = index[r]
2410 2410 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2411 2411 return [r for r, val in enumerate(ishead) if val]
2412 2412
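# Illustration (hypothetical graph, not part of this module): for the
# parent table below (two branches off revision 0), the scoreboard above
# clears the mark of every revision that appears as a parent, leaving
# heads [2, 3]:
#
#     parents = {0: (nullrev, nullrev), 1: (0, nullrev),
#                2: (1, nullrev), 3: (0, nullrev)}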
2413 2413 def _head_node_ids(self):
2414 2414 try:
2415 2415 return self.index.head_node_ids()
2416 2416 except AttributeError:
2417 2417 return [self.node(r) for r in self.headrevs()]
2418 2418
2419 2419 def heads(self, start=None, stop=None):
2420 2420 """return the list of all nodes that have no children
2421 2421
2422 2422 if start is specified, only heads that are descendants of
2423 2423 start will be returned
2424 2424 if stop is specified, it will consider all the revs from stop
2425 2425 as if they had no children
2426 2426 """
2427 2427 if start is None and stop is None:
2428 2428 if not len(self):
2429 2429 return [self.nullid]
2430 2430 return self._head_node_ids()
2431 2431 if start is None:
2432 2432 start = nullrev
2433 2433 else:
2434 2434 start = self.rev(start)
2435 2435
2436 2436 stoprevs = {self.rev(n) for n in stop or []}
2437 2437
2438 2438 revs = dagop.headrevssubset(
2439 2439 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2440 2440 )
2441 2441
2442 2442 return [self.node(rev) for rev in revs]
2443 2443
2444 2444 def diffheads(self, start, stop):
2445 2445 """return the nodes that make up the difference between
2446 2446 heads of revs before `start` and heads of revs before `stop`"""
2447 2447 removed, added = self.headrevsdiff(start, stop)
2448 2448 return [self.node(r) for r in removed], [self.node(r) for r in added]
2449 2449
2450 2450 def children(self, node):
2451 2451 """find the children of a given node"""
2452 2452 c = []
2453 2453 p = self.rev(node)
2454 2454 for r in self.revs(start=p + 1):
2455 2455 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2456 2456 if prevs:
2457 2457 for pr in prevs:
2458 2458 if pr == p:
2459 2459 c.append(self.node(r))
2460 2460 elif p == nullrev:
2461 2461 c.append(self.node(r))
2462 2462 return c
2463 2463
2464 2464 def commonancestorsheads(self, a, b):
2465 2465 """calculate all the heads of the common ancestors of nodes a and b"""
2466 2466 a, b = self.rev(a), self.rev(b)
2467 2467 ancs = self._commonancestorsheads(a, b)
2468 2468 return pycompat.maplist(self.node, ancs)
2469 2469
2470 2470 def _commonancestorsheads(self, *revs):
2471 2471 """calculate all the heads of the common ancestors of revs"""
2472 2472 try:
2473 2473 ancs = self.index.commonancestorsheads(*revs)
2474 2474 except (AttributeError, OverflowError): # C implementation failed
2475 2475 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2476 2476 return ancs
2477 2477
2478 2478 def isancestor(self, a, b):
2479 2479 """return True if node a is an ancestor of node b
2480 2480
2481 2481 A revision is considered an ancestor of itself."""
2482 2482 a, b = self.rev(a), self.rev(b)
2483 2483 return self.isancestorrev(a, b)
2484 2484
2485 2485 def isancestorrev(self, a, b):
2486 2486 """return True if revision a is an ancestor of revision b
2487 2487
2488 2488 A revision is considered an ancestor of itself.
2489 2489
2490 2490 The implementation of this is trivial but the use of
2491 2491 reachableroots is not."""
2492 2492 if a == nullrev:
2493 2493 return True
2494 2494 elif a == b:
2495 2495 return True
2496 2496 elif a > b:
2497 2497 return False
2498 2498 return bool(self.reachableroots(a, [b], [a], includepath=False))
2499 2499
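# Note on the shortcuts above (illustration only): revision numbers are
# topologically sorted, so an ancestor always has a smaller number than
# its descendants; that is why `a > b` can immediately return False. The
# remaining case asks reachableroots whether walking b's ancestry, bounded
# below by minroot=a, reaches the root set {a}:
#
#     rl.isancestorrev(2, 5)  # reachableroots(2, heads=[5], roots=[2])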
2500 2500 def reachableroots(self, minroot, heads, roots, includepath=False):
2501 2501 """return (heads(::(<roots> and <roots>::<heads>)))
2502 2502
2503 2503 If includepath is True, return (<roots>::<heads>)."""
2504 2504 try:
2505 2505 return self.index.reachableroots2(
2506 2506 minroot, heads, roots, includepath
2507 2507 )
2508 2508 except AttributeError:
2509 2509 return dagop._reachablerootspure(
2510 2510 self.parentrevs, minroot, roots, heads, includepath
2511 2511 )
2512 2512
2513 2513 def ancestor(self, a, b):
2514 2514 """calculate the "best" common ancestor of nodes a and b"""
2515 2515
2516 2516 a, b = self.rev(a), self.rev(b)
2517 2517 try:
2518 2518 ancs = self.index.ancestors(a, b)
2519 2519 except (AttributeError, OverflowError):
2520 2520 ancs = ancestor.ancestors(self.parentrevs, a, b)
2521 2521 if ancs:
2522 2522 # choose a consistent winner when there's a tie
2523 2523 return min(map(self.node, ancs))
2524 2524 return self.nullid
2525 2525
2526 2526 def _match(self, id):
2527 2527 if isinstance(id, int):
2528 2528 # rev
2529 2529 return self.node(id)
2530 2530 if len(id) == self.nodeconstants.nodelen:
2531 2531 # possibly a binary node
2532 2532 # odds of a binary node being all hex in ASCII are 1 in 10**25
2533 2533 try:
2534 2534 node = id
2535 2535 self.rev(node) # quick search the index
2536 2536 return node
2537 2537 except error.LookupError:
2538 2538 pass # may be partial hex id
2539 2539 try:
2540 2540 # str(rev)
2541 2541 rev = int(id)
2542 2542 if b"%d" % rev != id:
2543 2543 raise ValueError
2544 2544 if rev < 0:
2545 2545 rev = len(self) + rev
2546 2546 if rev < 0 or rev >= len(self):
2547 2547 raise ValueError
2548 2548 return self.node(rev)
2549 2549 except (ValueError, OverflowError):
2550 2550 pass
2551 2551 if len(id) == 2 * self.nodeconstants.nodelen:
2552 2552 try:
2553 2553 # a full hex nodeid?
2554 2554 node = bin(id)
2555 2555 self.rev(node)
2556 2556 return node
2557 2557 except (binascii.Error, error.LookupError):
2558 2558 pass
2559 2559
2560 2560 def _partialmatch(self, id):
2561 2561 # we don't care about wdirfilenodeids as they should always be full hashes
2562 2562 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2563 2563 ambiguous = False
2564 2564 try:
2565 2565 partial = self.index.partialmatch(id)
2566 2566 if partial and self.hasnode(partial):
2567 2567 if maybewdir:
2568 2568 # single 'ff...' match in radix tree, ambiguous with wdir
2569 2569 ambiguous = True
2570 2570 else:
2571 2571 return partial
2572 2572 elif maybewdir:
2573 2573 # no 'ff...' match in radix tree, wdir identified
2574 2574 raise error.WdirUnsupported
2575 2575 else:
2576 2576 return None
2577 2577 except error.RevlogError:
2578 2578 # parsers.c radix tree lookup gave multiple matches
2579 2579 # fast path: for unfiltered changelog, radix tree is accurate
2580 2580 if not getattr(self, 'filteredrevs', None):
2581 2581 ambiguous = True
2582 2582 # fall through to slow path that filters hidden revisions
2583 2583 except (AttributeError, ValueError):
2584 2584 # we are pure python, or key is not hex
2585 2585 pass
2586 2586 if ambiguous:
2587 2587 raise error.AmbiguousPrefixLookupError(
2588 2588 id, self.display_id, _(b'ambiguous identifier')
2589 2589 )
2590 2590
2591 2591 if id in self._pcache:
2592 2592 return self._pcache[id]
2593 2593
2594 2594 if len(id) <= 40:
2595 2595 # hex(node)[:...]
2596 2596 l = len(id) // 2 * 2 # grab an even number of digits
2597 2597 try:
2598 2598 # we're dropping the last digit, so let's check that it's hex,
2599 2599 # to avoid the expensive computation below if it's not
2600 2600 if len(id) % 2 > 0:
2601 2601 if id[-1] not in hexdigits:
2602 2602 return None
2603 2603 prefix = bin(id[:l])
2604 2604 except binascii.Error:
2605 2605 pass
2606 2606 else:
2607 2607 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2608 2608 nl = [
2609 2609 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2610 2610 ]
2611 2611 if self.nodeconstants.nullhex.startswith(id):
2612 2612 nl.append(self.nullid)
2613 2613 if len(nl) > 0:
2614 2614 if len(nl) == 1 and not maybewdir:
2615 2615 self._pcache[id] = nl[0]
2616 2616 return nl[0]
2617 2617 raise error.AmbiguousPrefixLookupError(
2618 2618 id, self.display_id, _(b'ambiguous identifier')
2619 2619 )
2620 2620 if maybewdir:
2621 2621 raise error.WdirUnsupported
2622 2622 return None
2623 2623
2624 2624 def lookup(self, id):
2625 2625 """locate a node based on:
2626 2626 - revision number or str(revision number)
2627 2627 - nodeid or subset of hex nodeid
2628 2628 """
2629 2629 n = self._match(id)
2630 2630 if n is not None:
2631 2631 return n
2632 2632 n = self._partialmatch(id)
2633 2633 if n:
2634 2634 return n
2635 2635
2636 2636 raise error.LookupError(id, self.display_id, _(b'no match found'))
2637 2637
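# Usage sketch (hypothetical identifiers): lookup() above resolves, in
# order, integer revs, full binary nodes, "%d"-style rev strings, full
# hex nodeids, and finally unambiguous hex prefixes:
#
#     rl.lookup(0)              # node of rev 0
#     rl.lookup(b'-1')          # node of the tip (len(rl) - 1)
#     rl.lookup(binnode)        # the binary node itself, if present
#     rl.lookup(hexnode)        # full 40-char hex form
#     rl.lookup(hexnode[:6])    # prefix, if unambiguous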
2638 2638 def shortest(self, node, minlength=1):
2639 2639 """Find the shortest unambiguous prefix that matches node."""
2640 2640
2641 2641 def isvalid(prefix):
2642 2642 try:
2643 2643 matchednode = self._partialmatch(prefix)
2644 2644 except error.AmbiguousPrefixLookupError:
2645 2645 return False
2646 2646 except error.WdirUnsupported:
2647 2647 # single 'ff...' match
2648 2648 return True
2649 2649 if matchednode is None:
2650 2650 raise error.LookupError(node, self.display_id, _(b'no node'))
2651 2651 return True
2652 2652
2653 2653 def maybewdir(prefix):
2654 2654 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2655 2655
2656 2656 hexnode = hex(node)
2657 2657
2658 2658 def disambiguate(hexnode, minlength):
2659 2659 """Disambiguate against wdirid."""
2660 2660 for length in range(minlength, len(hexnode) + 1):
2661 2661 prefix = hexnode[:length]
2662 2662 if not maybewdir(prefix):
2663 2663 return prefix
2664 2664
2665 2665 if not getattr(self, 'filteredrevs', None):
2666 2666 try:
2667 2667 length = max(self.index.shortest(node), minlength)
2668 2668 return disambiguate(hexnode, length)
2669 2669 except error.RevlogError:
2670 2670 if node != self.nodeconstants.wdirid:
2671 2671 raise error.LookupError(
2672 2672 node, self.display_id, _(b'no node')
2673 2673 )
2674 2674 except AttributeError:
2675 2675 # Fall through to pure code
2676 2676 pass
2677 2677
2678 2678 if node == self.nodeconstants.wdirid:
2679 2679 for length in range(minlength, len(hexnode) + 1):
2680 2680 prefix = hexnode[:length]
2681 2681 if isvalid(prefix):
2682 2682 return prefix
2683 2683
2684 2684 for length in range(minlength, len(hexnode) + 1):
2685 2685 prefix = hexnode[:length]
2686 2686 if isvalid(prefix):
2687 2687 return disambiguate(hexnode, length)
2688 2688
2689 2689 def cmp(self, node, text):
2690 2690 """compare text with a given file revision
2691 2691
2692 2692 returns True if text is different from what is stored.
2693 2693 """
2694 2694 p1, p2 = self.parents(node)
2695 2695 return storageutil.hashrevisionsha1(text, p1, p2) != node
2696 2696
2697 2697 def deltaparent(self, rev):
2698 2698 """return deltaparent of the given revision"""
2699 2699 base = self.index[rev][3]
2700 2700 if base == rev:
2701 2701 return nullrev
2702 2702 elif self.delta_config.general_delta:
2703 2703 return base
2704 2704 else:
2705 2705 return rev - 1
2706 2706
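# Illustration (not part of this module) of how deltaparent() reads the
# base field: entry slot 3 stores the delta base. A full snapshot is its
# own base; with general-delta the stored base is the real delta parent;
# without it, deltas always apply against the previous revision:
#
#     base = self.index[rev][3]
#     if base == rev:        # full snapshot, no delta parent
#         parent = nullrev
#     elif general_delta:
#         parent = base      # explicit base recorded in the index
#     else:
#         parent = rev - 1   # legacy layout: strictly sequential chains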
2707 2707 def issnapshot(self, rev):
2708 2708 """tells whether rev is a snapshot"""
2709 2709 ret = self._inner.issnapshot(rev)
2710 2710 self.issnapshot = self._inner.issnapshot
2711 2711 return ret
2712 2712
2713 2713 def snapshotdepth(self, rev):
2714 2714 """number of snapshot in the chain before this one"""
2715 2715 if not self.issnapshot(rev):
2716 2716 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2717 2717 return len(self._inner._deltachain(rev)[0]) - 1
2718 2718
2719 2719 def revdiff(self, rev1, rev2):
2720 2720 """return or calculate a delta between two revisions
2721 2721
2722 2722 The delta calculated is in binary form and is intended to be written to
2723 2723 revlog data directly. So this function needs raw revision data.
2724 2724 """
2725 2725 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2726 2726 return bytes(self._inner._chunk(rev2))
2727 2727
2728 2728 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2729 2729
2730 2730 def revision(self, nodeorrev):
2731 2731 """return an uncompressed revision of a given node or revision
2732 2732 number.
2733 2733 """
2734 2734 return self._revisiondata(nodeorrev)
2735 2735
2736 2736 def sidedata(self, nodeorrev):
2737 2737 """a map of extra data related to the changeset but not part of the hash
2738 2738
2739 2739 This function currently returns a dictionary. However, a more advanced
2740 2740 mapping object will likely be used in the future for more
2741 2741 efficient/lazy code.
2742 2742 """
2743 2743 # deal with <nodeorrev> argument type
2744 2744 if isinstance(nodeorrev, int):
2745 2745 rev = nodeorrev
2746 2746 else:
2747 2747 rev = self.rev(nodeorrev)
2748 2748 return self._sidedata(rev)
2749 2749
2750 2750 def _rawtext(self, node, rev):
2751 2751 """return the possibly unvalidated rawtext for a revision
2752 2752
2753 2753 returns (rev, rawtext, validated)
2754 2754 """
2755 2755 # Check if we have the entry in cache
2756 2756 # The cache entry looks like (node, rev, rawtext)
2757 2757 if self._inner._revisioncache:
2758 2758 if self._inner._revisioncache[0] == node:
2759 2759 return (rev, self._inner._revisioncache[2], True)
2760 2760
2761 2761 if rev is None:
2762 2762 rev = self.rev(node)
2763 2763
2764 2764 text = self._inner.raw_text(node, rev)
2765 2765 return (rev, text, False)
2766 2766
2767 2767 def _revisiondata(self, nodeorrev, raw=False):
2768 2768 # deal with <nodeorrev> argument type
2769 2769 if isinstance(nodeorrev, int):
2770 2770 rev = nodeorrev
2771 2771 node = self.node(rev)
2772 2772 else:
2773 2773 node = nodeorrev
2774 2774 rev = None
2775 2775
2776 2776 # fast path the special `nullid` rev
2777 2777 if node == self.nullid:
2778 2778 return b""
2779 2779
2780 2780 # ``rawtext`` is the text as stored inside the revlog. Might be the
2781 2781 # revision or might need to be processed to retrieve the revision.
2782 2782 rev, rawtext, validated = self._rawtext(node, rev)
2783 2783
2784 2784 if raw and validated:
2785 2785 # if we don't want to process the raw text and that raw
2786 2786 # text is cached, we can exit early.
2787 2787 return rawtext
2788 2788 if rev is None:
2789 2789 rev = self.rev(node)
2790 2790 # the revlog's flags for this revision
2791 2791 # (they usually alter its state or content)
2792 2792 flags = self.flags(rev)
2793 2793
2794 2794 if validated and flags == REVIDX_DEFAULT_FLAGS:
2795 2795 # no extra flags set, no flag processor runs, text = rawtext
2796 2796 return rawtext
2797 2797
2798 2798 if raw:
2799 2799 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2800 2800 text = rawtext
2801 2801 else:
2802 2802 r = flagutil.processflagsread(self, rawtext, flags)
2803 2803 text, validatehash = r
2804 2804 if validatehash:
2805 2805 self.checkhash(text, node, rev=rev)
2806 2806 if not validated:
2807 2807 self._inner._revisioncache = (node, rev, rawtext)
2808 2808
2809 2809 return text
2810 2810
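# Sketch of the pipeline implemented above (illustration only):
#
#     rawtext = text as stored in the revlog (possibly transformed by
#               "write" flag processors, e.g. censoring or extstored)
#     text    = rawtext after the "read" flag processors have run
#
#     raw=True  -> return rawtext, validated via processflagsraw()
#     raw=False -> return text, hash-checked when the processors ask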
2811 2811 def _sidedata(self, rev):
2812 2812 """Return the sidedata for a given revision number."""
2813 2813 if self._sidedatafile is None:
2814 2814 return {}
2815 2815 sidedata_end = None
2816 2816 if self._docket is not None:
2817 2817 sidedata_end = self._docket.sidedata_end
2818 2818 return self._inner.sidedata(rev, sidedata_end)
2819 2819
2820 2820 def rawdata(self, nodeorrev):
2821 2821 """return an uncompressed raw data of a given node or revision number."""
2822 2822 return self._revisiondata(nodeorrev, raw=True)
2823 2823
2824 2824 def hash(self, text, p1, p2):
2825 2825 """Compute a node hash.
2826 2826
2827 2827 Available as a function so that subclasses can replace the hash
2828 2828 as needed.
2829 2829 """
2830 2830 return storageutil.hashrevisionsha1(text, p1, p2)
2831 2831
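# Illustration (a minimal sketch, assuming the classic SHA-1 node scheme
# implemented by storageutil.hashrevisionsha1): the node is a SHA-1 over
# both parents, sorted bytewise with the smaller first, followed by the
# text:
#
#     import hashlib
#
#     def node_sha1(text, p1, p2):
#         a, b = sorted([p1, p2])
#         return hashlib.sha1(a + b + text).digest()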
2832 2832 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2833 2833 """Check node hash integrity.
2834 2834
2835 2835 Available as a function so that subclasses can extend hash mismatch
2836 2836 behaviors as needed.
2837 2837 """
2838 2838 try:
2839 2839 if p1 is None and p2 is None:
2840 2840 p1, p2 = self.parents(node)
2841 2841 if node != self.hash(text, p1, p2):
2842 2842 # Clear the revision cache on hash failure. The revision cache
2843 2843 # only stores the raw revision and clearing the cache does have
2844 2844 # the side-effect that we won't have a cache hit when the raw
2845 2845 # revision data is accessed. But this case should be rare and
2846 2846 # it is extra work to teach the cache about the hash
2847 2847 # verification state.
2848 2848 if (
2849 2849 self._inner._revisioncache
2850 2850 and self._inner._revisioncache[0] == node
2851 2851 ):
2852 2852 self._inner._revisioncache = None
2853 2853
2854 2854 revornode = rev
2855 2855 if revornode is None:
2856 2856 revornode = templatefilters.short(hex(node))
2857 2857 raise error.RevlogError(
2858 2858 _(b"integrity check failed on %s:%s")
2859 2859 % (self.display_id, pycompat.bytestr(revornode))
2860 2860 )
2861 2861 except error.RevlogError:
2862 2862 if self.feature_config.censorable and storageutil.iscensoredtext(
2863 2863 text
2864 2864 ):
2865 2865 raise error.CensoredNodeError(self.display_id, node, text)
2866 2866 raise
2867 2867
2868 2868 @property
2869 2869 def _split_index_file(self):
2870 2870 """the path where to expect the index of an ongoing splitting operation
2871 2871
2872 2872 The file will only exist if a splitting operation is in progress, but
2873 2873 it is always expected at the same location."""
2874 2874 parts = self.radix.split(b'/')
2875 2875 if len(parts) > 1:
2876 2876 # adds a '-s' suffix to the ``data`` or ``meta`` base directory
2877 2877 head = parts[0] + b'-s'
2878 2878 mids = parts[1:-1]
2879 2879 tail = parts[-1] + b'.i'
2880 2880 pieces = [head] + mids + [tail]
2881 2881 return b'/'.join(pieces)
2882 2882 else:
2883 2883 # the revlog is stored at the root of the store (changelog or
2884 2884 # manifest), no risk of collision.
2885 2885 return self.radix + b'.i.s'
2886 2886
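# Illustration (hypothetical radix values): the property above maps
#
#     data/some/dir/file.txt  ->  data-s/some/dir/file.txt.i
#     00changelog             ->  00changelog.i.s
#
# so an in-progress split can never collide with the live index file.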
2887 2887 def _enforceinlinesize(self, tr):
2888 2888 """Check if the revlog is too big for inline and convert if so.
2889 2889
2890 2890 This should be called after revisions are added to the revlog. If the
2891 2891 revlog has grown too large to be an inline revlog, it will convert it
2892 2892 to use multiple index and data files.
2893 2893 """
2894 2894 tiprev = len(self) - 1
2895 2895 total_size = self.start(tiprev) + self.length(tiprev)
2896 2896 if not self._inline or (self._may_inline and total_size < _maxinline):
2897 2897 return
2898 2898
2899 2899 if self._docket is not None:
2900 2900 msg = b"inline revlog should not have a docket"
2901 2901 raise error.ProgrammingError(msg)
2902 2902
2903 2903 # In the common case, we enforce the inline size because the revlog
2904 2904 # has been appended to. In such a case, it must have an initial offset
2905 2905 # recorded in the transaction.
2906 2906 troffset = tr.findoffset(self._inner.canonical_index_file)
2907 2907 pre_touched = troffset is not None
2908 2908 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2909 2909 raise error.RevlogError(
2910 2910 _(b"%s not found in the transaction") % self._indexfile
2911 2911 )
2912 2912
2913 2913 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2914 2914 tr.add(self._datafile, 0)
2915 2915
2917 2917 old_index_file_path = self._indexfile
2918 2918 new_index_file_path = self._split_index_file
2919 2919 opener = self.opener
2920 2920 weak_self = weakref.ref(self)
2921 2921
2922 2922 # the "split" index replace the real index when the transaction is
2923 2923 # finalized
2924 2924 def finalize_callback(tr):
2925 2925 opener.rename(
2926 2926 new_index_file_path,
2927 2927 old_index_file_path,
2928 2928 checkambig=True,
2929 2929 )
2930 2930 maybe_self = weak_self()
2931 2931 if maybe_self is not None:
2932 2932 maybe_self._indexfile = old_index_file_path
2933 2933 maybe_self._inner.index_file = maybe_self._indexfile
2934 2934
2935 2935 def abort_callback(tr):
2936 2936 maybe_self = weak_self()
2937 2937 if maybe_self is not None:
2938 2938 maybe_self._indexfile = old_index_file_path
2939 2939 maybe_self._inner.inline = True
2940 2940 maybe_self._inner.index_file = old_index_file_path
2941 2941
2942 2942 tr.registertmp(new_index_file_path)
2943 2943 # we use 001 here to make this happen after the finalization of the
2944 2944 # pending changelog write (which uses 000). Otherwise the two finalizers
2945 2945 # would step over each other and delete the changelog.i file.
2946 2946 if self.target[1] is not None:
2947 2947 callback_id = b'001-revlog-split-%d-%s' % self.target
2948 2948 else:
2949 2949 callback_id = b'001-revlog-split-%d' % self.target[0]
2950 2950 tr.addfinalize(callback_id, finalize_callback)
2951 2951 tr.addabort(callback_id, abort_callback)
2952 2952
2953 2953 self._format_flags &= ~FLAG_INLINE_DATA
2954 2954 self._inner.split_inline(
2955 2955 tr,
2956 2956 self._format_flags | self._format_version,
2957 2957 new_index_file_path=new_index_file_path,
2958 2958 )
2959 2959
2960 2960 self._inline = False
2962 2962 self._indexfile = new_index_file_path
2963 2963
2964 2964 nodemaputil.setup_persistent_nodemap(tr, self)
2965 2965
2966 2966 def _nodeduplicatecallback(self, transaction, node):
2967 2967 """called when trying to add a node already stored."""
2968 2968
2969 2969 @contextlib.contextmanager
2970 2970 def reading(self):
2971 2971 with self._inner.reading():
2972 2972 yield
2973 2973
2974 2974 @contextlib.contextmanager
2975 2975 def _writing(self, transaction):
2976 2976 if self._trypending:
2977 2977 msg = b'try to write in a `trypending` revlog: %s'
2978 2978 msg %= self.display_id
2979 2979 raise error.ProgrammingError(msg)
2980 2980 if self._inner.is_writing:
2981 2981 yield
2982 2982 else:
2983 2983 data_end = None
2984 2984 sidedata_end = None
2985 2985 if self._docket is not None:
2986 2986 data_end = self._docket.data_end
2987 2987 sidedata_end = self._docket.sidedata_end
2988 2988 with self._inner.writing(
2989 2989 transaction,
2990 2990 data_end=data_end,
2991 2991 sidedata_end=sidedata_end,
2992 2992 ):
2993 2993 yield
2994 2994 if self._docket is not None:
2995 2995 self._write_docket(transaction)
2996 2996
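# Usage sketch (illustrative): all mutations funnel through the context
# manager above, which makes nesting cheap when a caller already holds
# the writing context:
#
#     with rl._writing(tr):      # opens files, tracks docket offsets
#         with rl._writing(tr):  # inner call: is_writing, plain yield
#             ...                # append revisions here
#     # on exit, the docket (if any) is written back through `tr`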
2997 2997 @property
2998 2998 def is_delaying(self):
2999 2999 return self._inner.is_delaying
3000 3000
3001 3001 def _write_docket(self, transaction):
3002 3002 """write the current docket on disk
3003 3003
3004 3004 Exists as a method to help the changelog implement its transaction logic.
3005 3005
3006 3006 We could also imagine using the same transaction logic for all revlogs,
3007 3007 since dockets are cheap."""
3008 3008 self._docket.write(transaction)
3009 3009
3010 3010 def addrevision(
3011 3011 self,
3012 3012 text,
3013 3013 transaction,
3014 3014 link,
3015 3015 p1,
3016 3016 p2,
3017 3017 cachedelta=None,
3018 3018 node=None,
3019 3019 flags=REVIDX_DEFAULT_FLAGS,
3020 3020 deltacomputer=None,
3021 3021 sidedata=None,
3022 3022 ):
3023 3023 """add a revision to the log
3024 3024
3025 3025 text - the revision data to add
3026 3026 transaction - the transaction object used for rollback
3027 3027 link - the linkrev data to add
3028 3028 p1, p2 - the parent nodeids of the revision
3029 3029 cachedelta - an optional precomputed delta
3030 3030 node - nodeid of revision; typically node is not specified, and it is
3031 3031 computed by default as hash(text, p1, p2), however subclasses might
3032 3032 use a different hashing method (and override checkhash() in that case)
3033 3033 flags - the known flags to set on the revision
3034 3034 deltacomputer - an optional deltacomputer instance shared between
3035 3035 multiple calls
3036 3036 """
3037 3037 if link == nullrev:
3038 3038 raise error.RevlogError(
3039 3039 _(b"attempted to add linkrev -1 to %s") % self.display_id
3040 3040 )
3041 3041
3042 3042 if sidedata is None:
3043 3043 sidedata = {}
3044 3044 elif sidedata and not self.feature_config.has_side_data:
3045 3045 raise error.ProgrammingError(
3046 3046 _(b"trying to add sidedata to a revlog who don't support them")
3047 3047 )
3048 3048
3049 3049 if flags:
3050 3050 node = node or self.hash(text, p1, p2)
3051 3051
3052 3052 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3053 3053
3054 3054 # If the flag processor modifies the revision data, ignore any provided
3055 3055 # cachedelta.
3056 3056 if rawtext != text:
3057 3057 cachedelta = None
3058 3058
3059 3059 if len(rawtext) > _maxentrysize:
3060 3060 raise error.RevlogError(
3061 3061 _(
3062 3062 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3063 3063 )
3064 3064 % (self.display_id, len(rawtext))
3065 3065 )
3066 3066
3067 3067 node = node or self.hash(rawtext, p1, p2)
3068 3068 rev = self.index.get_rev(node)
3069 3069 if rev is not None:
3070 3070 return rev
3071 3071
3072 3072 if validatehash:
3073 3073 self.checkhash(rawtext, node, p1=p1, p2=p2)
3074 3074
3075 3075 return self.addrawrevision(
3076 3076 rawtext,
3077 3077 transaction,
3078 3078 link,
3079 3079 p1,
3080 3080 p2,
3081 3081 node,
3082 3082 flags,
3083 3083 cachedelta=cachedelta,
3084 3084 deltacomputer=deltacomputer,
3085 3085 sidedata=sidedata,
3086 3086 )
3087 3087
3088 3088 def addrawrevision(
3089 3089 self,
3090 3090 rawtext,
3091 3091 transaction,
3092 3092 link,
3093 3093 p1,
3094 3094 p2,
3095 3095 node,
3096 3096 flags,
3097 3097 cachedelta=None,
3098 3098 deltacomputer=None,
3099 3099 sidedata=None,
3100 3100 ):
3101 3101 """add a raw revision with known flags, node and parents
3102 3102 useful when reusing a revision not stored in this revlog (ex: received
3103 3103 over the wire, or read from an external bundle).
3104 3104 """
3105 3105 with self._writing(transaction):
3106 3106 return self._addrevision(
3107 3107 node,
3108 3108 rawtext,
3109 3109 transaction,
3110 3110 link,
3111 3111 p1,
3112 3112 p2,
3113 3113 flags,
3114 3114 cachedelta,
3115 3115 deltacomputer=deltacomputer,
3116 3116 sidedata=sidedata,
3117 3117 )
3118 3118
3119 3119 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3120 3120 return self._inner.compress(data)
3121 3121
3122 3122 def decompress(self, data):
3123 3123 return self._inner.decompress(data)
3124 3124
3125 3125 def _addrevision(
3126 3126 self,
3127 3127 node,
3128 3128 rawtext,
3129 3129 transaction,
3130 3130 link,
3131 3131 p1,
3132 3132 p2,
3133 3133 flags,
3134 3134 cachedelta,
3135 3135 alwayscache=False,
3136 3136 deltacomputer=None,
3137 3137 sidedata=None,
3138 3138 ):
3139 3139 """internal function to add revisions to the log
3140 3140
3141 3141 see addrevision for argument descriptions.
3142 3142
3143 3143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3144 3144
3145 3145 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3146 3146 be used.
3147 3147
3148 3148 invariants:
3149 3149 - rawtext is optional (can be None); if not set, cachedelta must be set.
3150 3150 if both are set, they must correspond to each other.
3151 3151 """
3152 3152 if node == self.nullid:
3153 3153 raise error.RevlogError(
3154 3154 _(b"%s: attempt to add null revision") % self.display_id
3155 3155 )
3156 3156 if (
3157 3157 node == self.nodeconstants.wdirid
3158 3158 or node in self.nodeconstants.wdirfilenodeids
3159 3159 ):
3160 3160 raise error.RevlogError(
3161 3161 _(b"%s: attempt to add wdir revision") % self.display_id
3162 3162 )
3163 if self._inner._writinghandles is None:
3163 if not self._inner.is_writing:
3164 3164 msg = b'adding revision outside `revlog._writing` context'
3165 3165 raise error.ProgrammingError(msg)
3166 3166
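# btext is a one-element list so that deltacomputer.buildtext() can
# cache the text it rebuilds (see the `rawtext = btext[0]` read below)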
3167 3167 btext = [rawtext]
3168 3168
3169 3169 curr = len(self)
3170 3170 prev = curr - 1
3171 3171
3172 3172 offset = self._get_data_offset(prev)
3173 3173
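# optional guard against concurrent writers: verify that the files
# still end where our computed offsets say they should before appending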
3174 3174 if self._concurrencychecker:
3175 3175 ifh, dfh, sdfh = self._inner._writinghandles
3176 3176 # XXX no checking for the sidedata file
3177 3177 if self._inline:
3178 3178 # offset is "as if" it were in the .d file, so we need to add on
3179 3179 # the size of the entry metadata.
3180 3180 self._concurrencychecker(
3181 3181 ifh, self._indexfile, offset + curr * self.index.entry_size
3182 3182 )
3183 3183 else:
3184 3184 # Entries in the .i are a consistent size.
3185 3185 self._concurrencychecker(
3186 3186 ifh, self._indexfile, curr * self.index.entry_size
3187 3187 )
3188 3188 self._concurrencychecker(dfh, self._datafile, offset)
3189 3189
3190 3190 p1r, p2r = self.rev(p1), self.rev(p2)
3191 3191
3192 3192 # full versions are inserted when the needed deltas
3193 3193 # become comparable to the uncompressed text
3194 3194 if rawtext is None:
3195 3195 # we need the rawtext size before it is changed by flag processors,
3196 3196 # which is the non-raw size. use revlog explicitly to avoid filelog's extra
3197 3197 # logic that might remove metadata size.
3198 3198 textlen = mdiff.patchedsize(
3199 3199 revlog.size(self, cachedelta[0]), cachedelta[1]
3200 3200 )
3201 3201 else:
3202 3202 textlen = len(rawtext)
3203 3203
3204 3204 if deltacomputer is None:
3205 3205 write_debug = None
3206 3206 if self.delta_config.debug_delta:
3207 3207 write_debug = transaction._report
3208 3208 deltacomputer = deltautil.deltacomputer(
3209 3209 self, write_debug=write_debug
3210 3210 )
3211 3211
3212 3212 if cachedelta is not None and len(cachedelta) == 2:
3213 3213 # If the cached delta has no information about how it should be
3214 3214 # reused, add the default reuse instruction according to the
3215 3215 # revlog's configuration.
3216 3216 if (
3217 3217 self.delta_config.general_delta
3218 3218 and self.delta_config.lazy_delta_base
3219 3219 ):
3220 3220 delta_base_reuse = DELTA_BASE_REUSE_TRY
3221 3221 else:
3222 3222 delta_base_reuse = DELTA_BASE_REUSE_NO
3223 3223 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3224 3224
3225 3225 revinfo = revlogutils.revisioninfo(
3226 3226 node,
3227 3227 p1,
3228 3228 p2,
3229 3229 btext,
3230 3230 textlen,
3231 3231 cachedelta,
3232 3232 flags,
3233 3233 )
3234 3234
3235 3235 deltainfo = deltacomputer.finddeltainfo(revinfo)
3236 3236
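# decide how the delta's compression is recorded: inline with the chunk
# by default, or implied by the docket's default compression when available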
3237 3237 compression_mode = COMP_MODE_INLINE
3238 3238 if self._docket is not None:
3239 3239 default_comp = self._docket.default_compression_header
3240 3240 r = deltautil.delta_compression(default_comp, deltainfo)
3241 3241 compression_mode, deltainfo = r
3242 3242
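# sidedata is serialized and then compressed; keep the compressed form
# only when it is actually smaller than the plain serialization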
3243 3243 sidedata_compression_mode = COMP_MODE_INLINE
3244 3244 if sidedata and self.feature_config.has_side_data:
3245 3245 sidedata_compression_mode = COMP_MODE_PLAIN
3246 3246 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3247 3247 sidedata_offset = self._docket.sidedata_end
3248 3248 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3249 3249 if (
3250 3250 h != b'u'
3251 3251 and comp_sidedata[0:1] != b'\0'
3252 3252 and len(comp_sidedata) < len(serialized_sidedata)
3253 3253 ):
3254 3254 assert not h
3255 3255 if (
3256 3256 comp_sidedata[0:1]
3257 3257 == self._docket.default_compression_header
3258 3258 ):
3259 3259 sidedata_compression_mode = COMP_MODE_DEFAULT
3260 3260 serialized_sidedata = comp_sidedata
3261 3261 else:
3262 3262 sidedata_compression_mode = COMP_MODE_INLINE
3263 3263 serialized_sidedata = comp_sidedata
3264 3264 else:
3265 3265 serialized_sidedata = b""
3266 3266 # Don't store the offset if the sidedata is empty; that way
3267 3267 # we can easily detect empty sidedata, and it will be no different
3268 3268 # from the sidedata we add manually.
3269 3269 sidedata_offset = 0
3270 3270
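# the rank of a revision is the size of its ancestor set, the revision
# itself included: 1 for a root, 1 + the parent's rank for a linear
# revision, and for a merge, 1 + the size of the union of both
# parents' ancestor sets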
3271 3271 rank = RANK_UNKNOWN
3272 3272 if self.feature_config.compute_rank:
3273 3273 if (p1r, p2r) == (nullrev, nullrev):
3274 3274 rank = 1
3275 3275 elif p1r != nullrev and p2r == nullrev:
3276 3276 rank = 1 + self.fast_rank(p1r)
3277 3277 elif p1r == nullrev and p2r != nullrev:
3278 3278 rank = 1 + self.fast_rank(p2r)
3279 3279 else: # merge node
3280 3280 if rustdagop is not None and self.index.rust_ext_compat:
3281 3281 rank = rustdagop.rank(self.index, p1r, p2r)
3282 3282 else:
3283 3283 pmin, pmax = sorted((p1r, p2r))
3284 3284 rank = 1 + self.fast_rank(pmax)
3285 3285 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3286 3286
3287 3287 e = revlogutils.entry(
3288 3288 flags=flags,
3289 3289 data_offset=offset,
3290 3290 data_compressed_length=deltainfo.deltalen,
3291 3291 data_uncompressed_length=textlen,
3292 3292 data_compression_mode=compression_mode,
3293 3293 data_delta_base=deltainfo.base,
3294 3294 link_rev=link,
3295 3295 parent_rev_1=p1r,
3296 3296 parent_rev_2=p2r,
3297 3297 node_id=node,
3298 3298 sidedata_offset=sidedata_offset,
3299 3299 sidedata_compressed_length=len(serialized_sidedata),
3300 3300 sidedata_compression_mode=sidedata_compression_mode,
3301 3301 rank=rank,
3302 3302 )
3303 3303
3304 3304 self.index.append(e)
3305 3305 entry = self.index.entry_binary(curr)
3306 3306 if curr == 0 and self._docket is None:
3307 3307 header = self._format_flags | self._format_version
3308 3308 header = self.index.pack_header(header)
3309 3309 entry = header + entry
3310 3310 self._writeentry(
3311 3311 transaction,
3312 3312 entry,
3313 3313 deltainfo.data,
3314 3314 link,
3315 3315 offset,
3316 3316 serialized_sidedata,
3317 3317 sidedata_offset,
3318 3318 )
3319 3319
3320 3320 rawtext = btext[0]
3321 3321
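# callers such as addgroup() expect an immediate revision() call to be
# cheap, so build and cache the full text now when requested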
3322 3322 if alwayscache and rawtext is None:
3323 3323 rawtext = deltacomputer.buildtext(revinfo)
3324 3324
3325 3325 if type(rawtext) == bytes: # only accept immutable objects
3326 3326 self._inner._revisioncache = (node, curr, rawtext)
3327 3327 self._chainbasecache[curr] = deltainfo.chainbase
3328 3328 return curr
3329 3329
3330 3330 def _get_data_offset(self, prev):
3331 3331 """Returns the current offset in the (in-transaction) data file.
3332 3332 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3333 3333 file to store that information: since sidedata can be rewritten to the
3334 3334 end of the data file within a transaction, you can have cases where, for
3335 3335 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3336 3336 to `n - 1`'s sidedata being written after `n`'s data.
3337 3337
3338 3338 TODO cache this in a docket file before getting out of experimental."""
3339 3339 if self._docket is None:
3340 3340 return self.end(prev)
3341 3341 else:
3342 3342 return self._docket.data_end
3343 3343
3344 3344 def _writeentry(
3345 3345 self,
3346 3346 transaction,
3347 3347 entry,
3348 3348 data,
3349 3349 link,
3350 3350 offset,
3351 3351 sidedata,
3352 3352 sidedata_offset,
3353 3353 ):
3354 3354 # Files opened in a+ mode have inconsistent behavior on various
3355 3355 # platforms. Windows requires that a file positioning call be made
3356 3356 # when the file handle transitions between reads and writes. See
3357 3357 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3358 3358 # platforms, Python or the platform itself can be buggy. Some versions
3359 3359 # of Solaris have been observed to not append at the end of the file
3360 3360 # if the file was seeked to before the end. See issue4943 for more.
3361 3361 #
3362 3362 # We work around this issue by inserting a seek() before writing.
3363 3363 # Note: This is likely not necessary on Python 3. However, because
3364 3364 # the file handle is reused for reads and may be seeked there, we need
3365 3365 # to be careful before changing this.
3366 3366 index_end = data_end = sidedata_end = None
3367 3367 if self._docket is not None:
3368 3368 index_end = self._docket.index_end
3369 3369 data_end = self._docket.data_end
3370 3370 sidedata_end = self._docket.sidedata_end
3371 3371
3372 3372 files_end = self._inner.write_entry(
3373 3373 transaction,
3374 3374 entry,
3375 3375 data,
3376 3376 link,
3377 3377 offset,
3378 3378 sidedata,
3379 3379 sidedata_offset,
3380 3380 index_end,
3381 3381 data_end,
3382 3382 sidedata_end,
3383 3383 )
3384 3384 self._enforceinlinesize(transaction)
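# keep the docket's recorded file sizes in sync with what was just
# written, so readers know how much of each file holds valid data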
3385 3385 if self._docket is not None:
3386 3386 self._docket.index_end = files_end[0]
3387 3387 self._docket.data_end = files_end[1]
3388 3388 self._docket.sidedata_end = files_end[2]
3389 3389
3390 3390 nodemaputil.setup_persistent_nodemap(transaction, self)
3391 3391
3392 3392 def addgroup(
3393 3393 self,
3394 3394 deltas,
3395 3395 linkmapper,
3396 3396 transaction,
3397 3397 alwayscache=False,
3398 3398 addrevisioncb=None,
3399 3399 duplicaterevisioncb=None,
3400 3400 debug_info=None,
3401 3401 delta_base_reuse_policy=None,
3402 3402 ):
3403 3403 """
3404 3404 add a delta group
3405 3405
3406 3406 given a set of deltas, add them to the revision log. the
3407 3407 first delta is against its parent, which should be in our
3408 3408 log, the rest are against the previous delta.
3409 3409
3410 3410 If ``addrevisioncb`` is defined, it will be called with arguments of
3411 3411 this revlog and the revision number that was added.
3412 3412 """
3413 3413
3414 3414 if self._adding_group:
3415 3415 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3416 3416
3417 3417 # read the default delta-base reuse policy from revlog config if the
3418 3418 # group did not specify one.
3419 3419 if delta_base_reuse_policy is None:
3420 3420 if (
3421 3421 self.delta_config.general_delta
3422 3422 and self.delta_config.lazy_delta_base
3423 3423 ):
3424 3424 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3425 3425 else:
3426 3426 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3427 3427
3428 3428 self._adding_group = True
3429 3429 empty = True
3430 3430 try:
3431 3431 with self._writing(transaction):
3432 3432 write_debug = None
3433 3433 if self.delta_config.debug_delta:
3434 3434 write_debug = transaction._report
3435 3435 deltacomputer = deltautil.deltacomputer(
3436 3436 self,
3437 3437 write_debug=write_debug,
3438 3438 debug_info=debug_info,
3439 3439 )
3440 3440 # loop through our set of deltas
3441 3441 for data in deltas:
3442 3442 (
3443 3443 node,
3444 3444 p1,
3445 3445 p2,
3446 3446 linknode,
3447 3447 deltabase,
3448 3448 delta,
3449 3449 flags,
3450 3450 sidedata,
3451 3451 ) = data
3452 3452 link = linkmapper(linknode)
3453 3453 flags = flags or REVIDX_DEFAULT_FLAGS
3454 3454
3455 3455 rev = self.index.get_rev(node)
3456 3456 if rev is not None:
3457 3457 # this can happen if two branches make the same change
3458 3458 self._nodeduplicatecallback(transaction, rev)
3459 3459 if duplicaterevisioncb:
3460 3460 duplicaterevisioncb(self, rev)
3461 3461 empty = False
3462 3462 continue
3463 3463
3464 3464 for p in (p1, p2):
3465 3465 if not self.index.has_node(p):
3466 3466 raise error.LookupError(
3467 3467 p, self.radix, _(b'unknown parent')
3468 3468 )
3469 3469
3470 3470 if not self.index.has_node(deltabase):
3471 3471 raise error.LookupError(
3472 3472 deltabase, self.display_id, _(b'unknown delta base')
3473 3473 )
3474 3474
3475 3475 baserev = self.rev(deltabase)
3476 3476
3477 3477 if baserev != nullrev and self.iscensored(baserev):
3478 3478 # if base is censored, delta must be full replacement in a
3479 3479 # single patch operation
3480 3480 hlen = struct.calcsize(b">lll")
3481 3481 oldlen = self.rawsize(baserev)
3482 3482 newlen = len(delta) - hlen
3483 3483 if delta[:hlen] != mdiff.replacediffheader(
3484 3484 oldlen, newlen
3485 3485 ):
3486 3486 raise error.CensoredBaseError(
3487 3487 self.display_id, self.node(baserev)
3488 3488 )
3489 3489
3490 3490 if not flags and self._peek_iscensored(baserev, delta):
3491 3491 flags |= REVIDX_ISCENSORED
3492 3492
3493 3493 # We assume consumers of addrevisioncb will want to retrieve
3494 3494 # the added revision, which will require a call to
3495 3495 # revision(). revision() will fast path if there is a cache
3496 3496 # hit. So, we tell _addrevision() to always cache in this case.
3497 3497 # We're only using addgroup() in the context of changegroup
3498 3498 # application so the revision data can always be handled as raw
3499 3499 # by the flagprocessor.
3500 3500 rev = self._addrevision(
3501 3501 node,
3502 3502 None,
3503 3503 transaction,
3504 3504 link,
3505 3505 p1,
3506 3506 p2,
3507 3507 flags,
3508 3508 (baserev, delta, delta_base_reuse_policy),
3509 3509 alwayscache=alwayscache,
3510 3510 deltacomputer=deltacomputer,
3511 3511 sidedata=sidedata,
3512 3512 )
3513 3513
3514 3514 if addrevisioncb:
3515 3515 addrevisioncb(self, rev)
3516 3516 empty = False
3517 3517 finally:
3518 3518 self._adding_group = False
3519 3519 return not empty
3520 3520
3521 3521 def iscensored(self, rev):
3522 3522 """Check if a file revision is censored."""
3523 3523 if not self.feature_config.censorable:
3524 3524 return False
3525 3525
3526 3526 return self.flags(rev) & REVIDX_ISCENSORED
3527 3527
3528 3528 def _peek_iscensored(self, baserev, delta):
3529 3529 """Quickly check if a delta produces a censored revision."""
3530 3530 if not self.feature_config.censorable:
3531 3531 return False
3532 3532
3533 3533 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3534 3534
3535 3535 def getstrippoint(self, minlink):
3536 3536 """find the minimum rev that must be stripped to strip the linkrev
3537 3537
3538 3538 Returns a tuple containing the minimum rev and a set of all revs that
3539 3539 have linkrevs that will be broken by this strip.
3540 3540 """
3541 3541 return storageutil.resolvestripinfo(
3542 3542 minlink,
3543 3543 len(self) - 1,
3544 3544 self.headrevs(),
3545 3545 self.linkrev,
3546 3546 self.parentrevs,
3547 3547 )
3548 3548
3549 3549 def strip(self, minlink, transaction):
3550 3550 """truncate the revlog on the first revision with a linkrev >= minlink
3551 3551
3552 3552 This function is called when we're stripping revision minlink and
3553 3553 its descendants from the repository.
3554 3554
3555 3555 We have to remove all revisions with linkrev >= minlink, because
3556 3556 the equivalent changelog revisions will be renumbered after the
3557 3557 strip.
3558 3558
3559 3559 So we truncate the revlog on the first of these revisions, and
3560 3560 trust that the caller has saved the revisions that shouldn't be
3561 3561 removed and that it'll re-add them after this truncation.
3562 3562 """
3563 3563 if len(self) == 0:
3564 3564 return
3565 3565
3566 3566 rev, _ = self.getstrippoint(minlink)
3567 3567 if rev == len(self):
3568 3568 return
3569 3569
3570 3570 # first truncate the files on disk
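# (for inline revlogs the data lives in the index file, so the
# truncation point below covers both index entries and revision data)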
3571 3571 data_end = self.start(rev)
3572 3572 if not self._inline:
3573 3573 transaction.add(self._datafile, data_end)
3574 3574 end = rev * self.index.entry_size
3575 3575 else:
3576 3576 end = data_end + (rev * self.index.entry_size)
3577 3577
3578 3578 if self._sidedatafile:
3579 3579 sidedata_end = self.sidedata_cut_off(rev)
3580 3580 transaction.add(self._sidedatafile, sidedata_end)
3581 3581
3582 3582 transaction.add(self._indexfile, end)
3583 3583 if self._docket is not None:
3584 3584 # XXX we could leverage the docket while stripping. However it is
3585 3585 # not powerful enough at the time of this comment
3586 3586 self._docket.index_end = end
3587 3587 self._docket.data_end = data_end
3588 3588 self._docket.sidedata_end = sidedata_end
3589 3589 self._docket.write(transaction, stripping=True)
3590 3590
3591 3591 # then reset internal state in memory to forget those revisions
3592 3592 self._chaininfocache = util.lrucachedict(500)
3593 3593 self._inner.clear_cache()
3594 3594
3595 3595 del self.index[rev:-1]
3596 3596
3597 3597 def checksize(self):
3598 3598 """Check size of index and data files
3599 3599
3600 3600 return a (dd, di) tuple.
3601 3601 - dd: extra bytes for the "data" file
3602 3602 - di: extra bytes for the "index" file
3603 3603
3604 3604 A healthy revlog will return (0, 0).
3605 3605 """
3606 3606 expected = 0
3607 3607 if len(self):
3608 3608 expected = max(0, self.end(len(self) - 1))
3609 3609
3610 3610 try:
3611 3611 with self._datafp() as f:
3612 3612 f.seek(0, io.SEEK_END)
3613 3613 actual = f.tell()
3614 3614 dd = actual - expected
3615 3615 except FileNotFoundError:
3616 3616 dd = 0
3617 3617
3618 3618 try:
3619 3619 f = self.opener(self._indexfile)
3620 3620 f.seek(0, io.SEEK_END)
3621 3621 actual = f.tell()
3622 3622 f.close()
3623 3623 s = self.index.entry_size
3624 3624 i = max(0, actual // s)
3625 3625 di = actual - (i * s)
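# inline revlogs interleave revision data with index entries, so the
# expected size must be recomputed from the per-revision data lengths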
3626 3626 if self._inline:
3627 3627 databytes = 0
3628 3628 for r in self:
3629 3629 databytes += max(0, self.length(r))
3630 3630 dd = 0
3631 3631 di = actual - len(self) * s - databytes
3632 3632 except FileNotFoundError:
3633 3633 di = 0
3634 3634
3635 3635 return (dd, di)
3636 3636
3637 3637 def files(self):
3638 3638 """return list of files that compose this revlog"""
3639 3639 res = [self._indexfile]
3640 3640 if self._docket_file is None:
3641 3641 if not self._inline:
3642 3642 res.append(self._datafile)
3643 3643 else:
3644 3644 res.append(self._docket_file)
3645 3645 res.extend(self._docket.old_index_filepaths(include_empty=False))
3646 3646 if self._docket.data_end:
3647 3647 res.append(self._datafile)
3648 3648 res.extend(self._docket.old_data_filepaths(include_empty=False))
3649 3649 if self._docket.sidedata_end:
3650 3650 res.append(self._sidedatafile)
3651 3651 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3652 3652 return res
3653 3653
3654 3654 def emitrevisions(
3655 3655 self,
3656 3656 nodes,
3657 3657 nodesorder=None,
3658 3658 revisiondata=False,
3659 3659 assumehaveparentrevisions=False,
3660 3660 deltamode=repository.CG_DELTAMODE_STD,
3661 3661 sidedata_helpers=None,
3662 3662 debug_info=None,
3663 3663 ):
3664 3664 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3665 3665 raise error.ProgrammingError(
3666 3666 b'unhandled value for nodesorder: %s' % nodesorder
3667 3667 )
3668 3668
3669 3669 if nodesorder is None and not self.delta_config.general_delta:
3670 3670 nodesorder = b'storage'
3671 3671
3672 3672 if (
3673 3673 not self._storedeltachains
3674 3674 and deltamode != repository.CG_DELTAMODE_PREV
3675 3675 ):
3676 3676 deltamode = repository.CG_DELTAMODE_FULL
3677 3677
3678 3678 return storageutil.emitrevisions(
3679 3679 self,
3680 3680 nodes,
3681 3681 nodesorder,
3682 3682 revlogrevisiondelta,
3683 3683 deltaparentfn=self.deltaparent,
3684 3684 candeltafn=self._candelta,
3685 3685 rawsizefn=self.rawsize,
3686 3686 revdifffn=self.revdiff,
3687 3687 flagsfn=self.flags,
3688 3688 deltamode=deltamode,
3689 3689 revisiondata=revisiondata,
3690 3690 assumehaveparentrevisions=assumehaveparentrevisions,
3691 3691 sidedata_helpers=sidedata_helpers,
3692 3692 debug_info=debug_info,
3693 3693 )
3694 3694
3695 3695 DELTAREUSEALWAYS = b'always'
3696 3696 DELTAREUSESAMEREVS = b'samerevs'
3697 3697 DELTAREUSENEVER = b'never'
3698 3698
3699 3699 DELTAREUSEFULLADD = b'fulladd'
3700 3700
3701 3701 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3702 3702
3703 3703 def clone(
3704 3704 self,
3705 3705 tr,
3706 3706 destrevlog,
3707 3707 addrevisioncb=None,
3708 3708 deltareuse=DELTAREUSESAMEREVS,
3709 3709 forcedeltabothparents=None,
3710 3710 sidedata_helpers=None,
3711 3711 ):
3712 3712 """Copy this revlog to another, possibly with format changes.
3713 3713
3714 3714 The destination revlog will contain the same revisions and nodes.
3715 3715 However, it may not be bit-for-bit identical due to e.g. delta encoding
3716 3716 differences.
3717 3717
3718 3718 The ``deltareuse`` argument controls how deltas from the existing revlog
3719 3719 are preserved in the destination revlog. The argument can have the
3720 3720 following values:
3721 3721
3722 3722 DELTAREUSEALWAYS
3723 3723 Deltas will always be reused (if possible), even if the destination
3724 3724 revlog would not select the same revisions for the delta. This is the
3725 3725 fastest mode of operation.
3726 3726 DELTAREUSESAMEREVS
3727 3727 Deltas will be reused if the destination revlog would pick the same
3728 3728 revisions for the delta. This mode strikes a balance between speed
3729 3729 and optimization.
3730 3730 DELTAREUSENEVER
3731 3731 Deltas will never be reused. This is the slowest mode of execution.
3732 3732 This mode can be used to recompute deltas (e.g. if the diff/delta
3733 3733 algorithm changes).
3734 3734 DELTAREUSEFULLADD
3735 3735 Revisions will be re-added as if they were new content. This is
3736 3736 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3737 3737 e.g. large file detection and handling.
3738 3738
3739 3739 Delta computation can be slow, so the choice of delta reuse policy can
3740 3740 significantly affect run time.
3741 3741
3742 3742 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3743 3743 two extremes. Deltas will be reused if they are appropriate. But if the
3744 3744 destination could pick a better delta base, it will do so. This means if you
3745 3745 are converting a non-generaldelta revlog to a generaldelta revlog,
3746 3746 deltas will be recomputed if the delta's parent isn't a parent of the
3747 3747 revision.
3748 3748
3749 3749 In addition to the delta policy, the ``forcedeltabothparents``
3750 3750 argument controls whether to force computing deltas against both parents
3751 3751 for merges. When unset, the destination revlog's current configuration is used.
3752 3752
3753 3753 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3754 3754 `sidedata_helpers`.
3755 3755 """
3756 3756 if deltareuse not in self.DELTAREUSEALL:
3757 3757 raise ValueError(
3758 3758 _(b'value for deltareuse invalid: %s') % deltareuse
3759 3759 )
3760 3760
3761 3761 if len(destrevlog):
3762 3762 raise ValueError(_(b'destination revlog is not empty'))
3763 3763
3764 3764 if getattr(self, 'filteredrevs', None):
3765 3765 raise ValueError(_(b'source revlog has filtered revisions'))
3766 3766 if getattr(destrevlog, 'filteredrevs', None):
3767 3767 raise ValueError(_(b'destination revlog has filtered revisions'))
3768 3768
3769 3769 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3770 3770 # if possible.
3771 3771 old_delta_config = destrevlog.delta_config
3772 3772 destrevlog.delta_config = destrevlog.delta_config.copy()
3773 3773
3774 3774 try:
3775 3775 if deltareuse == self.DELTAREUSEALWAYS:
3776 3776 destrevlog.delta_config.lazy_delta_base = True
3777 3777 destrevlog.delta_config.lazy_delta = True
3778 3778 elif deltareuse == self.DELTAREUSESAMEREVS:
3779 3779 destrevlog.delta_config.lazy_delta_base = False
3780 3780 destrevlog.delta_config.lazy_delta = True
3781 3781 elif deltareuse == self.DELTAREUSENEVER:
3782 3782 destrevlog.delta_config.lazy_delta_base = False
3783 3783 destrevlog.delta_config.lazy_delta = False
3784 3784
3785 3785 delta_both_parents = (
3786 3786 forcedeltabothparents or old_delta_config.delta_both_parents
3787 3787 )
3788 3788 destrevlog.delta_config.delta_both_parents = delta_both_parents
3789 3789
3790 3790 with self.reading(), destrevlog._writing(tr):
3791 3791 self._clone(
3792 3792 tr,
3793 3793 destrevlog,
3794 3794 addrevisioncb,
3795 3795 deltareuse,
3796 3796 forcedeltabothparents,
3797 3797 sidedata_helpers,
3798 3798 )
3799 3799
3800 3800 finally:
3801 3801 destrevlog.delta_config = old_delta_config
3802 3802
3803 3803 def _clone(
3804 3804 self,
3805 3805 tr,
3806 3806 destrevlog,
3807 3807 addrevisioncb,
3808 3808 deltareuse,
3809 3809 forcedeltabothparents,
3810 3810 sidedata_helpers,
3811 3811 ):
3812 3812 """perform the core duty of `revlog.clone` after parameter processing"""
3813 3813 write_debug = None
3814 3814 if self.delta_config.debug_delta:
3815 3815 write_debug = tr._report
3816 3816 deltacomputer = deltautil.deltacomputer(
3817 3817 destrevlog,
3818 3818 write_debug=write_debug,
3819 3819 )
3820 3820 index = self.index
3821 3821 for rev in self:
3822 3822 entry = index[rev]
3823 3823
3824 3824 # Some classes override linkrev to take filtered revs into
3825 3825 # account. Use raw entry from index.
3826 3826 flags = entry[0] & 0xFFFF
3827 3827 linkrev = entry[4]
3828 3828 p1 = index[entry[5]][7]
3829 3829 p2 = index[entry[6]][7]
3830 3830 node = entry[7]
3831 3831
3832 3832 # (Possibly) reuse the delta from the revlog if allowed and
3833 3833 # the revlog chunk is a delta.
3834 3834 cachedelta = None
3835 3835 rawtext = None
3836 3836 if deltareuse == self.DELTAREUSEFULLADD:
3837 3837 text = self._revisiondata(rev)
3838 3838 sidedata = self.sidedata(rev)
3839 3839
3840 3840 if sidedata_helpers is not None:
3841 3841 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3842 3842 self, sidedata_helpers, sidedata, rev
3843 3843 )
3844 3844 flags = flags | new_flags[0] & ~new_flags[1]
3845 3845
3846 3846 destrevlog.addrevision(
3847 3847 text,
3848 3848 tr,
3849 3849 linkrev,
3850 3850 p1,
3851 3851 p2,
3852 3852 cachedelta=cachedelta,
3853 3853 node=node,
3854 3854 flags=flags,
3855 3855 deltacomputer=deltacomputer,
3856 3856 sidedata=sidedata,
3857 3857 )
3858 3858 else:
3859 3859 if destrevlog.delta_config.lazy_delta:
3860 3860 dp = self.deltaparent(rev)
3861 3861 if dp != nullrev:
3862 3862 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3863 3863
3864 3864 sidedata = None
3865 3865 if not cachedelta:
3866 3866 try:
3867 3867 rawtext = self._revisiondata(rev)
3868 3868 except error.CensoredNodeError as censored:
3869 3869 assert flags & REVIDX_ISCENSORED
3870 3870 rawtext = censored.tombstone
3871 3871 sidedata = self.sidedata(rev)
3872 3872 if sidedata is None:
3873 3873 sidedata = self.sidedata(rev)
3874 3874
3875 3875 if sidedata_helpers is not None:
3876 3876 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3877 3877 self, sidedata_helpers, sidedata, rev
3878 3878 )
3879 3879 flags = flags | new_flags[0] & ~new_flags[1]
3880 3880
3881 3881 destrevlog._addrevision(
3882 3882 node,
3883 3883 rawtext,
3884 3884 tr,
3885 3885 linkrev,
3886 3886 p1,
3887 3887 p2,
3888 3888 flags,
3889 3889 cachedelta,
3890 3890 deltacomputer=deltacomputer,
3891 3891 sidedata=sidedata,
3892 3892 )
3893 3893
3894 3894 if addrevisioncb:
3895 3895 addrevisioncb(self, rev, node)
3896 3896
3897 3897 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3898 3898 if self._format_version == REVLOGV0:
3899 3899 raise error.RevlogError(
3900 3900 _(b'cannot censor with version %d revlogs')
3901 3901 % self._format_version
3902 3902 )
3903 3903 elif self._format_version == REVLOGV1:
3904 3904 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3905 3905 else:
3906 3906 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3907 3907
3908 3908 def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
3909 3909 """Verifies the integrity of the revlog.
3910 3910
3911 3911 Yields ``revlogproblem`` instances describing problems that are
3912 3912 found.
3913 3913 """
3914 3914 dd, di = self.checksize()
3915 3915 if dd:
3916 3916 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3917 3917 if di:
3918 3918 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3919 3919
3920 3920 version = self._format_version
3921 3921
3922 3922 # The verifier tells us what version revlog we should be.
3923 3923 if version != state[b'expectedversion']:
3924 3924 yield revlogproblem(
3925 3925 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3926 3926 % (self.display_id, version, state[b'expectedversion'])
3927 3927 )
3928 3928
3929 3929 state[b'skipread'] = set()
3930 3930 state[b'safe_renamed'] = set()
3931 3931
3932 3932 for rev in self:
3933 3933 node = self.node(rev)
3934 3934
3935 3935 # Verify contents. 4 cases to care about:
3936 3936 #
3937 3937 # common: the most common case
3938 3938 # rename: with a rename
3939 3939 # meta: file content starts with b'\1\n', the metadata
3940 3940 # header defined in filelog.py, but without a rename
3941 3941 # ext: content stored externally
3942 3942 #
3943 3943 # More formally, their differences are shown below:
3944 3944 #
3945 3945 # | common | rename | meta | ext
3946 3946 # -------------------------------------------------------
3947 3947 # flags() | 0 | 0 | 0 | not 0
3948 3948 # renamed() | False | True | False | ?
3949 3949 # rawtext[0:2]=='\1\n'| False | True | True | ?
3950 3950 #
3951 3951 # "rawtext" means the raw text stored in revlog data, which
3952 3952 # could be retrieved by "rawdata(rev)". "text"
3953 3953 # mentioned below is "revision(rev)".
3954 3954 #
3955 3955 # There are 3 different lengths stored physically:
3956 3956 # 1. L1: rawsize, stored in revlog index
3957 3957 # 2. L2: len(rawtext), stored in revlog data
3958 3958 # 3. L3: len(text), stored in revlog data if flags==0, or
3959 3959 # possibly somewhere else if flags!=0
3960 3960 #
3961 3961 # L1 should be equal to L2. L3 could be different from them.
3962 3962 # "text" may or may not affect commit hash depending on flag
3963 3963 # processors (see flagutil.addflagprocessor).
3964 3964 #
3965 3965 # | common | rename | meta | ext
3966 3966 # -------------------------------------------------
3967 3967 # rawsize() | L1 | L1 | L1 | L1
3968 3968 # size() | L1 | L2-LM | L1(*) | L1 (?)
3969 3969 # len(rawtext) | L2 | L2 | L2 | L2
3970 3970 # len(text) | L2 | L2 | L2 | L3
3971 3971 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3972 3972 #
3973 3973 # LM: length of metadata, depending on rawtext
3974 3974 # (*): not ideal, see comment in filelog.size
3975 3975 # (?): could be "- len(meta)" if the resolved content has
3976 3976 # rename metadata
3977 3977 #
3978 3978 # Checks needed to be done:
3979 3979 # 1. length check: L1 == L2, in all cases.
3980 3980 # 2. hash check: depending on flag processor, we may need to
3981 3981 # use either "text" (external), or "rawtext" (in revlog).
3982 3982
3983 3983 try:
3984 3984 skipflags = state.get(b'skipflags', 0)
3985 3985 if skipflags:
3986 3986 skipflags &= self.flags(rev)
3987 3987
3988 3988 _verify_revision(self, skipflags, state, node)
3989 3989
3990 3990 l1 = self.rawsize(rev)
3991 3991 l2 = len(self.rawdata(node))
3992 3992
3993 3993 if l1 != l2:
3994 3994 yield revlogproblem(
3995 3995 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3996 3996 node=node,
3997 3997 )
3998 3998
3999 3999 except error.CensoredNodeError:
4000 4000 if state[b'erroroncensored']:
4001 4001 yield revlogproblem(
4002 4002 error=_(b'censored file data'), node=node
4003 4003 )
4004 4004 state[b'skipread'].add(node)
4005 4005 except Exception as e:
4006 4006 yield revlogproblem(
4007 4007 error=_(b'unpacking %s: %s')
4008 4008 % (short(node), stringutil.forcebytestr(e)),
4009 4009 node=node,
4010 4010 )
4011 4011 state[b'skipread'].add(node)
4012 4012
4013 4013 def storageinfo(
4014 4014 self,
4015 4015 exclusivefiles=False,
4016 4016 sharedfiles=False,
4017 4017 revisionscount=False,
4018 4018 trackedsize=False,
4019 4019 storedsize=False,
4020 4020 ):
4021 4021 d = {}
4022 4022
4023 4023 if exclusivefiles:
4024 4024 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4025 4025 if not self._inline:
4026 4026 d[b'exclusivefiles'].append((self.opener, self._datafile))
4027 4027
4028 4028 if sharedfiles:
4029 4029 d[b'sharedfiles'] = []
4030 4030
4031 4031 if revisionscount:
4032 4032 d[b'revisionscount'] = len(self)
4033 4033
4034 4034 if trackedsize:
4035 4035 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4036 4036
4037 4037 if storedsize:
4038 4038 d[b'storedsize'] = sum(
4039 4039 self.opener.stat(path).st_size for path in self.files()
4040 4040 )
4041 4041
4042 4042 return d
4043 4043
4044 4044 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4045 4045 if not self.feature_config.has_side_data:
4046 4046 return
4047 4047 # revlog formats with sidedata support do not support inline
4048 4048 assert not self._inline
4049 4049 if not helpers[1] and not helpers[2]:
4050 4050 # Nothing to generate or remove
4051 4051 return
4052 4052
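# two passes: first append the regenerated sidedata to the sidedata
# file, then patch the affected index entries with their new offsets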
4053 4053 new_entries = []
4054 4054 # append the new sidedata
4055 4055 with self._writing(transaction):
4056 4056 ifh, dfh, sdfh = self._inner._writinghandles
4057 4057 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4058 4058
4059 4059 current_offset = sdfh.tell()
4060 4060 for rev in range(startrev, endrev + 1):
4061 4061 entry = self.index[rev]
4062 4062 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4063 4063 store=self,
4064 4064 sidedata_helpers=helpers,
4065 4065 sidedata={},
4066 4066 rev=rev,
4067 4067 )
4068 4068
4069 4069 serialized_sidedata = sidedatautil.serialize_sidedata(
4070 4070 new_sidedata
4071 4071 )
4072 4072
4073 4073 sidedata_compression_mode = COMP_MODE_INLINE
4074 4074 if serialized_sidedata and self.feature_config.has_side_data:
4075 4075 sidedata_compression_mode = COMP_MODE_PLAIN
4076 4076 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4077 4077 if (
4078 4078 h != b'u'
4079 4079 and comp_sidedata[0:1] != b'\0'
4080 4080 and len(comp_sidedata) < len(serialized_sidedata)
4081 4081 ):
4082 4082 assert not h
4083 4083 if (
4084 4084 comp_sidedata[0:1]
4085 4085 == self._docket.default_compression_header
4086 4086 ):
4087 4087 sidedata_compression_mode = COMP_MODE_DEFAULT
4088 4088 serialized_sidedata = comp_sidedata
4089 4089 else:
4090 4090 sidedata_compression_mode = COMP_MODE_INLINE
4091 4091 serialized_sidedata = comp_sidedata
4092 4092 if entry[8] != 0 or entry[9] != 0:
4093 4093 # rewriting entries that already have sidedata is not
4094 4094 # supported yet, because it introduces garbage data in the
4095 4095 # revlog.
4096 4096 msg = b"rewriting existing sidedata is not supported yet"
4097 4097 raise error.Abort(msg)
4098 4098
4099 4099 # Apply (potential) flags to add and to remove after running
4100 4100 # the sidedata helpers
4101 4101 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4102 4102 entry_update = (
4103 4103 current_offset,
4104 4104 len(serialized_sidedata),
4105 4105 new_offset_flags,
4106 4106 sidedata_compression_mode,
4107 4107 )
4108 4108
4109 4109 # the sidedata computation might have moved the file cursors around
4110 4110 sdfh.seek(current_offset, os.SEEK_SET)
4111 4111 sdfh.write(serialized_sidedata)
4112 4112 new_entries.append(entry_update)
4113 4113 current_offset += len(serialized_sidedata)
4114 4114 self._docket.sidedata_end = sdfh.tell()
4115 4115
4116 4116 # rewrite the new index entries
4117 4117 ifh.seek(startrev * self.index.entry_size)
4118 4118 for i, e in enumerate(new_entries):
4119 4119 rev = startrev + i
4120 4120 self.index.replace_sidedata_info(rev, *e)
4121 4121 packed = self.index.entry_binary(rev)
4122 4122 if rev == 0 and self._docket is None:
4123 4123 header = self._format_flags | self._format_version
4124 4124 header = self.index.pack_header(header)
4125 4125 packed = header + packed
4126 4126 ifh.write(packed)