rust-pyo3: plugging in the dagop module...
Georges Racinet
r53312:4eec920b default
@@ -1,4244 +1,4244 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import annotations
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import io
21 21 import os
22 22 import struct
23 23 import typing
24 24 import weakref
25 25 import zlib
26 26
27 27 from typing import (
28 28 Iterable,
29 29 Iterator,
30 30 Optional,
31 31 Tuple,
32 32 )
33 33
34 34 # import stuff from node for others to import from revlog
35 35 from .node import (
36 36 bin,
37 37 hex,
38 38 nullrev,
39 39 sha1nodeconstants,
40 40 short,
41 41 wdirrev,
42 42 )
43 43 from .i18n import _
44 44 from .revlogutils.constants import (
45 45 ALL_KINDS,
46 46 CHANGELOGV2,
47 47 COMP_MODE_DEFAULT,
48 48 COMP_MODE_INLINE,
49 49 COMP_MODE_PLAIN,
50 50 DELTA_BASE_REUSE_NO,
51 51 DELTA_BASE_REUSE_TRY,
52 52 ENTRY_RANK,
53 53 FEATURES_BY_VERSION,
54 54 FLAG_GENERALDELTA,
55 55 FLAG_INLINE_DATA,
56 56 INDEX_HEADER,
57 57 KIND_CHANGELOG,
58 58 KIND_FILELOG,
59 59 RANK_UNKNOWN,
60 60 REVLOGV0,
61 61 REVLOGV1,
62 62 REVLOGV1_FLAGS,
63 63 REVLOGV2,
64 64 REVLOGV2_FLAGS,
65 65 REVLOG_DEFAULT_FLAGS,
66 66 REVLOG_DEFAULT_FORMAT,
67 67 REVLOG_DEFAULT_VERSION,
68 68 SUPPORTED_FLAGS,
69 69 )
70 70 from .revlogutils.flagutil import (
71 71 REVIDX_DEFAULT_FLAGS,
72 72 REVIDX_ELLIPSIS,
73 73 REVIDX_EXTSTORED,
74 74 REVIDX_FLAGS_ORDER,
75 75 REVIDX_HASCOPIESINFO,
76 76 REVIDX_ISCENSORED,
77 77 REVIDX_RAWTEXT_CHANGING_FLAGS,
78 78 )
79 79 from .thirdparty import attr
80 80
81 81 # Force pytype to use the non-vendored package
82 82 if typing.TYPE_CHECKING:
83 83 # noinspection PyPackageRequirements
84 84 import attr
85 85 from .pure.parsers import BaseIndexObject
86 86
87 87 from . import (
88 88 ancestor,
89 89 dagop,
90 90 error,
91 91 mdiff,
92 92 policy,
93 93 pycompat,
94 94 revlogutils,
95 95 templatefilters,
96 96 util,
97 97 vfs as vfsmod,
98 98 )
99 99 from .interfaces import (
100 100 repository,
101 101 )
102 102 from .revlogutils import (
103 103 deltas as deltautil,
104 104 docket as docketutil,
105 105 flagutil,
106 106 nodemap as nodemaputil,
107 107 randomaccessfile,
108 108 revlogv0,
109 109 rewrite,
110 110 sidedata as sidedatautil,
111 111 )
112 112 from .utils import (
113 113 storageutil,
114 114 stringutil,
115 115 )
116 116
117 117 # blanket usage of all the names to prevent pyflakes complaints
118 118 # We need these names available in the module for extensions.
119 119
120 120 REVLOGV0
121 121 REVLOGV1
122 122 REVLOGV2
123 123 CHANGELOGV2
124 124 FLAG_INLINE_DATA
125 125 FLAG_GENERALDELTA
126 126 REVLOG_DEFAULT_FLAGS
127 127 REVLOG_DEFAULT_FORMAT
128 128 REVLOG_DEFAULT_VERSION
129 129 REVLOGV1_FLAGS
130 130 REVLOGV2_FLAGS
131 131 REVIDX_ISCENSORED
132 132 REVIDX_ELLIPSIS
133 133 REVIDX_HASCOPIESINFO
134 134 REVIDX_EXTSTORED
135 135 REVIDX_DEFAULT_FLAGS
136 136 REVIDX_FLAGS_ORDER
137 137 REVIDX_RAWTEXT_CHANGING_FLAGS
138 138
139 139 parsers = policy.importmod('parsers')
140 140 rustancestor = policy.importrust('ancestor')
141 rustdagop = policy.importrust('dagop')
141 rustdagop = policy.importrust('dagop', pyo3=True)
142 142 rustrevlog = policy.importrust('revlog')
143 143
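Note: the only functional change in this hunk is the `rustdagop` import above. `policy.importrust('dagop', pyo3=True)` now selects the PyO3-based Rust binding for the dagop module instead of the legacy rust-cpython one; `policy.importrust` returns `None` when no Rust extension is available, so call sites keep a pure-Python fallback. A minimal sketch of that guard pattern (the `headrevs` dispatch mirrors how it is used elsewhere in this file; it is illustrative, not a verbatim quote of this hunk):

    rustdagop = policy.importrust('dagop', pyo3=True)  # None without Rust

    def headrevs(revlog, revs):
        # Prefer the PyO3-backed dagop when the extension loaded.
        if rustdagop is not None:
            return rustdagop.headrevs(revlog.index, revs)
        # Fall back to the pure-Python implementation.
        return dagop.headrevs(revs, revlog._uncheckedparentrevs)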
144 144 # Aliased for performance.
145 145 _zlibdecompress = zlib.decompress
146 146
147 147 # max size of inline data embedded into a revlog
148 148 _maxinline = 131072
149 149
150 150
151 151 # Flag processors for REVIDX_ELLIPSIS.
152 152 def ellipsisreadprocessor(rl, text):
153 153 return text, False
154 154
155 155
156 156 def ellipsiswriteprocessor(rl, text):
157 157 return text, False
158 158
159 159
160 160 def ellipsisrawprocessor(rl, text):
161 161 return False
162 162
163 163
164 164 ellipsisprocessor = (
165 165 ellipsisreadprocessor,
166 166 ellipsiswriteprocessor,
167 167 ellipsisrawprocessor,
168 168 )
169 169
170 170
171 171 def _verify_revision(rl, skipflags, state, node):
172 172 """Verify the integrity of the given revlog ``node`` while providing a hook
173 173 point for extensions to influence the operation."""
174 174 if skipflags:
175 175 state[b'skipread'].add(node)
176 176 else:
177 177 # Side-effect: read content and verify hash.
178 178 rl.revision(node)
179 179
180 180
181 181 # True if a fast implementation for persistent-nodemap is available
182 182 #
183 183 # We also consider that we have a "fast" implementation in "pure" python
184 184 # because people using pure don't really have performance considerations
185 185 # (and a wheelbarrow of other slowness sources)
186 186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
187 187 parsers, 'BaseIndexObject'
188 188 )
189 189
190 190
191 191 @attr.s(slots=True)
192 192 class revlogrevisiondelta: # (repository.irevisiondelta)
193 193 node = attr.ib()
194 194 p1node = attr.ib()
195 195 p2node = attr.ib()
196 196 basenode = attr.ib()
197 197 flags = attr.ib()
198 198 baserevisionsize = attr.ib()
199 199 revision = attr.ib()
200 200 delta = attr.ib()
201 201 sidedata = attr.ib()
202 202 protocol_flags = attr.ib()
203 203 linknode = attr.ib(default=None)
204 204
205 205
206 206 @attr.s(frozen=True)
207 207 class revlogproblem: # (repository.iverifyproblem)
208 208 warning = attr.ib(default=None, type=Optional[bytes])
209 209 error = attr.ib(default=None, type=Optional[bytes])
210 210 node = attr.ib(default=None, type=Optional[bytes])
211 211
212 212
213 213 def parse_index_v1(data, inline):
214 214 # call the C implementation to parse the index data
215 215 index, cache = parsers.parse_index2(data, inline)
216 216 return index, cache
217 217
218 218
219 219 def parse_index_v2(data, inline):
220 220 # call the C implementation to parse the index data
221 221 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
222 222 return index, cache
223 223
224 224
225 225 def parse_index_cl_v2(data, inline):
226 226 # call the C implementation to parse the index data
227 227 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
228 228 return index, cache
229 229
230 230
231 231 if hasattr(parsers, 'parse_index_devel_nodemap'):
232 232
233 233 def parse_index_v1_nodemap(data, inline):
234 234 index, cache = parsers.parse_index_devel_nodemap(data, inline)
235 235 return index, cache
236 236
237 237 else:
238 238 parse_index_v1_nodemap = None
239 239
240 240
241 241 def parse_index_v1_rust(data, inline, default_header):
242 242 cache = (0, data) if inline else None
243 243 return rustrevlog.Index(data, default_header), cache
244 244
245 245
246 246 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
247 247 # signed integer)
248 248 _maxentrysize = 0x7FFFFFFF
249 249
250 250 FILE_TOO_SHORT_MSG = _(
251 251 b'cannot read from revlog %s;'
252 252 b' expected %d bytes from offset %d, data size is %d'
253 253 )
254 254
255 255 hexdigits = b'0123456789abcdefABCDEF'
256 256
257 257
258 258 class _Config:
259 259 def copy(self):
260 260 return self.__class__(**self.__dict__)
261 261
262 262
263 263 @attr.s()
264 264 class FeatureConfig(_Config):
265 265 """Hold configuration values about the available revlog features"""
266 266
267 267 # the default compression engine
268 268 compression_engine = attr.ib(default=b'zlib')
269 269 # compression engines options
270 270 compression_engine_options = attr.ib(default=attr.Factory(dict))
271 271
272 272 # can we use censor on this revlog
273 273 censorable = attr.ib(default=False)
274 274 # does this revlog use the "side data" feature
275 275 has_side_data = attr.ib(default=False)
276 276 # might remove rank configuration once the computation has no impact
277 277 compute_rank = attr.ib(default=False)
278 278 # parent order is supposed to be semantically irrelevant, so we
279 279 # normally re-sort parents to ensure that the first parent is non-null,
280 280 # if there is a non-null parent at all.
281 281 # filelog abuses the parent order as a flag to mark some instances of
282 282 # meta-encoded files, so allow it to disable this behavior.
283 283 canonical_parent_order = attr.ib(default=False)
284 284 # can ellipsis commit be used
285 285 enable_ellipsis = attr.ib(default=False)
286 286
287 287 def copy(self):
288 288 new = super().copy()
289 289 new.compression_engine_options = self.compression_engine_options.copy()
290 290 return new
291 291
292 292
293 293 @attr.s()
294 294 class DataConfig(_Config):
295 295 """Hold configuration values about how the revlog data is read"""
296 296
297 297 # should we try to open the "pending" version of the revlog
298 298 try_pending = attr.ib(default=False)
299 299 # should we try to open the "split" version of the revlog
300 300 try_split = attr.ib(default=False)
301 301 # When True, indexfile should be opened with checkambig=True at writing,
302 302 # to avoid file stat ambiguity.
303 303 check_ambig = attr.ib(default=False)
304 304
305 305 # If true, use mmap instead of reading to deal with large index
306 306 mmap_large_index = attr.ib(default=False)
307 307 # the size at which an index is considered large
308 308 mmap_index_threshold = attr.ib(default=None)
309 309 # How much data to read and cache into the raw revlog data cache.
310 310 chunk_cache_size = attr.ib(default=65536)
311 311
312 312 # The size of the uncompressed cache compared to the largest revision seen.
313 313 uncompressed_cache_factor = attr.ib(default=None)
314 314
315 315 # The number of chunks cached
316 316 uncompressed_cache_count = attr.ib(default=None)
317 317
318 318 # Allow sparse reading of the revlog data
319 319 with_sparse_read = attr.ib(default=False)
320 320 # minimal density of a sparse read chunk
321 321 sr_density_threshold = attr.ib(default=0.50)
322 322 # minimal size of data we skip when performing sparse read
323 323 sr_min_gap_size = attr.ib(default=262144)
324 324
325 325 # are deltas encoded against arbitrary bases.
326 326 generaldelta = attr.ib(default=False)
327 327
328 328
329 329 @attr.s()
330 330 class DeltaConfig(_Config):
331 331 """Hold configuration values about how new deltas are computed
332 332
333 333 Some attributes are duplicated from DataConfig to help keep each object
334 334 self-contained.
335 335 """
336 336
337 337 # can deltas be encoded against arbitrary bases.
338 338 general_delta = attr.ib(default=False)
339 339 # Allow sparse writing of the revlog data
340 340 sparse_revlog = attr.ib(default=False)
341 341 # maximum length of a delta chain
342 342 max_chain_len = attr.ib(default=None)
343 343 # Maximum distance between delta chain base start and end
344 344 max_deltachain_span = attr.ib(default=-1)
345 345 # If `upper_bound_comp` is not None, this is the expected maximal gain from
346 346 # compression for the data content.
347 347 upper_bound_comp = attr.ib(default=None)
348 348 # Should we try a delta against both parents
349 349 delta_both_parents = attr.ib(default=True)
350 350 # Test delta base candidate groups in chunks of this maximal size.
351 351 candidate_group_chunk_size = attr.ib(default=0)
352 352 # Should we display debug information about delta computation
353 353 debug_delta = attr.ib(default=False)
354 354 # trust incoming delta by default
355 355 lazy_delta = attr.ib(default=True)
356 356 # trust the base of incoming delta by default
357 357 lazy_delta_base = attr.ib(default=False)
358 358
359 359
360 360 class _InnerRevlog:
361 361 """An inner layer of the revlog object
362 362
363 363 That layer exists to be able to delegate some operations to Rust; its
364 364 boundaries are arbitrary and based on what we can delegate to Rust.
365 365 """
366 366
367 367 opener: vfsmod.vfs
368 368
369 369 def __init__(
370 370 self,
371 371 opener: vfsmod.vfs,
372 372 index,
373 373 index_file,
374 374 data_file,
375 375 sidedata_file,
376 376 inline,
377 377 data_config,
378 378 delta_config,
379 379 feature_config,
380 380 chunk_cache,
381 381 default_compression_header,
382 382 ):
383 383 self.opener = opener
384 384 self.index: BaseIndexObject = index
385 385
386 386 self.index_file = index_file
387 387 self.data_file = data_file
388 388 self.sidedata_file = sidedata_file
389 389 self.inline = inline
390 390 self.data_config = data_config
391 391 self.delta_config = delta_config
392 392 self.feature_config = feature_config
393 393
394 394 # used during diverted write.
395 395 self._orig_index_file = None
396 396
397 397 self._default_compression_header = default_compression_header
398 398
399 399 # index
400 400
401 401 # 3-tuple of file handles being used for active writing.
402 402 self._writinghandles = None
403 403
404 404 self._segmentfile = randomaccessfile.randomaccessfile(
405 405 self.opener,
406 406 (self.index_file if self.inline else self.data_file),
407 407 self.data_config.chunk_cache_size,
408 408 chunk_cache,
409 409 )
410 410 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
411 411 self.opener,
412 412 self.sidedata_file,
413 413 self.data_config.chunk_cache_size,
414 414 )
415 415
416 416 # revlog header -> revlog compressor
417 417 self._decompressors = {}
418 418 # 3-tuple of (node, rev, text) for a raw revision.
419 419 self._revisioncache = None
420 420
421 421 # cache some uncompressed chunks
422 422 # rev → uncompressed_chunk
423 423 #
424 424 # the max cost is dynamically updated to be proportional to the
425 425 # size of the revisions we actually encounter.
426 426 self._uncompressed_chunk_cache = None
427 427 if self.data_config.uncompressed_cache_factor is not None:
428 428 self._uncompressed_chunk_cache = util.lrucachedict(
429 429 self.data_config.uncompressed_cache_count,
430 430 maxcost=65536, # some arbitrary initial value
431 431 )
432 432
433 433 self._delay_buffer = None
434 434
435 435 def __len__(self):
436 436 return len(self.index)
437 437
438 438 def clear_cache(self):
439 439 assert not self.is_delaying
440 440 self._revisioncache = None
441 441 if self._uncompressed_chunk_cache is not None:
442 442 self._uncompressed_chunk_cache.clear()
443 443 self._segmentfile.clear_cache()
444 444 self._segmentfile_sidedata.clear_cache()
445 445
446 446 @property
447 447 def canonical_index_file(self):
448 448 if self._orig_index_file is not None:
449 449 return self._orig_index_file
450 450 return self.index_file
451 451
452 452 @property
453 453 def is_delaying(self):
454 454 """is the revlog currently delaying the visibility of written data?
455 455
456 456 The delaying mechanism can be either in-memory or written on disk in a
457 457 side-file."""
458 458 return (self._delay_buffer is not None) or (
459 459 self._orig_index_file is not None
460 460 )
461 461
462 462 # Derived from index values.
463 463
464 464 def start(self, rev):
465 465 """the offset of the data chunk for this revision"""
466 466 return int(self.index[rev][0] >> 16)
467 467
468 468 def length(self, rev):
469 469 """the length of the data chunk for this revision"""
470 470 return self.index[rev][1]
471 471
472 472 def end(self, rev):
473 473 """the end of the data chunk for this revision"""
474 474 return self.start(rev) + self.length(rev)
475 475
476 476 def deltaparent(self, rev):
477 477 """return deltaparent of the given revision"""
478 478 base = self.index[rev][3]
479 479 if base == rev:
480 480 return nullrev
481 481 elif self.delta_config.general_delta:
482 482 return base
483 483 else:
484 484 return rev - 1
485 485
486 486 def issnapshot(self, rev):
487 487 """tells whether rev is a snapshot"""
488 488 if not self.delta_config.sparse_revlog:
489 489 return self.deltaparent(rev) == nullrev
490 490 elif hasattr(self.index, 'issnapshot'):
491 491 # directly assign the method to cache the testing and access
492 492 self.issnapshot = self.index.issnapshot
493 493 return self.issnapshot(rev)
494 494 if rev == nullrev:
495 495 return True
496 496 entry = self.index[rev]
497 497 base = entry[3]
498 498 if base == rev:
499 499 return True
500 500 if base == nullrev:
501 501 return True
502 502 p1 = entry[5]
503 503 while self.length(p1) == 0:
504 504 b = self.deltaparent(p1)
505 505 if b == p1:
506 506 break
507 507 p1 = b
508 508 p2 = entry[6]
509 509 while self.length(p2) == 0:
510 510 b = self.deltaparent(p2)
511 511 if b == p2:
512 512 break
513 513 p2 = b
514 514 if base == p1 or base == p2:
515 515 return False
516 516 return self.issnapshot(base)
517 517
518 518 def _deltachain(self, rev, stoprev=None):
519 519 """Obtain the delta chain for a revision.
520 520
521 521 ``stoprev`` specifies a revision to stop at. If not specified, we
522 522 stop at the base of the chain.
523 523
524 524 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
525 525 revs in ascending order and ``stopped`` is a bool indicating whether
526 526 ``stoprev`` was hit.
527 527 """
528 528 generaldelta = self.delta_config.general_delta
529 529 # Try C implementation.
530 530 try:
531 531 return self.index.deltachain(
532 532 rev, stoprev, generaldelta
533 533 ) # pytype: disable=attribute-error
534 534 except AttributeError:
535 535 pass
536 536
537 537 chain = []
538 538
539 539 # Alias to prevent attribute lookup in tight loop.
540 540 index = self.index
541 541
542 542 iterrev = rev
543 543 e = index[iterrev]
544 544 while iterrev != e[3] and iterrev != stoprev:
545 545 chain.append(iterrev)
546 546 if generaldelta:
547 547 iterrev = e[3]
548 548 else:
549 549 iterrev -= 1
550 550 e = index[iterrev]
551 551
552 552 if iterrev == stoprev:
553 553 stopped = True
554 554 else:
555 555 chain.append(iterrev)
556 556 stopped = False
557 557
558 558 chain.reverse()
559 559 return chain, stopped
560 560
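Note: the chain returned here is consumed by `raw_text` further down — the first chunk of the chain is a full base text and every later chunk is a delta applied on top. A condensed sketch of that consumption (see `raw_text` below for the real flow, including cache handling):

    chain, stopped = inner._deltachain(rev)   # revs in ascending order
    bins = inner._chunks(chain)               # decompressed chunks
    rawtext = mdiff.patches(bytes(bins[0]), bins[1:])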
561 561 @util.propertycache
562 562 def _compressor(self):
563 563 engine = util.compengines[self.feature_config.compression_engine]
564 564 return engine.revlogcompressor(
565 565 self.feature_config.compression_engine_options
566 566 )
567 567
568 568 @util.propertycache
569 569 def _decompressor(self):
570 570 """the default decompressor"""
571 571 if self._default_compression_header is None:
572 572 return None
573 573 t = self._default_compression_header
574 574 c = self._get_decompressor(t)
575 575 return c.decompress
576 576
577 577 def _get_decompressor(self, t: bytes):
578 578 try:
579 579 compressor = self._decompressors[t]
580 580 except KeyError:
581 581 try:
582 582 engine = util.compengines.forrevlogheader(t)
583 583 compressor = engine.revlogcompressor(
584 584 self.feature_config.compression_engine_options
585 585 )
586 586 self._decompressors[t] = compressor
587 587 except KeyError:
588 588 raise error.RevlogError(
589 589 _(b'unknown compression type %s') % binascii.hexlify(t)
590 590 )
591 591 return compressor
592 592
593 593 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
594 594 """Generate a possibly-compressed representation of data."""
595 595 if not data:
596 596 return b'', data
597 597
598 598 compressed = self._compressor.compress(data)
599 599
600 600 if compressed:
601 601 # The revlog compressor added the header in the returned data.
602 602 return b'', compressed
603 603
604 604 if data[0:1] == b'\0':
605 605 return b'', data
606 606 return b'u', data
607 607
608 608 def decompress(self, data: bytes):
609 609 """Decompress a revlog chunk.
610 610
611 611 The chunk is expected to begin with a header identifying the
612 612 format type so it can be routed to an appropriate decompressor.
613 613 """
614 614 if not data:
615 615 return data
616 616
617 617 # Revlogs are read much more frequently than they are written and many
618 618 # chunks only take microseconds to decompress, so performance is
619 619 # important here.
620 620 #
621 621 # We can make a few assumptions about revlogs:
622 622 #
623 623 # 1) the majority of chunks will be compressed (as opposed to inline
624 624 # raw data).
625 625 # 2) decompressing *any* data will likely be at least 10x slower than
626 626 # returning raw inline data.
627 627 # 3) we want to prioritize common and officially supported compression
628 628 # engines
629 629 #
630 630 # It follows that we want to optimize for "decompress compressed data
631 631 # when encoded with common and officially supported compression engines"
632 632 # case over "raw data" and "data encoded by less common or non-official
633 633 # compression engines." That is why we have the inline lookup first
634 634 # followed by the compengines lookup.
635 635 #
636 636 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
637 637 # compressed chunks. And this matters for changelog and manifest reads.
638 638 t = data[0:1]
639 639
640 640 if t == b'x':
641 641 try:
642 642 return _zlibdecompress(data)
643 643 except zlib.error as e:
644 644 raise error.RevlogError(
645 645 _(b'revlog decompress error: %s')
646 646 % stringutil.forcebytestr(e)
647 647 )
648 648 # '\0' is more common than 'u' so it goes first.
649 649 elif t == b'\0':
650 650 return data
651 651 elif t == b'u':
652 652 return util.buffer(data, 1)
653 653
654 654 compressor = self._get_decompressor(t)
655 655
656 656 return compressor.decompress(data)
657 657
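Note: a quick reference for the one-byte chunk headers that `compress` emits and `decompress` dispatches on, as implemented above:

    b'u'   explicitly-stored uncompressed data; strip the marker byte
    b'\0'  raw data that already starts with NUL, stored verbatim
    b'x'   zlib-compressed chunk (the common case, checked first)
    other  resolved to a pluggable engine via util.compengines.forrevlogheader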
658 658 @contextlib.contextmanager
659 659 def reading(self):
660 660 """Context manager that keeps data and sidedata files open for reading"""
661 661 if len(self.index) == 0:
662 662 yield # nothing to be read
663 663 elif self._delay_buffer is not None and self.inline:
664 664 msg = "revlog with delayed write should not be inline"
665 665 raise error.ProgrammingError(msg)
666 666 else:
667 667 with self._segmentfile.reading():
668 668 with self._segmentfile_sidedata.reading():
669 669 yield
670 670
671 671 @property
672 672 def is_writing(self):
673 673 """True if a writing context is open"""
674 674 return self._writinghandles is not None
675 675
676 676 @property
677 677 def is_open(self):
678 678 """True if any file handle is being held
679 679
680 680 Used for assert and debug in the python code"""
681 681 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
682 682
683 683 @contextlib.contextmanager
684 684 def writing(self, transaction, data_end=None, sidedata_end=None):
685 685 """Open the revlog files for writing
686 686
687 687 Adding content to a revlog should be done within such a context.
688 688 """
689 689 if self.is_writing:
690 690 yield
691 691 else:
692 692 ifh = dfh = sdfh = None
693 693 try:
694 694 r = len(self.index)
695 695 # opening the data file.
696 696 dsize = 0
697 697 if r:
698 698 dsize = self.end(r - 1)
699 699 dfh = None
700 700 if not self.inline:
701 701 try:
702 702 dfh = self.opener(self.data_file, mode=b"r+")
703 703 if data_end is None:
704 704 dfh.seek(0, os.SEEK_END)
705 705 else:
706 706 dfh.seek(data_end, os.SEEK_SET)
707 707 except FileNotFoundError:
708 708 dfh = self.opener(self.data_file, mode=b"w+")
709 709 transaction.add(self.data_file, dsize)
710 710 if self.sidedata_file is not None:
711 711 assert sidedata_end is not None
712 712 # revlog-v2 does not inline, help Pytype
713 713 assert dfh is not None
714 714 try:
715 715 sdfh = self.opener(self.sidedata_file, mode=b"r+")
716 716 dfh.seek(sidedata_end, os.SEEK_SET)
717 717 except FileNotFoundError:
718 718 sdfh = self.opener(self.sidedata_file, mode=b"w+")
719 719 transaction.add(self.sidedata_file, sidedata_end)
720 720
721 721 # opening the index file.
722 722 isize = r * self.index.entry_size
723 723 ifh = self.__index_write_fp()
724 724 if self.inline:
725 725 transaction.add(self.index_file, dsize + isize)
726 726 else:
727 727 transaction.add(self.index_file, isize)
728 728 # exposing all file handles for writing.
729 729 self._writinghandles = (ifh, dfh, sdfh)
730 730 self._segmentfile.writing_handle = ifh if self.inline else dfh
731 731 self._segmentfile_sidedata.writing_handle = sdfh
732 732 yield
733 733 finally:
734 734 self._writinghandles = None
735 735 self._segmentfile.writing_handle = None
736 736 self._segmentfile_sidedata.writing_handle = None
737 737 if dfh is not None:
738 738 dfh.close()
739 739 if sdfh is not None:
740 740 sdfh.close()
741 741 # closing the index file last to avoid exposing references to
742 742 # potentially unflushed data content.
743 743 if ifh is not None:
744 744 ifh.close()
745 745
746 746 def __index_write_fp(self, index_end=None):
747 747 """internal method to open the index file for writing
748 748
749 749 You should not use this directly; use `_writing` instead
750 750 """
751 751 try:
752 752 if self._delay_buffer is None:
753 753 f = self.opener(
754 754 self.index_file,
755 755 mode=b"r+",
756 756 checkambig=self.data_config.check_ambig,
757 757 )
758 758 else:
759 759 # check_ambig affects the way we open the file for writing; however,
760 760 # here we do not actually open a file for writing, as writes
761 761 # will be appended to a delay_buffer. So check_ambig is not
762 762 # meaningful and is unneeded here.
763 763 f = randomaccessfile.appender(
764 764 self.opener, self.index_file, b"r+", self._delay_buffer
765 765 )
766 766 if index_end is None:
767 767 f.seek(0, os.SEEK_END)
768 768 else:
769 769 f.seek(index_end, os.SEEK_SET)
770 770 return f
771 771 except FileNotFoundError:
772 772 if self._delay_buffer is None:
773 773 return self.opener(
774 774 self.index_file,
775 775 mode=b"w+",
776 776 checkambig=self.data_config.check_ambig,
777 777 )
778 778 else:
779 779 return randomaccessfile.appender(
780 780 self.opener, self.index_file, b"w+", self._delay_buffer
781 781 )
782 782
783 783 def __index_new_fp(self):
784 784 """internal method to create a new index file for writing
785 785
786 786 You should not use this unless you are upgrading from an inline revlog
787 787 """
788 788 return self.opener(
789 789 self.index_file,
790 790 mode=b"w",
791 791 checkambig=self.data_config.check_ambig,
792 792 )
793 793
794 794 def split_inline(self, tr, header, new_index_file_path=None):
795 795 """split the data of an inline revlog into an index and a data file"""
796 796 assert self._delay_buffer is None
797 797 existing_handles = False
798 798 if self._writinghandles is not None:
799 799 existing_handles = True
800 800 fp = self._writinghandles[0]
801 801 fp.flush()
802 802 fp.close()
803 803 # We can't use the cached file handle after close(). So prevent
804 804 # its usage.
805 805 self._writinghandles = None
806 806 self._segmentfile.writing_handle = None
807 807 # No need to deal with sidedata writing handle as it is only
808 808 # relevant with revlog-v2 which is never inline, not reaching
809 809 # this code
810 810
811 811 new_dfh = self.opener(self.data_file, mode=b"w+")
812 812 new_dfh.truncate(0) # drop any potentially existing data
813 813 try:
814 814 with self.reading():
815 815 for r in range(len(self.index)):
816 816 new_dfh.write(self.get_segment_for_revs(r, r)[1])
817 817 new_dfh.flush()
818 818
819 819 if new_index_file_path is not None:
820 820 self.index_file = new_index_file_path
821 821 with self.__index_new_fp() as fp:
822 822 self.inline = False
823 823 for i in range(len(self.index)):
824 824 e = self.index.entry_binary(i)
825 825 if i == 0:
826 826 packed_header = self.index.pack_header(header)
827 827 e = packed_header + e
828 828 fp.write(e)
829 829
830 830 # If we don't use side-write, the temp file replaces the real
831 831 # index when we exit the context manager
832 832
833 833 self._segmentfile = randomaccessfile.randomaccessfile(
834 834 self.opener,
835 835 self.data_file,
836 836 self.data_config.chunk_cache_size,
837 837 )
838 838
839 839 if existing_handles:
840 840 # switched from inline to conventional; reopen the index
841 841 ifh = self.__index_write_fp()
842 842 self._writinghandles = (ifh, new_dfh, None)
843 843 self._segmentfile.writing_handle = new_dfh
844 844 new_dfh = None
845 845 # No need to deal with sidedata writing handle as it is only
846 846 # relevant with revlog-v2 which is never inline, not reaching
847 847 # this code
848 848 finally:
849 849 if new_dfh is not None:
850 850 new_dfh.close()
851 851 return self.index_file
852 852
853 853 def get_segment_for_revs(self, startrev, endrev):
854 854 """Obtain a segment of raw data corresponding to a range of revisions.
855 855
856 856 Accepts the start and end revisions. Reads are performed through
857 857 the internal segment file; if it is read, its seek position will
858 858 not be preserved.
859 859
860 860 Requests for data may be satisfied by a cache.
861 861
862 862 Returns a 2-tuple of (offset, data) for the requested range of
863 863 revisions. Offset is the integer offset from the beginning of the
864 864 revlog and data is a str or buffer of the raw byte data.
865 865
866 866 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
867 867 to determine where each revision's data begins and ends.
868 868
869 869 API: we should consider making this a private part of the InnerRevlog
870 870 at some point.
871 871 """
872 872 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
873 873 # (functions are expensive).
874 874 index = self.index
875 875 istart = index[startrev]
876 876 start = int(istart[0] >> 16)
877 877 if startrev == endrev:
878 878 end = start + istart[1]
879 879 else:
880 880 iend = index[endrev]
881 881 end = int(iend[0] >> 16) + iend[1]
882 882
883 883 if self.inline:
884 884 start += (startrev + 1) * self.index.entry_size
885 885 end += (endrev + 1) * self.index.entry_size
886 886 length = end - start
887 887
888 888 return start, self._segmentfile.read_chunk(start, length)
889 889
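Note: callers carve individual revisions out of the returned segment using `start`/`length`; a sketch mirroring the loop in `_chunks` below (for inline revlogs the offsets are additionally shifted past the interleaved index entries):

    offset, data = inner.get_segment_for_revs(firstrev, lastrev)
    chunkstart = inner.start(rev)    # absolute offset in the data file
    chunk = util.buffer(data, chunkstart - offset, inner.length(rev))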
890 890 def _chunk(self, rev):
891 891 """Obtain a single decompressed chunk for a revision.
892 892
893 893 Accepts an integer revision. Reads are performed through the
894 894 internal segment file; if it is read, its seek position will not
895 895 be preserved.
896 896
897 897 Returns a str holding uncompressed data for the requested revision.
898 898 """
899 899 if self._uncompressed_chunk_cache is not None:
900 900 uncomp = self._uncompressed_chunk_cache.get(rev)
901 901 if uncomp is not None:
902 902 return uncomp
903 903
904 904 compression_mode = self.index[rev][10]
905 905 data = self.get_segment_for_revs(rev, rev)[1]
906 906 if compression_mode == COMP_MODE_PLAIN:
907 907 uncomp = data
908 908 elif compression_mode == COMP_MODE_DEFAULT:
909 909 uncomp = self._decompressor(data)
910 910 elif compression_mode == COMP_MODE_INLINE:
911 911 uncomp = self.decompress(data)
912 912 else:
913 913 msg = b'unknown compression mode %d'
914 914 msg %= compression_mode
915 915 raise error.RevlogError(msg)
916 916 if self._uncompressed_chunk_cache is not None:
917 917 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
918 918 return uncomp
919 919
920 920 def _chunks(self, revs, targetsize=None):
921 921 """Obtain decompressed chunks for the specified revisions.
922 922
923 923 Accepts an iterable of numeric revisions that are assumed to be in
924 924 ascending order.
925 925
926 926 This function is similar to calling ``self._chunk()`` multiple times,
927 927 but is faster.
928 928
929 929 Returns a list with decompressed data for each requested revision.
930 930 """
931 931 if not revs:
932 932 return []
933 933 start = self.start
934 934 length = self.length
935 935 inline = self.inline
936 936 iosize = self.index.entry_size
937 937 buffer = util.buffer
938 938
939 939 fetched_revs = []
940 940 fadd = fetched_revs.append
941 941
942 942 chunks = []
943 943 ladd = chunks.append
944 944
945 945 if self._uncompressed_chunk_cache is None:
946 946 fetched_revs = revs
947 947 else:
948 948 for rev in revs:
949 949 cached_value = self._uncompressed_chunk_cache.get(rev)
950 950 if cached_value is None:
951 951 fadd(rev)
952 952 else:
953 953 ladd((rev, cached_value))
954 954
955 955 if not fetched_revs:
956 956 slicedchunks = ()
957 957 elif not self.data_config.with_sparse_read:
958 958 slicedchunks = (fetched_revs,)
959 959 else:
960 960 slicedchunks = deltautil.slicechunk(
961 961 self,
962 962 fetched_revs,
963 963 targetsize=targetsize,
964 964 )
965 965
966 966 for revschunk in slicedchunks:
967 967 firstrev = revschunk[0]
968 968 # Skip trailing revisions with empty diff
969 969 for lastrev in revschunk[::-1]:
970 970 if length(lastrev) != 0:
971 971 break
972 972
973 973 try:
974 974 offset, data = self.get_segment_for_revs(firstrev, lastrev)
975 975 except OverflowError:
976 976 # issue4215 - we can't cache a run of chunks greater than
977 977 # 2G on Windows
978 978 for rev in revschunk:
979 979 ladd((rev, self._chunk(rev)))
980 980
981 981 decomp = self.decompress
982 982 # self._decompressor might be None, but will not be used in that case
983 983 def_decomp = self._decompressor
984 984 for rev in revschunk:
985 985 chunkstart = start(rev)
986 986 if inline:
987 987 chunkstart += (rev + 1) * iosize
988 988 chunklength = length(rev)
989 989 comp_mode = self.index[rev][10]
990 990 c = buffer(data, chunkstart - offset, chunklength)
991 991 if comp_mode == COMP_MODE_PLAIN:
992 992 c = c
993 993 elif comp_mode == COMP_MODE_INLINE:
994 994 c = decomp(c)
995 995 elif comp_mode == COMP_MODE_DEFAULT:
996 996 c = def_decomp(c)
997 997 else:
998 998 msg = b'unknown compression mode %d'
999 999 msg %= comp_mode
1000 1000 raise error.RevlogError(msg)
1001 1001 ladd((rev, c))
1002 1002 if self._uncompressed_chunk_cache is not None:
1003 1003 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1004 1004
1005 1005 chunks.sort()
1006 1006 return [x[1] for x in chunks]
1007 1007
1008 1008 def raw_text(self, node, rev) -> bytes:
1009 1009 """return the possibly unvalidated rawtext for a revision
1010 1010
1011 1011 returns rawtext
1012 1012 """
1013 1013
1014 1014 # revision in the cache (could be useful to apply delta)
1015 1015 cachedrev = None
1016 1016 # An intermediate text to apply deltas to
1017 1017 basetext = None
1018 1018
1019 1019 # Check if we have the entry in cache
1020 1020 # The cache entry looks like (node, rev, rawtext)
1021 1021 if self._revisioncache:
1022 1022 cachedrev = self._revisioncache[1]
1023 1023
1024 1024 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1025 1025 if stopped:
1026 1026 basetext = self._revisioncache[2]
1027 1027
1028 1028 # drop cache to save memory, the caller is expected to
1029 1029 # update self._inner._revisioncache after validating the text
1030 1030 self._revisioncache = None
1031 1031
1032 1032 targetsize = None
1033 1033 rawsize = self.index[rev][2]
1034 1034 if 0 <= rawsize:
1035 1035 targetsize = 4 * rawsize
1036 1036
1037 1037 if self._uncompressed_chunk_cache is not None:
1038 1038 # dynamically update the uncompressed_chunk_cache size to the
1039 1039 # largest revision we saw in this revlog.
1040 1040 factor = self.data_config.uncompressed_cache_factor
1041 1041 candidate_size = rawsize * factor
1042 1042 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1043 1043 self._uncompressed_chunk_cache.maxcost = candidate_size
1044 1044
1045 1045 bins = self._chunks(chain, targetsize=targetsize)
1046 1046 if basetext is None:
1047 1047 basetext = bytes(bins[0])
1048 1048 bins = bins[1:]
1049 1049
1050 1050 rawtext = mdiff.patches(basetext, bins)
1051 1051 del basetext # let us have a chance to free memory early
1052 1052 return rawtext
1053 1053
1054 1054 def sidedata(self, rev, sidedata_end):
1055 1055 """Return the sidedata for a given revision number."""
1056 1056 index_entry = self.index[rev]
1057 1057 sidedata_offset = index_entry[8]
1058 1058 sidedata_size = index_entry[9]
1059 1059
1060 1060 if self.inline:
1061 1061 sidedata_offset += self.index.entry_size * (1 + rev)
1062 1062 if sidedata_size == 0:
1063 1063 return {}
1064 1064
1065 1065 if sidedata_end < sidedata_offset + sidedata_size:
1066 1066 filename = self.sidedata_file
1067 1067 end = sidedata_end
1068 1068 offset = sidedata_offset
1069 1069 length = sidedata_size
1070 1070 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1071 1071 raise error.RevlogError(m)
1072 1072
1073 1073 comp_segment = self._segmentfile_sidedata.read_chunk(
1074 1074 sidedata_offset, sidedata_size
1075 1075 )
1076 1076
1077 1077 comp = self.index[rev][11]
1078 1078 if comp == COMP_MODE_PLAIN:
1079 1079 segment = comp_segment
1080 1080 elif comp == COMP_MODE_DEFAULT:
1081 1081 segment = self._decompressor(comp_segment)
1082 1082 elif comp == COMP_MODE_INLINE:
1083 1083 segment = self.decompress(comp_segment)
1084 1084 else:
1085 1085 msg = b'unknown compression mode %d'
1086 1086 msg %= comp
1087 1087 raise error.RevlogError(msg)
1088 1088
1089 1089 sidedata = sidedatautil.deserialize_sidedata(segment)
1090 1090 return sidedata
1091 1091
1092 1092 def write_entry(
1093 1093 self,
1094 1094 transaction,
1095 1095 entry,
1096 1096 data,
1097 1097 link,
1098 1098 offset,
1099 1099 sidedata,
1100 1100 sidedata_offset,
1101 1101 index_end,
1102 1102 data_end,
1103 1103 sidedata_end,
1104 1104 ):
1105 1105 # Files opened in a+ mode have inconsistent behavior on various
1106 1106 # platforms. Windows requires that a file positioning call be made
1107 1107 # when the file handle transitions between reads and writes. See
1108 1108 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1109 1109 # platforms, Python or the platform itself can be buggy. Some versions
1110 1110 # of Solaris have been observed to not append at the end of the file
1111 1111 # if the file was seeked to before the end. See issue4943 for more.
1112 1112 #
1113 1113 # We work around this issue by inserting a seek() before writing.
1114 1114 # Note: This is likely not necessary on Python 3. However, because
1115 1115 # the file handle is reused for reads and may be seeked there, we need
1116 1116 # to be careful before changing this.
1117 1117 if self._writinghandles is None:
1118 1118 msg = b'adding revision outside `revlog._writing` context'
1119 1119 raise error.ProgrammingError(msg)
1120 1120 ifh, dfh, sdfh = self._writinghandles
1121 1121 if index_end is None:
1122 1122 ifh.seek(0, os.SEEK_END)
1123 1123 else:
1124 1124 ifh.seek(index_end, os.SEEK_SET)
1125 1125 if dfh:
1126 1126 if data_end is None:
1127 1127 dfh.seek(0, os.SEEK_END)
1128 1128 else:
1129 1129 dfh.seek(data_end, os.SEEK_SET)
1130 1130 if sdfh:
1131 1131 sdfh.seek(sidedata_end, os.SEEK_SET)
1132 1132
1133 1133 curr = len(self.index) - 1
1134 1134 if not self.inline:
1135 1135 transaction.add(self.data_file, offset)
1136 1136 if self.sidedata_file:
1137 1137 transaction.add(self.sidedata_file, sidedata_offset)
1138 1138 transaction.add(self.canonical_index_file, curr * len(entry))
1139 1139 if data[0]:
1140 1140 dfh.write(data[0])
1141 1141 dfh.write(data[1])
1142 1142 if sidedata:
1143 1143 sdfh.write(sidedata)
1144 1144 if self._delay_buffer is None:
1145 1145 ifh.write(entry)
1146 1146 else:
1147 1147 self._delay_buffer.append(entry)
1148 1148 elif self._delay_buffer is not None:
1149 1149 msg = b'invalid delayed write on inline revlog'
1150 1150 raise error.ProgrammingError(msg)
1151 1151 else:
1152 1152 offset += curr * self.index.entry_size
1153 1153 transaction.add(self.canonical_index_file, offset)
1154 1154 assert not sidedata
1155 1155 ifh.write(entry)
1156 1156 ifh.write(data[0])
1157 1157 ifh.write(data[1])
1158 1158 return (
1159 1159 ifh.tell(),
1160 1160 dfh.tell() if dfh else None,
1161 1161 sdfh.tell() if sdfh else None,
1162 1162 )
1163 1163
1164 1164 def _divert_index(self):
1165 1165 index_file = self.index_file
1166 1166 # when we encounter a legacy inline-changelog, split it. However it is
1167 1167 # important to use the expected filename for pending content
1168 1168 # (<radix>.a) otherwise hooks won't be seeing the content of the
1169 1169 # pending transaction.
1170 1170 if index_file.endswith(b'.s'):
1171 1171 index_file = self.index_file[:-2]
1172 1172 return index_file + b'.a'
1173 1173
1174 1174 def delay(self):
1175 1175 assert not self.is_open
1176 1176 if self.inline:
1177 1177 msg = "revlog with delayed write should not be inline"
1178 1178 raise error.ProgrammingError(msg)
1179 1179 if self._delay_buffer is not None or self._orig_index_file is not None:
1180 1180 # delay or divert already in place
1181 1181 return None
1182 1182 elif len(self.index) == 0:
1183 1183 self._orig_index_file = self.index_file
1184 1184 self.index_file = self._divert_index()
1185 1185 assert self._orig_index_file is not None
1186 1186 assert self.index_file is not None
1187 1187 if self.opener.exists(self.index_file):
1188 1188 self.opener.unlink(self.index_file)
1189 1189 return self.index_file
1190 1190 else:
1191 1191 self._delay_buffer = []
1192 1192 return None
1193 1193
1194 1194 def write_pending(self):
1195 1195 assert not self.is_open
1196 1196 if self.inline:
1197 1197 msg = "revlog with delayed write should not be inline"
1198 1198 raise error.ProgrammingError(msg)
1199 1199 if self._orig_index_file is not None:
1200 1200 return None, True
1201 1201 any_pending = False
1202 1202 pending_index_file = self._divert_index()
1203 1203 if self.opener.exists(pending_index_file):
1204 1204 self.opener.unlink(pending_index_file)
1205 1205 util.copyfile(
1206 1206 self.opener.join(self.index_file),
1207 1207 self.opener.join(pending_index_file),
1208 1208 )
1209 1209 if self._delay_buffer:
1210 1210 with self.opener(pending_index_file, b'r+') as ifh:
1211 1211 ifh.seek(0, os.SEEK_END)
1212 1212 ifh.write(b"".join(self._delay_buffer))
1213 1213 any_pending = True
1214 1214 self._delay_buffer = None
1215 1215 self._orig_index_file = self.index_file
1216 1216 self.index_file = pending_index_file
1217 1217 return self.index_file, any_pending
1218 1218
1219 1219 def finalize_pending(self):
1220 1220 assert not self.is_open
1221 1221 if self.inline:
1222 1222 msg = "revlog with delayed write should not be inline"
1223 1223 raise error.ProgrammingError(msg)
1224 1224
1225 1225 delay = self._delay_buffer is not None
1226 1226 divert = self._orig_index_file is not None
1227 1227
1228 1228 if delay and divert:
1229 1229 assert False, "unreachable"
1230 1230 elif delay:
1231 1231 if self._delay_buffer:
1232 1232 with self.opener(self.index_file, b'r+') as ifh:
1233 1233 ifh.seek(0, os.SEEK_END)
1234 1234 ifh.write(b"".join(self._delay_buffer))
1235 1235 self._delay_buffer = None
1236 1236 elif divert:
1237 1237 if self.opener.exists(self.index_file):
1238 1238 self.opener.rename(
1239 1239 self.index_file,
1240 1240 self._orig_index_file,
1241 1241 checkambig=True,
1242 1242 )
1243 1243 self.index_file = self._orig_index_file
1244 1244 self._orig_index_file = None
1245 1245 else:
1246 1246 msg = b"no delay or divert found on this revlog"
1247 1247 raise error.ProgrammingError(msg)
1248 1248 return self.canonical_index_file
1249 1249
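Note: taken together, `delay`, `write_pending`, and `finalize_pending` implement two flavors of delayed visibility. An empty revlog is *diverted*: writes go directly to the pending `<radix>.a` file, which is renamed over the real index at finalization. A non-empty revlog is *delayed*: new index entries accumulate in `_delay_buffer`, are copied alongside the existing index into the `.a` file for pending readers, and are appended to the real index at finalization.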
1250 1250
1251 1251 if typing.TYPE_CHECKING:
1252 1252 # Tell Pytype what kind of object we expect
1253 1253 ProxyBase = BaseIndexObject
1254 1254 else:
1255 1255 ProxyBase = object
1256 1256
1257 1257
1258 1258 class RustIndexProxy(ProxyBase):
1259 1259 """Wrapper around the Rust index to fake having direct access to the index.
1260 1260
1261 1261 Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
1262 1262 so we can't expose the index from Rust directly, since the `InnerRevlog`
1263 1263 already has ownership of the index. This object redirects all calls to the
1264 1264 index through the Rust-backed `InnerRevlog` glue which defines all
1265 1265 necessary forwarding methods.
1266 1266 """
1267 1267
1268 1268 def __init__(self, inner):
1269 1269 # Do not rename as it's being used to access the index from Rust
1270 1270 self.inner = inner
1271 1271
1272 1272 # TODO possibly write all index methods manually to save on overhead?
1273 1273 def __getattr__(self, name):
1274 1274 return getattr(self.inner, f"_index_{name}")
1275 1275
1276 1276 # Magic methods need to be defined explicitly
1277 1277 def __len__(self):
1278 1278 return self.inner._index___len__()
1279 1279
1280 1280 def __getitem__(self, key):
1281 1281 return self.inner._index___getitem__(key)
1282 1282
1283 1283 def __contains__(self, key):
1284 1284 return self.inner._index___contains__(key)
1285 1285
1286 1286 def __delitem__(self, key):
1287 1287 return self.inner._index___delitem__(key)
1288 1288
1289 1289
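Note: the proxy turns every attribute access into an `_index_`-prefixed lookup on the Rust-backed inner object, so Python code can keep treating the index as a regular object. Illustrative equivalences (names as defined above):

    proxy = RustIndexProxy(inner)
    len(proxy)        # -> inner._index___len__()
    proxy[rev]        # -> inner._index___getitem__(rev)
    proxy.deltachain  # -> inner._index_deltachain, via __getattr__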
1290 1290 class RustVFSWrapper:
1291 1291 """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
1292 1292 calling back multiple times into Python.
1293 1293 """
1294 1294
1295 1295 def __init__(self, inner):
1296 1296 self.inner = inner
1297 1297
1298 1298 def __call__(
1299 1299 self,
1300 1300 path: bytes,
1301 1301 mode: bytes = b"rb",
1302 1302 atomictemp=False,
1303 1303 checkambig=False,
1304 1304 ):
1305 1305 fd = self.inner.__call__(
1306 1306 path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
1307 1307 )
1308 1308 # Information that Rust needs to get ownership of the file that's
1309 1309 # being opened.
1310 1310 return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
1311 1311
1312 1312 def __getattr__(self, name):
1313 1313 return getattr(self.inner, name)
1314 1314
1315 1315
1316 1316 class revlog:
1317 1317 """
1318 1318 the underlying revision storage object
1319 1319
1320 1320 A revlog consists of two parts, an index and the revision data.
1321 1321
1322 1322 The index is a file with a fixed record size containing
1323 1323 information on each revision, including its nodeid (hash), the
1324 1324 nodeids of its parents, the position and offset of its data within
1325 1325 the data file, and the revision it's based on. Finally, each entry
1326 1326 contains a linkrev entry that can serve as a pointer to external
1327 1327 data.
1328 1328
1329 1329 The revision data itself is a linear collection of data chunks.
1330 1330 Each chunk represents a revision and is usually represented as a
1331 1331 delta against the previous chunk. To bound lookup time, runs of
1332 1332 deltas are limited to about 2 times the length of the original
1333 1333 version data. This makes retrieval of a version proportional to
1334 1334 its size, or O(1) relative to the number of revisions.
1335 1335
1336 1336 Both pieces of the revlog are written to in an append-only
1337 1337 fashion, which means we never need to rewrite a file to insert or
1338 1338 remove data, and can use some simple techniques to avoid the need
1339 1339 for locking while reading.
1340 1340
1341 1341 If checkambig, indexfile is opened with checkambig=True at
1342 1342 writing, to avoid file stat ambiguity.
1343 1343
1344 1344 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1345 1345 index will be mmapped rather than read if it is larger than the
1346 1346 configured threshold.
1347 1347
1348 1348 If censorable is True, the revlog can have censored revisions.
1349 1349
1350 1350 If `upperboundcomp` is not None, this is the expected maximal gain from
1351 1351 compression for the data content.
1352 1352
1353 1353 `concurrencychecker` is an optional function that receives 3 arguments: a
1354 1354 file handle, a filename, and an expected position. It should check whether
1355 1355 the current position in the file handle is valid, and log/warn/fail (by
1356 1356 raising).
1357 1357
1358 1358 See mercurial/revlogutils/constants.py for details about the content of an
1359 1359 index entry.
1360 1360 """
1361 1361
1362 1362 _flagserrorclass = error.RevlogError
1363 1363 _inner: "_InnerRevlog"
1364 1364
1365 1365 opener: vfsmod.vfs
1366 1366
1367 1367 @staticmethod
1368 1368 def is_inline_index(header_bytes):
1369 1369 """Determine if a revlog is inline from the initial bytes of the index"""
1370 1370 if len(header_bytes) == 0:
1371 1371 return True
1372 1372
1373 1373 header = INDEX_HEADER.unpack(header_bytes)[0]
1374 1374
1375 1375 _format_flags = header & ~0xFFFF
1376 1376 _format_version = header & 0xFFFF
1377 1377
1378 1378 features = FEATURES_BY_VERSION[_format_version]
1379 1379 return features[b'inline'](_format_flags)
1380 1380
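Note: the index header word packs feature flags into the high 16 bits and the format version into the low 16 bits, which is what the masking above extracts. For a v1 inline revlog, roughly:

    header = INDEX_HEADER.unpack(header_bytes)[0]
    version = header & 0xFFFF     # == REVLOGV1
    flags = header & ~0xFFFF      # FLAG_INLINE_DATA set for inline revlogs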
1381 1381 _docket_file: Optional[bytes]
1382 1382
1383 1383 def __init__(
1384 1384 self,
1385 1385 opener: vfsmod.vfs,
1386 1386 target,
1387 1387 radix,
1388 1388 postfix=None, # only exist for `tmpcensored` now
1389 1389 checkambig=False,
1390 1390 mmaplargeindex=False,
1391 1391 censorable=False,
1392 1392 upperboundcomp=None,
1393 1393 persistentnodemap=False,
1394 1394 concurrencychecker=None,
1395 1395 trypending=False,
1396 1396 try_split=False,
1397 1397 canonical_parent_order=True,
1398 1398 data_config=None,
1399 1399 delta_config=None,
1400 1400 feature_config=None,
1401 1401 may_inline=True, # may inline new revlog
1402 1402 ):
1403 1403 """
1404 1404 create a revlog object
1405 1405
1406 1406 opener is a function that abstracts the file opening operation
1407 1407 and can be used to implement COW semantics or the like.
1408 1408
1409 1409 `target`: a (KIND, ID) tuple that identifies the content stored in
1410 1410 this revlog. It helps the rest of the code to understand what the revlog
1411 1411 is about without having to resort to heuristics and index filename
1412 1412 analysis. Note that this must reliably be set by normal code, but
1413 1413 test, debug, or performance measurement code might not set this to an
1414 1414 accurate value.
1415 1415 """
1416 1416
1417 1417 self.radix = radix
1418 1418
1419 1419 self._docket_file = None
1420 1420 self._indexfile = None
1421 1421 self._datafile = None
1422 1422 self._sidedatafile = None
1423 1423 self._nodemap_file = None
1424 1424 self.postfix = postfix
1425 1425 self._trypending = trypending
1426 1426 self._try_split = try_split
1427 1427 self._may_inline = may_inline
1428 1428 self.uses_rust = False
1429 1429 self.opener = opener
1430 1430 if persistentnodemap:
1431 1431 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1432 1432
1433 1433 assert target[0] in ALL_KINDS
1434 1434 assert len(target) == 2
1435 1435 self.target = target
1436 1436 if feature_config is not None:
1437 1437 self.feature_config = feature_config.copy()
1438 1438 elif b'feature-config' in self.opener.options:
1439 1439 self.feature_config = self.opener.options[b'feature-config'].copy()
1440 1440 else:
1441 1441 self.feature_config = FeatureConfig()
1442 1442 self.feature_config.censorable = censorable
1443 1443 self.feature_config.canonical_parent_order = canonical_parent_order
1444 1444 if data_config is not None:
1445 1445 self.data_config = data_config.copy()
1446 1446 elif b'data-config' in self.opener.options:
1447 1447 self.data_config = self.opener.options[b'data-config'].copy()
1448 1448 else:
1449 1449 self.data_config = DataConfig()
1450 1450 self.data_config.check_ambig = checkambig
1451 1451 self.data_config.mmap_large_index = mmaplargeindex
1452 1452 if delta_config is not None:
1453 1453 self.delta_config = delta_config.copy()
1454 1454 elif b'delta-config' in self.opener.options:
1455 1455 self.delta_config = self.opener.options[b'delta-config'].copy()
1456 1456 else:
1457 1457 self.delta_config = DeltaConfig()
1458 1458 self.delta_config.upper_bound_comp = upperboundcomp
1459 1459
1460 1460 # Maps rev to chain base rev.
1461 1461 self._chainbasecache = util.lrucachedict(100)
1462 1462
1463 1463 self.index: Optional[BaseIndexObject] = None
1464 1464 self._docket = None
1465 1465 self._nodemap_docket = None
1466 1466 # Mapping of partial identifiers to full nodes.
1467 1467 self._pcache = {}
1468 1468
1469 1469 # other optional features
1470 1470
1471 1471 # Make copy of flag processors so each revlog instance can support
1472 1472 # custom flags.
1473 1473 self._flagprocessors = dict(flagutil.flagprocessors)
1474 1474 # prevent nesting of addgroup
1475 1475 self._adding_group = None
1476 1476
1477 1477 index, chunk_cache = self._loadindex()
1478 1478 self._load_inner(index, chunk_cache)
1479 1479 self._concurrencychecker = concurrencychecker
1480 1480
1481 1481 def _init_opts(self):
1482 1482 """process options (from above/config) to set up the associated default revlog mode
1483 1483
1484 1484 These values might be affected when actually reading on-disk information.
1485 1485
1486 1486 The relevant values are returned for use in _loadindex().
1487 1487
1488 1488 * newversionflags:
1489 1489 version header to use if we need to create a new revlog
1490 1490
1491 1491 * mmapindexthreshold:
1492 1492 minimal index size at which to start using mmap
1493 1493
1494 1494 * force_nodemap:
1495 1495 force the usage of a "development" version of the nodemap code
1496 1496 """
1497 1497 opts = self.opener.options
1498 1498
1499 1499 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1500 1500 new_header = CHANGELOGV2
1501 1501 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1502 1502 self.feature_config.compute_rank = compute_rank
1503 1503 elif b'revlogv2' in opts:
1504 1504 new_header = REVLOGV2
1505 1505 elif b'revlogv1' in opts:
1506 1506 new_header = REVLOGV1
1507 1507 if self._may_inline:
1508 1508 new_header |= FLAG_INLINE_DATA
1509 1509 if b'generaldelta' in opts:
1510 1510 new_header |= FLAG_GENERALDELTA
1511 1511 elif b'revlogv0' in self.opener.options:
1512 1512 new_header = REVLOGV0
1513 1513 else:
1514 1514 new_header = REVLOG_DEFAULT_VERSION
1515 1515
1516 1516 mmapindexthreshold = None
1517 1517 if self.data_config.mmap_large_index:
1518 1518 mmapindexthreshold = self.data_config.mmap_index_threshold
1519 1519 if self.feature_config.enable_ellipsis:
1520 1520 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1521 1521
1522 1522 # revlog v0 doesn't have flag processors
1523 1523 for flag, processor in opts.get(b'flagprocessors', {}).items():
1524 1524 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1525 1525
1526 1526 chunk_cache_size = self.data_config.chunk_cache_size
1527 1527 if chunk_cache_size <= 0:
1528 1528 raise error.RevlogError(
1529 1529 _(b'revlog chunk cache size %r is not greater than 0')
1530 1530 % chunk_cache_size
1531 1531 )
1532 1532 elif chunk_cache_size & (chunk_cache_size - 1):
1533 1533 raise error.RevlogError(
1534 1534 _(b'revlog chunk cache size %r is not a power of 2')
1535 1535 % chunk_cache_size
1536 1536 )
1537 1537 force_nodemap = opts.get(b'devel-force-nodemap', False)
1538 1538 return new_header, mmapindexthreshold, force_nodemap
1539 1539
1540 1540 def _get_data(self, filepath, mmap_threshold, size=None):
1541 1541 """return a file content with or without mmap
1542 1542
1543 1543 If the file is missing, return the empty string"""
1544 1544 try:
1545 1545 with self.opener(filepath) as fp:
1546 1546 if mmap_threshold is not None:
1547 1547 file_size = self.opener.fstat(fp).st_size
1548 1548 if (
1549 1549 file_size >= mmap_threshold
1550 1550 and self.opener.is_mmap_safe(filepath)
1551 1551 ):
1552 1552 if size is not None:
1553 1553 # avoid potential mmap crash
1554 1554 size = min(file_size, size)
1555 1555 # TODO: should .close() to release resources without
1556 1556 # relying on Python GC
1557 1557 if size is None:
1558 1558 return util.buffer(util.mmapread(fp))
1559 1559 else:
1560 1560 return util.buffer(util.mmapread(fp, size))
1561 1561 if size is None:
1562 1562 return fp.read()
1563 1563 else:
1564 1564 return fp.read(size)
1565 1565 except FileNotFoundError:
1566 1566 return b''
1567 1567
1568 1568 def get_streams(self, max_linkrev, force_inline=False):
1569 1569 """return a list of streams that represent this revlog
1570 1570
1571 1571 This is used by stream-clone to do bytes to bytes copies of a repository.
1572 1572
1573 1573 This streams data for all revisions that refer to a changelog revision up
1574 1574 to `max_linkrev`.
1575 1575
1576 1576 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1577 1577
1578 1578 It returns a list of three-tuples:
1579 1579
1580 1580 [
1581 1581 (filename, bytes_stream, stream_size),
1582 1582 …
1583 1583 ]
1584 1584 """
1585 1585 n = len(self)
1586 1586 index = self.index
1587 1587 while n > 0:
1588 1588 linkrev = index[n - 1][4]
1589 1589 if linkrev < max_linkrev:
1590 1590 break
1591 1591 # note: this loop will rarely go through multiple iterations, since
1592 1592 # it only traverses commits created during the current streaming
1593 1593 # pull operation.
1594 1594 #
1595 1595 # If this becomes a problem, using a binary search should cap the
1596 1596 # runtime of this.
1597 1597 n = n - 1
1598 1598 if n == 0:
1599 1599 # no data to send
1600 1600 return []
1601 1601 index_size = n * index.entry_size
1602 1602 data_size = self.end(n - 1)
1603 1603
1604 1604 # XXX we might have been split (or stripped) since the object
1605 1605 # initialization. We need to close this race too, by having a way to
1606 1606 # pre-open the files we feed to the revlog and never closing them before
1607 1607 # we are done streaming.
1608 1608
1609 1609 if self._inline:
1610 1610
1611 1611 def get_stream():
1612 1612 with self.opener(self._indexfile, mode=b"r") as fp:
1613 1613 yield None
1614 1614 size = index_size + data_size
1615 1615 if size <= 65536:
1616 1616 yield fp.read(size)
1617 1617 else:
1618 1618 yield from util.filechunkiter(fp, limit=size)
1619 1619
1620 1620 inline_stream = get_stream()
1621 1621 next(inline_stream)
1622 1622 return [
1623 1623 (self._indexfile, inline_stream, index_size + data_size),
1624 1624 ]
1625 1625 elif force_inline:
1626 1626
1627 1627 def get_stream():
1628 1628 with self.reading():
1629 1629 yield None
1630 1630
1631 1631 for rev in range(n):
1632 1632 idx = self.index.entry_binary(rev)
1633 1633 if rev == 0 and self._docket is None:
1634 1634 # re-inject the inline flag
1635 1635 header = self._format_flags
1636 1636 header |= self._format_version
1637 1637 header |= FLAG_INLINE_DATA
1638 1638 header = self.index.pack_header(header)
1639 1639 idx = header + idx
1640 1640 yield idx
1641 1641 yield self._inner.get_segment_for_revs(rev, rev)[1]
1642 1642
1643 1643 inline_stream = get_stream()
1644 1644 next(inline_stream)
1645 1645 return [
1646 1646 (self._indexfile, inline_stream, index_size + data_size),
1647 1647 ]
1648 1648 else:
1649 1649
1650 1650 def get_index_stream():
1651 1651 with self.opener(self._indexfile, mode=b"r") as fp:
1652 1652 yield None
1653 1653 if index_size <= 65536:
1654 1654 yield fp.read(index_size)
1655 1655 else:
1656 1656 yield from util.filechunkiter(fp, limit=index_size)
1657 1657
1658 1658 def get_data_stream():
1659 1659 with self._datafp() as fp:
1660 1660 yield None
1661 1661 if data_size <= 65536:
1662 1662 yield fp.read(data_size)
1663 1663 else:
1664 1664 yield from util.filechunkiter(fp, limit=data_size)
1665 1665
1666 1666 index_stream = get_index_stream()
1667 1667 next(index_stream)
1668 1668 data_stream = get_data_stream()
1669 1669 next(data_stream)
1670 1670 return [
1671 1671 (self._datafile, data_stream, data_size),
1672 1672 (self._indexfile, index_stream, index_size),
1673 1673 ]
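# Consumer sketch (illustrative; ``rl`` and ``out`` are hypothetical): the
# generators returned above have already yielded their priming ``None``
# (via the next() calls), so callers only ever see bytes:
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       for chunk in stream:
#           out.write(chunk)  # ``size`` bytes in total per stream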
1674 1674
1675 1675 def _loadindex(self, docket=None):
1676 1676 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1677 1677
1678 1678 if self.postfix is not None:
1679 1679 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1680 1680 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1681 1681 entry_point = b'%s.i.a' % self.radix
1682 1682 elif self._try_split and self.opener.exists(self._split_index_file):
1683 1683 entry_point = self._split_index_file
1684 1684 else:
1685 1685 entry_point = b'%s.i' % self.radix
1686 1686
1687 1687 if docket is not None:
1688 1688 self._docket = docket
1689 1689 self._docket_file = entry_point
1690 1690 else:
1691 1691 self._initempty = True
1692 1692 entry_data = self._get_data(entry_point, mmapindexthreshold)
1693 1693 if len(entry_data) > 0:
1694 1694 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1695 1695 self._initempty = False
1696 1696 else:
1697 1697 header = new_header
1698 1698
1699 1699 self._format_flags = header & ~0xFFFF
1700 1700 self._format_version = header & 0xFFFF
1701 1701
1702 1702 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1703 1703 if supported_flags is None:
1704 1704 msg = _(b'unknown version (%d) in revlog %s')
1705 1705 msg %= (self._format_version, self.display_id)
1706 1706 raise error.RevlogError(msg)
1707 1707 elif self._format_flags & ~supported_flags:
1708 1708 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1709 1709 display_flag = self._format_flags >> 16
1710 1710 msg %= (display_flag, self._format_version, self.display_id)
1711 1711 raise error.RevlogError(msg)
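# Example (illustrative): a v1 inline revlog starts with the 4-byte header
# 0x00010001 == FLAG_INLINE_DATA | REVLOGV1; the masks above split it into
# _format_flags == 0x00010000 and _format_version == 0x0001.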
1712 1712
1713 1713 features = FEATURES_BY_VERSION[self._format_version]
1714 1714 self._inline = features[b'inline'](self._format_flags)
1715 1715 self.delta_config.general_delta = features[b'generaldelta'](
1716 1716 self._format_flags
1717 1717 )
1718 1718 self.data_config.generaldelta = self.delta_config.general_delta
1719 1719 self.feature_config.has_side_data = features[b'sidedata']
1720 1720
1721 1721 if not features[b'docket']:
1722 1722 self._indexfile = entry_point
1723 1723 index_data = entry_data
1724 1724 else:
1725 1725 self._docket_file = entry_point
1726 1726 if self._initempty:
1727 1727 self._docket = docketutil.default_docket(self, header)
1728 1728 else:
1729 1729 self._docket = docketutil.parse_docket(
1730 1730 self, entry_data, use_pending=self._trypending
1731 1731 )
1732 1732
1733 1733 if self._docket is not None:
1734 1734 self._indexfile = self._docket.index_filepath()
1735 1735 index_data = b''
1736 1736 index_size = self._docket.index_end
1737 1737 if index_size > 0:
1738 1738 index_data = self._get_data(
1739 1739 self._indexfile, mmapindexthreshold, size=index_size
1740 1740 )
1741 1741 if len(index_data) < index_size:
1742 1742 msg = _(b'not enough index data for %s: got %d, expected %d')
1743 1743 msg %= (self.display_id, len(index_data), index_size)
1744 1744 raise error.RevlogError(msg)
1745 1745
1746 1746 self._inline = False
1747 1747 # generaldelta implied by version 2 revlogs.
1748 1748 self.delta_config.general_delta = True
1749 1749 self.data_config.generaldelta = True
1750 1750 # the logic for persistent nodemap will be dealt with within the
1751 1751 # main docket, so disable it for now.
1752 1752 self._nodemap_file = None
1753 1753
1754 1754 if self._docket is not None:
1755 1755 self._datafile = self._docket.data_filepath()
1756 1756 self._sidedatafile = self._docket.sidedata_filepath()
1757 1757 elif self.postfix is None:
1758 1758 self._datafile = b'%s.d' % self.radix
1759 1759 else:
1760 1760 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1761 1761
1762 1762 self.nodeconstants = sha1nodeconstants
1763 1763 self.nullid = self.nodeconstants.nullid
1764 1764
1765 1765 # sparse-revlog can't be on without general-delta (issue6056)
1766 1766 if not self.delta_config.general_delta:
1767 1767 self.delta_config.sparse_revlog = False
1768 1768
1769 1769 self._storedeltachains = True
1770 1770
1771 1771 devel_nodemap = (
1772 1772 self._nodemap_file
1773 1773 and force_nodemap
1774 1774 and parse_index_v1_nodemap is not None
1775 1775 )
1776 1776
1777 1777 use_rust_index = False
1778 1778 rust_applicable = self._nodemap_file is not None
1779 1779 rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
1780 1780 rust_applicable = rust_applicable and getattr(
1781 1781 self.opener, "rust_compatible", True
1782 1782 )
1783 1783 if rustrevlog is not None and rust_applicable:
1784 1784 # we would like to use the rust_index in all cases, especially
1785 1785 # because it is necessary for AncestorsIterator and LazyAncestors
1786 1786 # since the 6.7 cycle.
1787 1787 #
1788 1788 # However, the performance impact of unconditionally building the
1789 1789 # nodemap is currently a problem for repositories without a
1790 1790 # persistent nodemap.
1791 1791 use_rust_index = True
1792 1792
1793 1793 if self._format_version != REVLOGV1:
1794 1794 use_rust_index = False
1795 1795
1796 1796 if hasattr(self.opener, "fncache"):
1797 1797 vfs = self.opener.vfs
1798 1798 if not self.opener.uses_dotencode:
1799 1799 use_rust_index = False
1800 1800 if not isinstance(vfs, vfsmod.vfs):
1801 1801 # Be cautious since we don't support other vfs classes
1802 1802 use_rust_index = False
1803 1803 else:
1804 1804 # Rust only supports repos with fncache
1805 1805 use_rust_index = False
1806 1806
1807 1807 self._parse_index = parse_index_v1
1808 1808 if self._format_version == REVLOGV0:
1809 1809 self._parse_index = revlogv0.parse_index_v0
1810 1810 elif self._format_version == REVLOGV2:
1811 1811 self._parse_index = parse_index_v2
1812 1812 elif self._format_version == CHANGELOGV2:
1813 1813 self._parse_index = parse_index_cl_v2
1814 1814 elif devel_nodemap:
1815 1815 self._parse_index = parse_index_v1_nodemap
1816 1816
1817 1817 if use_rust_index:
1818 1818 # Let the Rust code parse its own index
1819 1819 index, chunkcache = (index_data, None)
1820 1820 self.uses_rust = True
1821 1821 else:
1822 1822 try:
1823 1823 d = self._parse_index(index_data, self._inline)
1824 1824 index, chunkcache = d
1825 1825 self._register_nodemap_info(index)
1826 1826 except (ValueError, IndexError):
1827 1827 raise error.RevlogError(
1828 1828 _(b"index %s is corrupted") % self.display_id
1829 1829 )
1830 1830 # revnum -> (chain-length, sum-delta-length)
1831 1831 self._chaininfocache = util.lrucachedict(500)
1832 1832
1833 1833 return index, chunkcache
1834 1834
1835 1835 def _load_inner(self, index, chunk_cache):
1836 1836 if self._docket is None:
1837 1837 default_compression_header = None
1838 1838 else:
1839 1839 default_compression_header = self._docket.default_compression_header
1840 1840
1841 1841 if self.uses_rust:
1842 1842 vfs_is_readonly = False
1843 1843 fncache = None
1844 1844
1845 1845 if hasattr(self.opener, "vfs"):
1846 1846 vfs = self.opener
1847 1847 if isinstance(vfs, vfsmod.readonlyvfs):
1848 1848 vfs_is_readonly = True
1849 1849 vfs = vfs.vfs
1850 1850 fncache = vfs.fncache
1851 1851 vfs = vfs.vfs
1852 1852 else:
1853 1853 vfs = self.opener
1854 1854
1855 1855 vfs_base = vfs.base
1856 1856 assert fncache is not None, "Rust only supports repos with fncache"
1857 1857
1858 1858 self._inner = rustrevlog.InnerRevlog(
1859 1859 vfs_base=vfs_base,
1860 1860 fncache=fncache,
1861 1861 vfs_is_readonly=vfs_is_readonly,
1862 1862 index_data=index,
1863 1863 index_file=self._indexfile,
1864 1864 data_file=self._datafile,
1865 1865 sidedata_file=self._sidedatafile,
1866 1866 inline=self._inline,
1867 1867 data_config=self.data_config,
1868 1868 delta_config=self.delta_config,
1869 1869 feature_config=self.feature_config,
1870 1870 chunk_cache=chunk_cache,
1871 1871 default_compression_header=default_compression_header,
1872 1872 revlog_type=self.target[0],
1873 1873 use_persistent_nodemap=self._nodemap_file is not None,
1874 1874 )
1875 1875 self.index = RustIndexProxy(self._inner)
1876 1876 self._register_nodemap_info(self.index)
1877 1877 self.uses_rust = True
1878 1878 else:
1879 1879 self._inner = _InnerRevlog(
1880 1880 opener=self.opener,
1881 1881 index=index,
1882 1882 index_file=self._indexfile,
1883 1883 data_file=self._datafile,
1884 1884 sidedata_file=self._sidedatafile,
1885 1885 inline=self._inline,
1886 1886 data_config=self.data_config,
1887 1887 delta_config=self.delta_config,
1888 1888 feature_config=self.feature_config,
1889 1889 chunk_cache=chunk_cache,
1890 1890 default_compression_header=default_compression_header,
1891 1891 )
1892 1892 self.index = self._inner.index
1893 1893
1894 1894 def _register_nodemap_info(self, index):
1895 1895 use_nodemap = (
1896 1896 not self._inline
1897 1897 and self._nodemap_file is not None
1898 1898 and hasattr(index, 'update_nodemap_data')
1899 1899 )
1900 1900 if use_nodemap:
1901 1901 nodemap_data = nodemaputil.persisted_data(self)
1902 1902 if nodemap_data is not None:
1903 1903 docket = nodemap_data[0]
1904 1904 if (
1905 1905 len(index) > docket.tip_rev
1906 1906 and index[docket.tip_rev][7] == docket.tip_node
1907 1907 ):
1908 1908 # no changelog tampering
1909 1909 self._nodemap_docket = docket
1910 1910 index.update_nodemap_data(
1911 1911 *nodemap_data
1912 1912 ) # pytype: disable=attribute-error
1913 1913
1914 1914 def get_revlog(self):
1915 1915 """simple function to mirror API of other not-really-revlog API"""
1916 1916 return self
1917 1917
1918 1918 @util.propertycache
1919 1919 def revlog_kind(self):
1920 1920 return self.target[0]
1921 1921
1922 1922 @util.propertycache
1923 1923 def display_id(self):
1924 1924 """The public facing "ID" of the revlog that we use in message"""
1925 1925 if self.revlog_kind == KIND_FILELOG:
1926 1926 # Reference the file without the "data/" prefix, so it is familiar
1927 1927 # to the user.
1928 1928 return self.target[1]
1929 1929 else:
1930 1930 return self.radix
1931 1931
1932 1932 def _datafp(self, mode=b'r'):
1933 1933 """file object for the revlog's data file"""
1934 1934 return self.opener(self._datafile, mode=mode)
1935 1935
1936 1936 def tiprev(self):
1937 1937 return len(self.index) - 1
1938 1938
1939 1939 def tip(self):
1940 1940 return self.node(self.tiprev())
1941 1941
1942 1942 def __contains__(self, rev):
1943 1943 return 0 <= rev < len(self)
1944 1944
1945 1945 def __len__(self):
1946 1946 return len(self.index)
1947 1947
1948 1948 def __iter__(self) -> Iterator[int]:
1949 1949 return iter(range(len(self)))
1950 1950
1951 1951 def revs(self, start=0, stop=None):
1952 1952 """iterate over all rev in this revlog (from start to stop)"""
1953 1953 return storageutil.iterrevs(len(self), start=start, stop=stop)
1954 1954
1955 1955 def hasnode(self, node):
1956 1956 try:
1957 1957 self.rev(node)
1958 1958 return True
1959 1959 except KeyError:
1960 1960 return False
1961 1961
1962 1962 def _candelta(self, baserev, rev):
1963 1963 """whether two revisions (baserev, rev) can be delta-ed or not"""
1964 1964 # Disable delta if either rev requires a content-changing flag
1965 1965 # processor (ex. LFS). This is because such a flag processor can alter
1966 1966 # the rawtext content that the delta will be based on, and two clients
1967 1967 # could have the same revlog node with different flags (i.e. different
1968 1968 # rawtext contents), making the delta incompatible.
1969 1969 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1970 1970 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1971 1971 ):
1972 1972 return False
1973 1973 return True
1974 1974
1975 1975 def update_caches(self, transaction):
1976 1976 """update on disk cache
1977 1977
1978 1978 If a transaction is passed, the update may be delayed to transaction
1979 1979 commit."""
1980 1980 if self._nodemap_file is not None:
1981 1981 if transaction is None:
1982 1982 nodemaputil.update_persistent_nodemap(self)
1983 1983 else:
1984 1984 nodemaputil.setup_persistent_nodemap(transaction, self)
1985 1985
1986 1986 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1987 1987 """Clear in-memory caches"""
1988 1988 self._chainbasecache.clear()
1989 1989 self._inner.clear_cache()
1990 1990 self._pcache = {}
1991 1991 self._nodemap_docket = None
1992 1992 self.index.clearcaches()
1993 1993 # The Python code is responsible for validating the docket, so we
1994 1994 # end up having to refresh it here.
1995 1995 use_nodemap = (
1996 1996 not self._inline
1997 1997 and self._nodemap_file is not None
1998 1998 and hasattr(self.index, 'update_nodemap_data')
1999 1999 )
2000 2000 if use_nodemap:
2001 2001 nodemap_data = nodemaputil.persisted_data(self)
2002 2002 if nodemap_data is not None:
2003 2003 self._nodemap_docket = nodemap_data[0]
2004 2004 self.index.update_nodemap_data(
2005 2005 *nodemap_data
2006 2006 ) # pytype: disable=attribute-error
2007 2007
2008 2008 def rev(self, node):
2009 2009 """return the revision number associated with a <nodeid>"""
2010 2010 try:
2011 2011 return self.index.rev(node)
2012 2012 except TypeError:
2013 2013 raise
2014 2014 except error.RevlogError:
2015 2015 # parsers.c radix tree lookup failed
2016 2016 if (
2017 2017 node == self.nodeconstants.wdirid
2018 2018 or node in self.nodeconstants.wdirfilenodeids
2019 2019 ):
2020 2020 raise error.WdirUnsupported
2021 2021 raise error.LookupError(node, self.display_id, _(b'no node'))
2022 2022
2023 2023 # Accessors for index entries.
2024 2024
2025 2025 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
2026 2026 # are flags.
2027 2027 def start(self, rev):
2028 2028 return int(self.index[rev][0] >> 16)
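# Example (illustrative): if index[rev][0] == 0x0000000000420007, then
# start(rev) == 0x42 (the top six bytes, recovered with ``>> 16``) and
# flags(rev) == 0x0007 (the low two bytes, recovered with ``& 0xFFFF``).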
2029 2029
2030 2030 def sidedata_cut_off(self, rev):
2031 2031 sd_cut_off = self.index[rev][8]
2032 2032 if sd_cut_off != 0:
2033 2033 return sd_cut_off
2034 2034 # This is some annoying dance, because entries without sidedata
2035 2035 # currently use 0 as their offset (instead of previous-offset +
2036 2036 # previous-size).
2037 2037 #
2038 2038 # We should reconsider this "sidedata -> 0 sidedata_offset" policy.
2039 2039 # In the meantime, we need this.
2040 2040 while 0 <= rev:
2041 2041 e = self.index[rev]
2042 2042 if e[9] != 0:
2043 2043 return e[8] + e[9]
2044 2044 rev -= 1
2045 2045 return 0
2046 2046
2047 2047 def flags(self, rev):
2048 2048 return self.index[rev][0] & 0xFFFF
2049 2049
2050 2050 def length(self, rev):
2051 2051 return self.index[rev][1]
2052 2052
2053 2053 def sidedata_length(self, rev):
2054 2054 if not self.feature_config.has_side_data:
2055 2055 return 0
2056 2056 return self.index[rev][9]
2057 2057
2058 2058 def rawsize(self, rev):
2059 2059 """return the length of the uncompressed text for a given revision"""
2060 2060 l = self.index[rev][2]
2061 2061 if l >= 0:
2062 2062 return l
2063 2063
2064 2064 t = self.rawdata(rev)
2065 2065 return len(t)
2066 2066
2067 2067 def size(self, rev):
2068 2068 """length of non-raw text (processed by a "read" flag processor)"""
2069 2069 # fast path: if no "read" flag processor could change the content,
2070 2070 # size is rawsize. note: ELLIPSIS is known to not change the content.
2071 2071 flags = self.flags(rev)
2072 2072 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
2073 2073 return self.rawsize(rev)
2074 2074
2075 2075 return len(self.revision(rev))
2076 2076
2077 2077 def fast_rank(self, rev):
2078 2078 """Return the rank of a revision if already known, or None otherwise.
2079 2079
2080 2080 The rank of a revision is the size of the sub-graph it defines as a
2081 2081 head. Equivalently, the rank of a revision `r` is the size of the set
2082 2082 `ancestors(r)`, `r` included.
2083 2083
2084 2084 This method returns the rank retrieved from the revlog in constant
2085 2085 time. It makes no attempt at computing unknown values for versions of
2086 2086 the revlog which do not persist the rank.
2087 2087 """
2088 2088 rank = self.index[rev][ENTRY_RANK]
2089 2089 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
2090 2090 return None
2091 2091 if rev == nullrev:
2092 2092 return 0 # convention
2093 2093 return rank
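# Example (illustrative): in a linear history 0 <- 1 <- 2, the rank of
# revision 2 is 3 (itself plus its two ancestors). For formats other than
# CHANGELOGV2, or entries persisted without a rank, fast_rank() returns
# None rather than computing the value.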
2094 2094
2095 2095 def chainbase(self, rev):
2096 2096 base = self._chainbasecache.get(rev)
2097 2097 if base is not None:
2098 2098 return base
2099 2099
2100 2100 index = self.index
2101 2101 iterrev = rev
2102 2102 base = index[iterrev][3]
2103 2103 while base != iterrev:
2104 2104 iterrev = base
2105 2105 base = index[iterrev][3]
2106 2106
2107 2107 self._chainbasecache[rev] = base
2108 2108 return base
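# Example (illustrative): if the stored delta bases are 5 -> 3 -> 1 and
# index[1][3] == 1, then chainbase(5) walks 5, 3, 1 and returns 1, the
# revision at the root of the delta chain (stored as a full snapshot).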
2109 2109
2110 2110 def linkrev(self, rev):
2111 2111 return self.index[rev][4]
2112 2112
2113 2113 def parentrevs(self, rev):
2114 2114 try:
2115 2115 entry = self.index[rev]
2116 2116 except IndexError:
2117 2117 if rev == wdirrev:
2118 2118 raise error.WdirUnsupported
2119 2119 raise
2120 2120
2121 2121 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2122 2122 return entry[6], entry[5]
2123 2123 else:
2124 2124 return entry[5], entry[6]
2125 2125
2126 2126 # fast parentrevs(rev) where rev isn't filtered
2127 2127 _uncheckedparentrevs = parentrevs
2128 2128
2129 2129 def node(self, rev):
2130 2130 try:
2131 2131 return self.index[rev][7]
2132 2132 except IndexError:
2133 2133 if rev == wdirrev:
2134 2134 raise error.WdirUnsupported
2135 2135 raise
2136 2136
2137 2137 # Derived from index values.
2138 2138
2139 2139 def end(self, rev):
2140 2140 return self.start(rev) + self.length(rev)
2141 2141
2142 2142 def parents(self, node):
2143 2143 i = self.index
2144 2144 d = i[self.rev(node)]
2145 2145 # inline node() to avoid function call overhead
2146 2146 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2147 2147 return i[d[6]][7], i[d[5]][7]
2148 2148 else:
2149 2149 return i[d[5]][7], i[d[6]][7]
2150 2150
2151 2151 def chainlen(self, rev):
2152 2152 return self._chaininfo(rev)[0]
2153 2153
2154 2154 def _chaininfo(self, rev):
2155 2155 chaininfocache = self._chaininfocache
2156 2156 if rev in chaininfocache:
2157 2157 return chaininfocache[rev]
2158 2158 index = self.index
2159 2159 generaldelta = self.delta_config.general_delta
2160 2160 iterrev = rev
2161 2161 e = index[iterrev]
2162 2162 clen = 0
2163 2163 compresseddeltalen = 0
2164 2164 while iterrev != e[3]:
2165 2165 clen += 1
2166 2166 compresseddeltalen += e[1]
2167 2167 if generaldelta:
2168 2168 iterrev = e[3]
2169 2169 else:
2170 2170 iterrev -= 1
2171 2171 if iterrev in chaininfocache:
2172 2172 t = chaininfocache[iterrev]
2173 2173 clen += t[0]
2174 2174 compresseddeltalen += t[1]
2175 2175 break
2176 2176 e = index[iterrev]
2177 2177 else:
2178 2178 # Add text length of base since decompressing that also takes
2179 2179 # work. For cache hits the length is already included.
2180 2180 compresseddeltalen += e[1]
2181 2181 r = (clen, compresseddeltalen)
2182 2182 chaininfocache[rev] = r
2183 2183 return r
2184 2184
2185 2185 def _deltachain(self, rev, stoprev=None):
2186 2186 return self._inner._deltachain(rev, stoprev=stoprev)
2187 2187
2188 2188 def ancestors(self, revs, stoprev=0, inclusive=False):
2189 2189 """Generate the ancestors of 'revs' in reverse revision order.
2190 2190 Does not generate revs lower than stoprev.
2191 2191
2192 2192 See the documentation for ancestor.lazyancestors for more details."""
2193 2193
2194 2194 # first, make sure start revisions aren't filtered
2195 2195 revs = list(revs)
2196 2196 checkrev = self.node
2197 2197 for r in revs:
2198 2198 checkrev(r)
2199 2199 # and we're sure ancestors aren't filtered as well
2200 2200
2201 2201 if rustancestor is not None and self.index.rust_ext_compat:
2202 2202 lazyancestors = rustancestor.LazyAncestors
2203 2203 arg = self.index
2204 2204 else:
2205 2205 lazyancestors = ancestor.lazyancestors
2206 2206 arg = self._uncheckedparentrevs
2207 2207 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
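# Usage sketch (illustrative; ``rl``, ``head1``, ``head2`` and ``cutoff``
# are hypothetical):
#
#   for anc in rl.ancestors([head1, head2], stoprev=cutoff, inclusive=True):
#       ...  # revisions in reverse revision order, never below ``cutoff``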
2208 2208
2209 2209 def descendants(self, revs):
2210 2210 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2211 2211
2212 2212 def findcommonmissing(self, common=None, heads=None):
2213 2213 """Return a tuple of the ancestors of common and the ancestors of heads
2214 2214 that are not ancestors of common. In revset terminology, we return the
2215 2215 tuple:
2216 2216
2217 2217 ::common, (::heads) - (::common)
2218 2218
2219 2219 The list is sorted by revision number, meaning it is
2220 2220 topologically sorted.
2221 2221
2222 2222 'heads' and 'common' are both lists of node IDs. If heads is
2223 2223 not supplied, uses all of the revlog's heads. If common is not
2224 2224 supplied, uses nullid."""
2225 2225 if common is None:
2226 2226 common = [self.nullid]
2227 2227 if heads is None:
2228 2228 heads = self.heads()
2229 2229
2230 2230 common = [self.rev(n) for n in common]
2231 2231 heads = [self.rev(n) for n in heads]
2232 2232
2233 2233 # we want the ancestors, but inclusive
2234 2234 class lazyset:
2235 2235 def __init__(self, lazyvalues):
2236 2236 self.addedvalues = set()
2237 2237 self.lazyvalues = lazyvalues
2238 2238
2239 2239 def __contains__(self, value):
2240 2240 return value in self.addedvalues or value in self.lazyvalues
2241 2241
2242 2242 def __iter__(self):
2243 2243 added = self.addedvalues
2244 2244 for r in added:
2245 2245 yield r
2246 2246 for r in self.lazyvalues:
2247 2247 if r not in added:
2248 2248 yield r
2249 2249
2250 2250 def add(self, value):
2251 2251 self.addedvalues.add(value)
2252 2252
2253 2253 def update(self, values):
2254 2254 self.addedvalues.update(values)
2255 2255
2256 2256 has = lazyset(self.ancestors(common))
2257 2257 has.add(nullrev)
2258 2258 has.update(common)
2259 2259
2260 2260 # take all ancestors from heads that aren't in has
2261 2261 missing = set()
2262 2262 visit = collections.deque(r for r in heads if r not in has)
2263 2263 while visit:
2264 2264 r = visit.popleft()
2265 2265 if r in missing:
2266 2266 continue
2267 2267 else:
2268 2268 missing.add(r)
2269 2269 for p in self.parentrevs(r):
2270 2270 if p not in has:
2271 2271 visit.append(p)
2272 2272 missing = list(missing)
2273 2273 missing.sort()
2274 2274 return has, [self.node(miss) for miss in missing]
2275 2275
2276 2276 def incrementalmissingrevs(self, common=None):
2277 2277 """Return an object that can be used to incrementally compute the
2278 2278 revision numbers of the ancestors of arbitrary sets that are not
2279 2279 ancestors of common. This is an ancestor.incrementalmissingancestors
2280 2280 object.
2281 2281
2282 2282 'common' is a list of revision numbers. If common is not supplied, uses
2283 2283 nullrev.
2284 2284 """
2285 2285 if common is None:
2286 2286 common = [nullrev]
2287 2287
2288 2288 if rustancestor is not None and self.index.rust_ext_compat:
2289 2289 return rustancestor.MissingAncestors(self.index, common)
2290 2290 return ancestor.incrementalmissingancestors(self.parentrevs, common)
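# Usage sketch (illustrative): the returned object can be fed several head
# sets without recomputing the ancestors of ``common`` from scratch:
#
#   inc = self.incrementalmissingrevs(common=[c1, c2])  # c1, c2: revnums
#   missing = inc.missingancestors(heads)  # sorted revision numbers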
2291 2291
2292 2292 def findmissingrevs(self, common=None, heads=None):
2293 2293 """Return the revision numbers of the ancestors of heads that
2294 2294 are not ancestors of common.
2295 2295
2296 2296 More specifically, return a list of revision numbers corresponding to
2297 2297 nodes N such that every N satisfies the following constraints:
2298 2298
2299 2299 1. N is an ancestor of some node in 'heads'
2300 2300 2. N is not an ancestor of any node in 'common'
2301 2301
2302 2302 The list is sorted by revision number, meaning it is
2303 2303 topologically sorted.
2304 2304
2305 2305 'heads' and 'common' are both lists of revision numbers. If heads is
2306 2306 not supplied, uses all of the revlog's heads. If common is not
2307 2307 supplied, uses nullid."""
2308 2308 if common is None:
2309 2309 common = [nullrev]
2310 2310 if heads is None:
2311 2311 heads = self.headrevs()
2312 2312
2313 2313 inc = self.incrementalmissingrevs(common=common)
2314 2314 return inc.missingancestors(heads)
2315 2315
2316 2316 def findmissing(self, common=None, heads=None):
2317 2317 """Return the ancestors of heads that are not ancestors of common.
2318 2318
2319 2319 More specifically, return a list of nodes N such that every N
2320 2320 satisfies the following constraints:
2321 2321
2322 2322 1. N is an ancestor of some node in 'heads'
2323 2323 2. N is not an ancestor of any node in 'common'
2324 2324
2325 2325 The list is sorted by revision number, meaning it is
2326 2326 topologically sorted.
2327 2327
2328 2328 'heads' and 'common' are both lists of node IDs. If heads is
2329 2329 not supplied, uses all of the revlog's heads. If common is not
2330 2330 supplied, uses nullid."""
2331 2331 if common is None:
2332 2332 common = [self.nullid]
2333 2333 if heads is None:
2334 2334 heads = self.heads()
2335 2335
2336 2336 common = [self.rev(n) for n in common]
2337 2337 heads = [self.rev(n) for n in heads]
2338 2338
2339 2339 inc = self.incrementalmissingrevs(common=common)
2340 2340 return [self.node(r) for r in inc.missingancestors(heads)]
2341 2341
2342 2342 def nodesbetween(self, roots=None, heads=None):
2343 2343 """Return a topological path from 'roots' to 'heads'.
2344 2344
2345 2345 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2346 2346 topologically sorted list of all nodes N that satisfy both of
2347 2347 these constraints:
2348 2348
2349 2349 1. N is a descendant of some node in 'roots'
2350 2350 2. N is an ancestor of some node in 'heads'
2351 2351
2352 2352 Every node is considered to be both a descendant and an ancestor
2353 2353 of itself, so every reachable node in 'roots' and 'heads' will be
2354 2354 included in 'nodes'.
2355 2355
2356 2356 'outroots' is the list of reachable nodes in 'roots', i.e., the
2357 2357 subset of 'roots' that is returned in 'nodes'. Likewise,
2358 2358 'outheads' is the subset of 'heads' that is also in 'nodes'.
2359 2359
2360 2360 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2361 2361 unspecified, uses nullid as the only root. If 'heads' is
2362 2362 unspecified, uses list of all of the revlog's heads."""
2363 2363 nonodes = ([], [], [])
2364 2364 if roots is not None:
2365 2365 roots = list(roots)
2366 2366 if not roots:
2367 2367 return nonodes
2368 2368 lowestrev = min([self.rev(n) for n in roots])
2369 2369 else:
2370 2370 roots = [self.nullid] # Everybody's a descendant of nullid
2371 2371 lowestrev = nullrev
2372 2372 if (lowestrev == nullrev) and (heads is None):
2373 2373 # We want _all_ the nodes!
2374 2374 return (
2375 2375 [self.node(r) for r in self],
2376 2376 [self.nullid],
2377 2377 list(self.heads()),
2378 2378 )
2379 2379 if heads is None:
2380 2380 # All nodes are ancestors, so the latest ancestor is the last
2381 2381 # node.
2382 2382 highestrev = len(self) - 1
2383 2383 # Set ancestors to None to signal that every node is an ancestor.
2384 2384 ancestors = None
2385 2385 # Set heads to an empty dictionary for later discovery of heads
2386 2386 heads = {}
2387 2387 else:
2388 2388 heads = list(heads)
2389 2389 if not heads:
2390 2390 return nonodes
2391 2391 ancestors = set()
2392 2392 # Turn heads into a dictionary so we can remove 'fake' heads.
2393 2393 # Also, later we will be using it to filter out the heads we can't
2394 2394 # find from roots.
2395 2395 heads = dict.fromkeys(heads, False)
2396 2396 # Start at the top and keep marking parents until we're done.
2397 2397 nodestotag = set(heads)
2398 2398 # Remember where the top was so we can use it as a limit later.
2399 2399 highestrev = max([self.rev(n) for n in nodestotag])
2400 2400 while nodestotag:
2401 2401 # grab a node to tag
2402 2402 n = nodestotag.pop()
2403 2403 # Never tag nullid
2404 2404 if n == self.nullid:
2405 2405 continue
2406 2406 # A node's revision number represents its place in a
2407 2407 # topologically sorted list of nodes.
2408 2408 r = self.rev(n)
2409 2409 if r >= lowestrev:
2410 2410 if n not in ancestors:
2411 2411 # If we are possibly a descendant of one of the roots
2412 2412 # and we haven't already been marked as an ancestor
2413 2413 ancestors.add(n) # Mark as ancestor
2414 2414 # Add non-nullid parents to list of nodes to tag.
2415 2415 nodestotag.update(
2416 2416 [p for p in self.parents(n) if p != self.nullid]
2417 2417 )
2418 2418 elif n in heads: # We've seen it before, is it a fake head?
2419 2419 # So it is, real heads should not be the ancestors of
2420 2420 # any other heads.
2421 2421 heads.pop(n)
2422 2422 if not ancestors:
2423 2423 return nonodes
2424 2424 # Now that we have our set of ancestors, we want to remove any
2425 2425 # roots that are not ancestors.
2426 2426
2427 2427 # If one of the roots was nullid, everything is included anyway.
2428 2428 if lowestrev > nullrev:
2429 2429 # But, since we weren't, let's recompute the lowest rev to not
2430 2430 # include roots that aren't ancestors.
2431 2431
2432 2432 # Filter out roots that aren't ancestors of heads
2433 2433 roots = [root for root in roots if root in ancestors]
2434 2434 # Recompute the lowest revision
2435 2435 if roots:
2436 2436 lowestrev = min([self.rev(root) for root in roots])
2437 2437 else:
2438 2438 # No more roots? Return empty list
2439 2439 return nonodes
2440 2440 else:
2441 2441 # We are descending from nullid, and don't need to care about
2442 2442 # any other roots.
2443 2443 lowestrev = nullrev
2444 2444 roots = [self.nullid]
2445 2445 # Transform our roots list into a set.
2446 2446 descendants = set(roots)
2447 2447 # Also, keep the original roots so we can filter out roots that aren't
2448 2448 # 'real' roots (i.e. are descended from other roots).
2449 2449 roots = descendants.copy()
2450 2450 # Our topologically sorted list of output nodes.
2451 2451 orderedout = []
2452 2452 # Don't start at nullid since we don't want nullid in our output list,
2453 2453 # and if nullid shows up in descendants, empty parents will look like
2454 2454 # they're descendants.
2455 2455 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2456 2456 n = self.node(r)
2457 2457 isdescendant = False
2458 2458 if lowestrev == nullrev: # Everybody is a descendant of nullid
2459 2459 isdescendant = True
2460 2460 elif n in descendants:
2461 2461 # n is already a descendant
2462 2462 isdescendant = True
2463 2463 # This check only needs to be done here because all the roots
2464 2464 # will start being marked as descendants before the loop.
2465 2465 if n in roots:
2466 2466 # If n was a root, check if it's a 'real' root.
2467 2467 p = tuple(self.parents(n))
2468 2468 # If any of its parents are descendants, it's not a root.
2469 2469 if (p[0] in descendants) or (p[1] in descendants):
2470 2470 roots.remove(n)
2471 2471 else:
2472 2472 p = tuple(self.parents(n))
2473 2473 # A node is a descendant if either of its parents are
2474 2474 # descendants. (We seeded the descendants set with the roots
2475 2475 # up there, remember?)
2476 2476 if (p[0] in descendants) or (p[1] in descendants):
2477 2477 descendants.add(n)
2478 2478 isdescendant = True
2479 2479 if isdescendant and ((ancestors is None) or (n in ancestors)):
2480 2480 # Only include nodes that are both descendants and ancestors.
2481 2481 orderedout.append(n)
2482 2482 if (ancestors is not None) and (n in heads):
2483 2483 # We're trying to figure out which heads are reachable
2484 2484 # from roots.
2485 2485 # Mark this head as having been reached
2486 2486 heads[n] = True
2487 2487 elif ancestors is None:
2488 2488 # Otherwise, we're trying to discover the heads.
2489 2489 # Assume this is a head because if it isn't, the next step
2490 2490 # will eventually remove it.
2491 2491 heads[n] = True
2492 2492 # But, obviously its parents aren't.
2493 2493 for p in self.parents(n):
2494 2494 heads.pop(p, None)
2495 2495 heads = [head for head, flag in heads.items() if flag]
2496 2496 roots = list(roots)
2497 2497 assert orderedout
2498 2498 assert roots
2499 2499 assert heads
2500 2500 return (orderedout, roots, heads)
2501 2501
2502 2502 def headrevs(self, revs=None, stop_rev=None):
2503 2503 if revs is None:
2504 2504 return self.index.headrevs(None, stop_rev)
2505 2505 if rustdagop is not None and self.index.rust_ext_compat:
2506 2506 return rustdagop.headrevs(self.index, revs)
2507 2507 return dagop.headrevs(revs, self._uncheckedparentrevs)
2508 2508
2509 2509 def headrevsdiff(self, start, stop):
2510 2510 try:
2511 2511 return self.index.headrevsdiff(
2512 2512 start, stop
2513 2513 ) # pytype: disable=attribute-error
2514 2514 except AttributeError:
2515 2515 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2516 2516
2517 2517 def computephases(self, roots):
2518 2518 return self.index.computephasesmapsets(
2519 2519 roots
2520 2520 ) # pytype: disable=attribute-error
2521 2521
2522 2522 def _head_node_ids(self):
2523 2523 try:
2524 2524 return self.index.head_node_ids() # pytype: disable=attribute-error
2525 2525 except AttributeError:
2526 2526 return [self.node(r) for r in self.headrevs()]
2527 2527
2528 2528 def heads(self, start=None, stop=None):
2529 2529 """return the list of all nodes that have no children
2530 2530
2531 2531 if start is specified, only heads that are descendants of
2532 2532 start will be returned
2533 2533 if stop is specified, it will consider all the revs from stop
2534 2534 as if they had no children
2535 2535 """
2536 2536 if start is None and stop is None:
2537 2537 if not len(self):
2538 2538 return [self.nullid]
2539 2539 return self._head_node_ids()
2540 2540 if start is None:
2541 2541 start = nullrev
2542 2542 else:
2543 2543 start = self.rev(start)
2544 2544
2545 2545 stoprevs = {self.rev(n) for n in stop or []}
2546 2546
2547 2547 revs = dagop.headrevssubset(
2548 2548 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2549 2549 )
2550 2550
2551 2551 return [self.node(rev) for rev in revs]
2552 2552
2553 2553 def diffheads(self, start, stop):
2554 2554 """return the nodes that make up the difference between
2555 2555 heads of revs before `start` and heads of revs before `stop`"""
2556 2556 removed, added = self.headrevsdiff(start, stop)
2557 2557 return [self.node(r) for r in removed], [self.node(r) for r in added]
2558 2558
2559 2559 def children(self, node):
2560 2560 """find the children of a given node"""
2561 2561 c = []
2562 2562 p = self.rev(node)
2563 2563 for r in self.revs(start=p + 1):
2564 2564 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2565 2565 if prevs:
2566 2566 for pr in prevs:
2567 2567 if pr == p:
2568 2568 c.append(self.node(r))
2569 2569 elif p == nullrev:
2570 2570 c.append(self.node(r))
2571 2571 return c
2572 2572
2573 2573 def commonancestorsheads(self, a, b):
2574 2574 """calculate all the heads of the common ancestors of nodes a and b"""
2575 2575 a, b = self.rev(a), self.rev(b)
2576 2576 ancs = self._commonancestorsheads(a, b)
2577 2577 return pycompat.maplist(self.node, ancs)
2578 2578
2579 2579 def _commonancestorsheads(self, *revs):
2580 2580 """calculate all the heads of the common ancestors of revs"""
2581 2581 try:
2582 2582 ancs = self.index.commonancestorsheads(
2583 2583 *revs
2584 2584 ) # pytype: disable=attribute-error
2585 2585 except (AttributeError, OverflowError): # C implementation failed
2586 2586 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2587 2587 return ancs
2588 2588
2589 2589 def isancestor(self, a, b):
2590 2590 """return True if node a is an ancestor of node b
2591 2591
2592 2592 A revision is considered an ancestor of itself."""
2593 2593 a, b = self.rev(a), self.rev(b)
2594 2594 return self.isancestorrev(a, b)
2595 2595
2596 2596 def isancestorrev(self, a, b):
2597 2597 """return True if revision a is an ancestor of revision b
2598 2598
2599 2599 A revision is considered an ancestor of itself.
2600 2600
2601 2601 The implementation of this is trivial but the use of
2602 2602 reachableroots is not."""
2603 2603 if a == nullrev:
2604 2604 return True
2605 2605 elif a == b:
2606 2606 return True
2607 2607 elif a > b:
2608 2608 return False
2609 2609 return bool(self.reachableroots(a, [b], [a], includepath=False))
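# Note (illustrative reasoning): ancestors always have smaller revision
# numbers than their descendants, so ``a > b`` can answer False without
# touching the graph; the remaining case is delegated to reachableroots,
# which checks whether ``a`` is reachable walking back from ``b``.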
2610 2610
2611 2611 def reachableroots(self, minroot, heads, roots, includepath=False):
2612 2612 """return (heads(::(<roots> and <roots>::<heads>)))
2613 2613
2614 2614 If includepath is True, return (<roots>::<heads>)."""
2615 2615 try:
2616 2616 return self.index.reachableroots2(
2617 2617 minroot, heads, roots, includepath
2618 2618 ) # pytype: disable=attribute-error
2619 2619 except AttributeError:
2620 2620 return dagop._reachablerootspure(
2621 2621 self.parentrevs, minroot, roots, heads, includepath
2622 2622 )
2623 2623
2624 2624 def ancestor(self, a, b):
2625 2625 """calculate the "best" common ancestor of nodes a and b"""
2626 2626
2627 2627 a, b = self.rev(a), self.rev(b)
2628 2628 try:
2629 2629 ancs = self.index.ancestors(a, b) # pytype: disable=attribute-error
2630 2630 except (AttributeError, OverflowError):
2631 2631 ancs = ancestor.ancestors(self.parentrevs, a, b)
2632 2632 if ancs:
2633 2633 # choose a consistent winner when there's a tie
2634 2634 return min(map(self.node, ancs))
2635 2635 return self.nullid
2636 2636
2637 2637 def _match(self, id):
2638 2638 if isinstance(id, int):
2639 2639 # rev
2640 2640 return self.node(id)
2641 2641 if len(id) == self.nodeconstants.nodelen:
2642 2642 # possibly a binary node
2643 2643 # odds of a binary node being all hex in ASCII are 1 in 10**25
2644 2644 try:
2645 2645 node = id
2646 2646 self.rev(node) # quick search the index
2647 2647 return node
2648 2648 except error.LookupError:
2649 2649 pass # may be partial hex id
2650 2650 try:
2651 2651 # str(rev)
2652 2652 rev = int(id)
2653 2653 if b"%d" % rev != id:
2654 2654 raise ValueError
2655 2655 if rev < 0:
2656 2656 rev = len(self) + rev
2657 2657 if rev < 0 or rev >= len(self):
2658 2658 raise ValueError
2659 2659 return self.node(rev)
2660 2660 except (ValueError, OverflowError):
2661 2661 pass
2662 2662 if len(id) == 2 * self.nodeconstants.nodelen:
2663 2663 try:
2664 2664 # a full hex nodeid?
2665 2665 node = bin(id)
2666 2666 self.rev(node)
2667 2667 return node
2668 2668 except (binascii.Error, error.LookupError):
2669 2669 pass
2670 2670
2671 2671 def _partialmatch(self, id):
2672 2672 # we don't care about wdirfilenodeids as they should always be full hashes
2673 2673 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2674 2674 ambiguous = False
2675 2675 try:
2676 2676 partial = self.index.partialmatch(
2677 2677 id
2678 2678 ) # pytype: disable=attribute-error
2679 2679 if partial and self.hasnode(partial):
2680 2680 if maybewdir:
2681 2681 # single 'ff...' match in radix tree, ambiguous with wdir
2682 2682 ambiguous = True
2683 2683 else:
2684 2684 return partial
2685 2685 elif maybewdir:
2686 2686 # no 'ff...' match in radix tree, wdir identified
2687 2687 raise error.WdirUnsupported
2688 2688 else:
2689 2689 return None
2690 2690 except error.RevlogError:
2691 2691 # parsers.c radix tree lookup gave multiple matches
2692 2692 # fast path: for unfiltered changelog, radix tree is accurate
2693 2693 if not getattr(self, 'filteredrevs', None):
2694 2694 ambiguous = True
2695 2695 # fall through to slow path that filters hidden revisions
2696 2696 except (AttributeError, ValueError):
2697 2697 # we are pure python, or key is not hex
2698 2698 pass
2699 2699 if ambiguous:
2700 2700 raise error.AmbiguousPrefixLookupError(
2701 2701 id, self.display_id, _(b'ambiguous identifier')
2702 2702 )
2703 2703
2704 2704 if id in self._pcache:
2705 2705 return self._pcache[id]
2706 2706
2707 2707 if len(id) <= 40:
2708 2708 # hex(node)[:...]
2709 2709 l = len(id) // 2 * 2 # grab an even number of digits
2710 2710 try:
2711 2711 # we're dropping the last digit, so let's check that it's hex,
2712 2712 # to avoid the expensive computation below if it's not
2713 2713 if len(id) % 2 > 0:
2714 2714 if not (id[-1] in hexdigits):
2715 2715 return None
2716 2716 prefix = bin(id[:l])
2717 2717 except binascii.Error:
2718 2718 pass
2719 2719 else:
2720 2720 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2721 2721 nl = [
2722 2722 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2723 2723 ]
2724 2724 if self.nodeconstants.nullhex.startswith(id):
2725 2725 nl.append(self.nullid)
2726 2726 if len(nl) > 0:
2727 2727 if len(nl) == 1 and not maybewdir:
2728 2728 self._pcache[id] = nl[0]
2729 2729 return nl[0]
2730 2730 raise error.AmbiguousPrefixLookupError(
2731 2731 id, self.display_id, _(b'ambiguous identifier')
2732 2732 )
2733 2733 if maybewdir:
2734 2734 raise error.WdirUnsupported
2735 2735 return None
2736 2736
2737 2737 def lookup(self, id):
2738 2738 """locate a node based on:
2739 2739 - revision number or str(revision number)
2740 2740 - nodeid or subset of hex nodeid
2741 2741 """
2742 2742 n = self._match(id)
2743 2743 if n is not None:
2744 2744 return n
2745 2745 n = self._partialmatch(id)
2746 2746 if n:
2747 2747 return n
2748 2748
2749 2749 raise error.LookupError(id, self.display_id, _(b'no match found'))
2750 2750
2751 2751 def shortest(self, node, minlength=1):
2752 2752 """Find the shortest unambiguous prefix that matches node."""
2753 2753
2754 2754 def isvalid(prefix):
2755 2755 try:
2756 2756 matchednode = self._partialmatch(prefix)
2757 2757 except error.AmbiguousPrefixLookupError:
2758 2758 return False
2759 2759 except error.WdirUnsupported:
2760 2760 # single 'ff...' match
2761 2761 return True
2762 2762 if matchednode is None:
2763 2763 raise error.LookupError(node, self.display_id, _(b'no node'))
2764 2764 return True
2765 2765
2766 2766 def maybewdir(prefix):
2767 2767 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2768 2768
2769 2769 hexnode = hex(node)
2770 2770
2771 2771 def disambiguate(hexnode, minlength):
2772 2772 """Disambiguate against wdirid."""
2773 2773 for length in range(minlength, len(hexnode) + 1):
2774 2774 prefix = hexnode[:length]
2775 2775 if not maybewdir(prefix):
2776 2776 return prefix
2777 2777
2778 2778 if not getattr(self, 'filteredrevs', None):
2779 2779 try:
2780 2780 shortest = self.index.shortest(
2781 2781 node
2782 2782 ) # pytype: disable=attribute-error
2783 2783 length = max(shortest, minlength)
2784 2784 return disambiguate(hexnode, length)
2785 2785 except error.RevlogError:
2786 2786 if node != self.nodeconstants.wdirid:
2787 2787 raise error.LookupError(
2788 2788 node, self.display_id, _(b'no node')
2789 2789 )
2790 2790 except AttributeError:
2791 2791 # Fall through to pure code
2792 2792 pass
2793 2793
2794 2794 if node == self.nodeconstants.wdirid:
2795 2795 for length in range(minlength, len(hexnode) + 1):
2796 2796 prefix = hexnode[:length]
2797 2797 if isvalid(prefix):
2798 2798 return prefix
2799 2799
2800 2800 for length in range(minlength, len(hexnode) + 1):
2801 2801 prefix = hexnode[:length]
2802 2802 if isvalid(prefix):
2803 2803 return disambiguate(hexnode, length)
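# Example (illustrative): for a node whose hex form starts with b'1f0e',
# if no other node's hex form starts with b'1', then
# shortest(node, minlength=1) returns b'1': the smallest prefix that is
# unambiguous in the index and cannot be confused with the all-'f'
# working-directory pseudo-identifier.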
2804 2804
2805 2805 def cmp(self, node, text):
2806 2806 """compare text with a given file revision
2807 2807
2808 2808 returns True if text is different from what is stored.
2809 2809 """
2810 2810 p1, p2 = self.parents(node)
2811 2811 return storageutil.hashrevisionsha1(text, p1, p2) != node
2812 2812
2813 2813 def deltaparent(self, rev):
2814 2814 """return deltaparent of the given revision"""
2815 2815 base = self.index[rev][3]
2816 2816 if base == rev:
2817 2817 return nullrev
2818 2818 elif self.delta_config.general_delta:
2819 2819 return base
2820 2820 else:
2821 2821 return rev - 1
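# Example (illustrative): with general delta enabled, the stored base is
# returned directly (deltaparent(7) may well be 2); without it, deltas
# always chain on the previous revision, so deltaparent(7) is 6 -- and a
# revision stored as a full text (base == rev) reports nullrev either way.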
2822 2822
2823 2823 def issnapshot(self, rev):
2824 2824 """tells whether rev is a snapshot"""
2825 2825 ret = self._inner.issnapshot(rev)
2826 2826 self.issnapshot = self._inner.issnapshot
2827 2827 return ret
2828 2828
2829 2829 def snapshotdepth(self, rev):
2830 2830 """number of snapshot in the chain before this one"""
2831 2831 if not self.issnapshot(rev):
2832 2832 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2833 2833 return len(self._inner._deltachain(rev)[0]) - 1
2834 2834
2835 2835 def revdiff(self, rev1, rev2):
2836 2836 """return or calculate a delta between two revisions
2837 2837
2838 2838 The delta calculated is in binary form and is intended to be written to
2839 2839 revlog data directly. So this function needs raw revision data.
2840 2840 """
2841 2841 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2842 2842 return bytes(self._inner._chunk(rev2))
2843 2843
2844 2844 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2845 2845
2846 2846 def revision(self, nodeorrev):
2847 2847 """return an uncompressed revision of a given node or revision
2848 2848 number.
2849 2849 """
2850 2850 return self._revisiondata(nodeorrev)
2851 2851
2852 2852 def sidedata(self, nodeorrev):
2853 2853 """a map of extra data related to the changeset but not part of the hash
2854 2854
2855 2855 This function currently returns a dictionary. However, a more
2856 2856 advanced mapping object will likely be used in the future for more
2857 2857 efficient/lazy code.
2858 2858 """
2859 2859 # deal with <nodeorrev> argument type
2860 2860 if isinstance(nodeorrev, int):
2861 2861 rev = nodeorrev
2862 2862 else:
2863 2863 rev = self.rev(nodeorrev)
2864 2864 return self._sidedata(rev)
2865 2865
2866 2866 def _rawtext(self, node, rev):
2867 2867 """return the possibly unvalidated rawtext for a revision
2868 2868
2869 2869 returns (rev, rawtext, validated)
2870 2870 """
2871 2871 # Check if we have the entry in cache
2872 2872 # The cache entry looks like (node, rev, rawtext)
2873 2873 if self._inner._revisioncache:
2874 2874 if self._inner._revisioncache[0] == node:
2875 2875 return (rev, self._inner._revisioncache[2], True)
2876 2876
2877 2877 if rev is None:
2878 2878 rev = self.rev(node)
2879 2879
2880 2880 text = self._inner.raw_text(node, rev)
2881 2881 return (rev, text, False)
2882 2882
2883 2883 def _revisiondata(self, nodeorrev, raw=False):
2884 2884 # deal with <nodeorrev> argument type
2885 2885 if isinstance(nodeorrev, int):
2886 2886 rev = nodeorrev
2887 2887 node = self.node(rev)
2888 2888 else:
2889 2889 node = nodeorrev
2890 2890 rev = None
2891 2891
2892 2892 # fast path the special `nullid` rev
2893 2893 if node == self.nullid:
2894 2894 return b""
2895 2895
2896 2896 # ``rawtext`` is the text as stored inside the revlog. Might be the
2897 2897 # revision or might need to be processed to retrieve the revision.
2898 2898 rev, rawtext, validated = self._rawtext(node, rev)
2899 2899
2900 2900 if raw and validated:
2901 2901 # if we don't want to process the raw text and that raw
2902 2902 # text is cached, we can exit early.
2903 2903 return rawtext
2904 2904 if rev is None:
2905 2905 rev = self.rev(node)
2906 2906 # the revlog's flags for this revision
2907 2907 # (they usually alter its state or content)
2908 2908 flags = self.flags(rev)
2909 2909
2910 2910 if validated and flags == REVIDX_DEFAULT_FLAGS:
2911 2911 # no extra flags set, no flag processor runs, text = rawtext
2912 2912 return rawtext
2913 2913
2914 2914 if raw:
2915 2915 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2916 2916 text = rawtext
2917 2917 else:
2918 2918 r = flagutil.processflagsread(self, rawtext, flags)
2919 2919 text, validatehash = r
2920 2920 if validatehash:
2921 2921 self.checkhash(text, node, rev=rev)
2922 2922 if not validated:
2923 2923 self._inner._revisioncache = (node, rev, rawtext)
2924 2924
2925 2925 return text
2926 2926
2927 2927 def _sidedata(self, rev):
2928 2928 """Return the sidedata for a given revision number."""
2929 2929 if self._sidedatafile is None:
2930 2930 return {}
2931 2931 sidedata_end = None
2932 2932 if self._docket is not None:
2933 2933 sidedata_end = self._docket.sidedata_end
2934 2934 return self._inner.sidedata(rev, sidedata_end)
2935 2935
2936 2936 def rawdata(self, nodeorrev):
2937 2937 """return an uncompressed raw data of a given node or revision number."""
2938 2938 return self._revisiondata(nodeorrev, raw=True)
2939 2939
2940 2940 def hash(self, text, p1, p2):
2941 2941 """Compute a node hash.
2942 2942
2943 2943 Available as a function so that subclasses can replace the hash
2944 2944 as needed.
2945 2945 """
2946 2946 return storageutil.hashrevisionsha1(text, p1, p2)
2947 2947
2948 2948 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2949 2949 """Check node hash integrity.
2950 2950
2951 2951 Available as a function so that subclasses can extend hash mismatch
2952 2952 behaviors as needed.
2953 2953 """
2954 2954 try:
2955 2955 if p1 is None and p2 is None:
2956 2956 p1, p2 = self.parents(node)
2957 2957 if node != self.hash(text, p1, p2):
2958 2958 # Clear the revision cache on hash failure. The revision cache
2959 2959 # only stores the raw revision and clearing the cache does have
2960 2960 # the side-effect that we won't have a cache hit when the raw
2961 2961 # revision data is accessed. But this case should be rare and
2962 2962 # it is extra work to teach the cache about the hash
2963 2963 # verification state.
2964 2964 if (
2965 2965 self._inner._revisioncache
2966 2966 and self._inner._revisioncache[0] == node
2967 2967 ):
2968 2968 self._inner._revisioncache = None
2969 2969
2970 2970 revornode = rev
2971 2971 if revornode is None:
2972 2972 revornode = templatefilters.short(hex(node))
2973 2973 raise error.RevlogError(
2974 2974 _(b"integrity check failed on %s:%s")
2975 2975 % (self.display_id, pycompat.bytestr(revornode))
2976 2976 )
2977 2977 except error.RevlogError:
2978 2978 if self.feature_config.censorable and storageutil.iscensoredtext(
2979 2979 text
2980 2980 ):
2981 2981 raise error.CensoredNodeError(self.display_id, node, text)
2982 2982 raise
2983 2983
2984 2984 @property
2985 2985 def _split_index_file(self):
2986 2986 """the path where to expect the index of an ongoing splitting operation
2987 2987
2988 2988 The file will only exist if a splitting operation is in progress, but
2989 2989 it is always expected at the same location."""
2990 2990 parts = self.radix.split(b'/')
2991 2991 if len(parts) > 1:
2992 2992 # add a '-s' suffix to the ``data`` or ``meta`` base directory
2993 2993 head = parts[0] + b'-s'
2994 2994 mids = parts[1:-1]
2995 2995 tail = parts[-1] + b'.i'
2996 2996 pieces = [head] + mids + [tail]
2997 2997 return b'/'.join(pieces)
2998 2998 else:
2999 2999 # the revlog is stored at the root of the store (changelog or
3000 3000 # manifest), no risk of collision.
3001 3001 return self.radix + b'.i.s'
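# Example (illustrative): for radix b'data/some/file', the split index is
# expected at b'data-s/some/file.i'; for a store-root revlog such as
# radix b'00changelog', it is expected at b'00changelog.i.s'.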
3002 3002
3003 3003 def _enforceinlinesize(self, tr):
3004 3004 """Check if the revlog is too big for inline and convert if so.
3005 3005
3006 3006 This should be called after revisions are added to the revlog. If the
3007 3007 revlog has grown too large to remain inline, this converts it to use
3008 3008 separate index and data files.
3009 3009 """
3010 3010 tiprev = len(self) - 1
3011 3011 total_size = self.start(tiprev) + self.length(tiprev)
3012 3012 if not self._inline or (self._may_inline and total_size < _maxinline):
3013 3013 return
3014 3014
3015 3015 if self._docket is not None:
3016 3016 msg = b"inline revlog should not have a docket"
3017 3017 raise error.ProgrammingError(msg)
3018 3018
3019 3019 # In the common case, we enforce the inline size limit because the
3020 3020 # revlog has been appended to. In such a case, it must have an initial
3021 3021 # offset recorded in the transaction.
3022 3022 troffset = tr.findoffset(self._inner.canonical_index_file)
3023 3023 pre_touched = troffset is not None
3024 3024 if not pre_touched and self.target[0] != KIND_CHANGELOG:
3025 3025 raise error.RevlogError(
3026 3026 _(b"%s not found in the transaction") % self._indexfile
3027 3027 )
3028 3028
3029 3029 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
3030 3030 tr.add(self._datafile, 0)
3031 3031
3032 3032 new_index_file_path = None
3033 3033 old_index_file_path = self._indexfile
3034 3034 new_index_file_path = self._split_index_file
3035 3035 opener = self.opener
3036 3036 weak_self = weakref.ref(self)
3037 3037
3038 3038 # the "split" index replace the real index when the transaction is
3039 3039 # finalized
3040 3040 def finalize_callback(tr):
3041 3041 opener.rename(
3042 3042 new_index_file_path,
3043 3043 old_index_file_path,
3044 3044 checkambig=True,
3045 3045 )
3046 3046 maybe_self = weak_self()
3047 3047 if maybe_self is not None:
3048 3048 maybe_self._indexfile = old_index_file_path
3049 3049 maybe_self._inner.index_file = maybe_self._indexfile
3050 3050
3051 3051 def abort_callback(tr):
3052 3052 maybe_self = weak_self()
3053 3053 if maybe_self is not None:
3054 3054 maybe_self._indexfile = old_index_file_path
3055 3055 maybe_self._inner.inline = True
3056 3056 maybe_self._inner.index_file = old_index_file_path
3057 3057
3058 3058 tr.registertmp(new_index_file_path)
3059 3059 # we use 001 here to make this happen after the finalisation of the
3060 3060 # pending changelog write (which uses 000). Otherwise the two
3061 3061 # finalizers would step over each other and delete the changelog.i file.
3062 3062 if self.target[1] is not None:
3063 3063 callback_id = b'001-revlog-split-%d-%s' % self.target
3064 3064 else:
3065 3065 callback_id = b'001-revlog-split-%d' % self.target[0]
3066 3066 tr.addfinalize(callback_id, finalize_callback)
3067 3067 tr.addabort(callback_id, abort_callback)
3068 3068
3069 3069 self._format_flags &= ~FLAG_INLINE_DATA
3070 3070 self._inner.split_inline(
3071 3071 tr,
3072 3072 self._format_flags | self._format_version,
3073 3073 new_index_file_path=new_index_file_path,
3074 3074 )
3075 3075
3076 3076 self._inline = False
3077 3077 if new_index_file_path is not None:
3078 3078 self._indexfile = new_index_file_path
3079 3079
3080 3080 nodemaputil.setup_persistent_nodemap(tr, self)
3081 3081
3082 3082 def _nodeduplicatecallback(self, transaction, node):
3083 3083 """called when trying to add a node already stored."""
3084 3084
3085 3085 @contextlib.contextmanager
3086 3086 def reading(self):
3087 3087 with self._inner.reading():
3088 3088 yield
3089 3089
3090 3090 @contextlib.contextmanager
3091 3091 def _writing(self, transaction):
3092 3092 if self._trypending:
3093 3093 msg = b'try to write in a `trypending` revlog: %s'
3094 3094 msg %= self.display_id
3095 3095 raise error.ProgrammingError(msg)
3096 3096 if self._inner.is_writing:
3097 3097 yield
3098 3098 else:
3099 3099 data_end = None
3100 3100 sidedata_end = None
3101 3101 if self._docket is not None:
3102 3102 data_end = self._docket.data_end
3103 3103 sidedata_end = self._docket.sidedata_end
3104 3104 with self._inner.writing(
3105 3105 transaction,
3106 3106 data_end=data_end,
3107 3107 sidedata_end=sidedata_end,
3108 3108 ):
3109 3109 yield
3110 3110 if self._docket is not None:
3111 3111 self._write_docket(transaction)
3112 3112
3113 3113 @property
3114 3114 def is_delaying(self):
3115 3115 return self._inner.is_delaying
3116 3116
3117 3117 def _write_docket(self, transaction):
3118 3118 """write the current docket on disk
3119 3119
3120 3120 Exists as a method to help the changelog implement its transaction logic.
3121 3121
3122 3122 We could also imagine using the same transaction logic for all revlogs,
3123 3123 since dockets are cheap."""
3124 3124 self._docket.write(transaction)
3125 3125
3126 3126 def addrevision(
3127 3127 self,
3128 3128 text,
3129 3129 transaction,
3130 3130 link,
3131 3131 p1,
3132 3132 p2,
3133 3133 cachedelta=None,
3134 3134 node=None,
3135 3135 flags=REVIDX_DEFAULT_FLAGS,
3136 3136 deltacomputer=None,
3137 3137 sidedata=None,
3138 3138 ):
3139 3139 """add a revision to the log
3140 3140
3141 3141 text - the revision data to add
3142 3142 transaction - the transaction object used for rollback
3143 3143 link - the linkrev data to add
3144 3144 p1, p2 - the parent nodeids of the revision
3145 3145 cachedelta - an optional precomputed delta
3146 3146 node - nodeid of revision; typically node is not specified, and it is
3147 3147 computed by default as hash(text, p1, p2); however, subclasses might
3148 3148 use a different hashing method (and override checkhash() in that case)
3149 3149 flags - the known flags to set on the revision
3150 3150 deltacomputer - an optional deltacomputer instance shared between
3151 3151 multiple calls
3152 3152 """
3153 3153 if link == nullrev:
3154 3154 raise error.RevlogError(
3155 3155 _(b"attempted to add linkrev -1 to %s") % self.display_id
3156 3156 )
3157 3157
3158 3158 if sidedata is None:
3159 3159 sidedata = {}
3160 3160 elif sidedata and not self.feature_config.has_side_data:
3161 3161 raise error.ProgrammingError(
3162 3162 _(b"trying to add sidedata to a revlog that does not support them")
3163 3163 )
3164 3164
3165 3165 if flags:
3166 3166 node = node or self.hash(text, p1, p2)
3167 3167
3168 3168 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3169 3169
3170 3170 # If the flag processor modifies the revision data, ignore any provided
3171 3171 # cachedelta.
3172 3172 if rawtext != text:
3173 3173 cachedelta = None
3174 3174
3175 3175 if len(rawtext) > _maxentrysize:
3176 3176 raise error.RevlogError(
3177 3177 _(
3178 3178 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3179 3179 )
3180 3180 % (self.display_id, len(rawtext))
3181 3181 )
3182 3182
3183 3183 node = node or self.hash(rawtext, p1, p2)
3184 3184 rev = self.index.get_rev(node)
3185 3185 if rev is not None:
3186 3186 return rev
3187 3187
3188 3188 if validatehash:
3189 3189 self.checkhash(rawtext, node, p1=p1, p2=p2)
3190 3190
3191 3191 return self.addrawrevision(
3192 3192 rawtext,
3193 3193 transaction,
3194 3194 link,
3195 3195 p1,
3196 3196 p2,
3197 3197 node,
3198 3198 flags,
3199 3199 cachedelta=cachedelta,
3200 3200 deltacomputer=deltacomputer,
3201 3201 sidedata=sidedata,
3202 3202 )
3203 3203
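# Minimal usage sketch (hypothetical `rl` revlog and open transaction
# `tr`; neither is part of this module): appending a root revision whose
# node is computed as hash(text, p1, p2) could look like:
#
#   rev = rl.addrevision(b'file content', tr, 0, rl.nullid, rl.nullid)
#
# Passing link=nullrev would raise a RevlogError, as checked above.
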
3204 3204 def addrawrevision(
3205 3205 self,
3206 3206 rawtext,
3207 3207 transaction,
3208 3208 link,
3209 3209 p1,
3210 3210 p2,
3211 3211 node,
3212 3212 flags,
3213 3213 cachedelta=None,
3214 3214 deltacomputer=None,
3215 3215 sidedata=None,
3216 3216 ):
3217 3217 """add a raw revision with known flags, node and parents
3218 3218 useful when reusing a revision not stored in this revlog (ex: received
3219 3219 over wire, or read from an external bundle).
3220 3220 """
3221 3221 with self._writing(transaction):
3222 3222 return self._addrevision(
3223 3223 node,
3224 3224 rawtext,
3225 3225 transaction,
3226 3226 link,
3227 3227 p1,
3228 3228 p2,
3229 3229 flags,
3230 3230 cachedelta,
3231 3231 deltacomputer=deltacomputer,
3232 3232 sidedata=sidedata,
3233 3233 )
3234 3234
3235 3235 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3236 3236 return self._inner.compress(data)
3237 3237
3238 3238 def decompress(self, data):
3239 3239 return self._inner.decompress(data)
3240 3240
3241 3241 def _addrevision(
3242 3242 self,
3243 3243 node,
3244 3244 rawtext,
3245 3245 transaction,
3246 3246 link,
3247 3247 p1,
3248 3248 p2,
3249 3249 flags,
3250 3250 cachedelta,
3251 3251 alwayscache=False,
3252 3252 deltacomputer=None,
3253 3253 sidedata=None,
3254 3254 ):
3255 3255 """internal function to add revisions to the log
3256 3256
3257 3257 see addrevision for argument descriptions.
3258 3258
3259 3259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3260 3260
3261 3261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3262 3262 be used.
3263 3263
3264 3264 invariants:
3265 3265 - rawtext is optional (can be None); if not set, cachedelta must be set.
3266 3266 if both are set, they must correspond to each other.
3267 3267 """
3268 3268 if node == self.nullid:
3269 3269 raise error.RevlogError(
3270 3270 _(b"%s: attempt to add null revision") % self.display_id
3271 3271 )
3272 3272 if (
3273 3273 node == self.nodeconstants.wdirid
3274 3274 or node in self.nodeconstants.wdirfilenodeids
3275 3275 ):
3276 3276 raise error.RevlogError(
3277 3277 _(b"%s: attempt to add wdir revision") % self.display_id
3278 3278 )
3279 3279 if not self._inner.is_writing:
3280 3280 msg = b'adding revision outside `revlog._writing` context'
3281 3281 raise error.ProgrammingError(msg)
3282 3282
3283 3283 btext = [rawtext]
3284 3284
3285 3285 curr = len(self)
3286 3286 prev = curr - 1
3287 3287
3288 3288 offset = self._get_data_offset(prev)
3289 3289
3290 3290 if self._concurrencychecker:
3291 3291 ifh, dfh, sdfh = self._inner._writinghandles
3292 3292 # XXX no checking for the sidedata file
3293 3293 if self._inline:
3294 3294 # offset is "as if" it were in the .d file, so we need to add on
3295 3295 # the size of the entry metadata.
3296 3296 self._concurrencychecker(
3297 3297 ifh, self._indexfile, offset + curr * self.index.entry_size
3298 3298 )
3299 3299 else:
3300 3300 # Entries in the .i are a consistent size.
3301 3301 self._concurrencychecker(
3302 3302 ifh, self._indexfile, curr * self.index.entry_size
3303 3303 )
3304 3304 self._concurrencychecker(dfh, self._datafile, offset)
3305 3305
3306 3306 p1r, p2r = self.rev(p1), self.rev(p2)
3307 3307
3308 3308 # full versions are inserted when the needed deltas
3309 3309 # become comparable to the uncompressed text
3310 3310 if rawtext is None:
3311 3311 # we need the rawtext size, before it is changed by flag processors;
3312 3312 # that is the non-raw size. use revlog explicitly to avoid filelog's
3313 3313 # extra logic that might subtract the metadata size.
3314 3314 textlen = mdiff.patchedsize(
3315 3315 revlog.size(self, cachedelta[0]), cachedelta[1]
3316 3316 )
3317 3317 else:
3318 3318 textlen = len(rawtext)
3319 3319
3320 3320 if deltacomputer is None:
3321 3321 write_debug = None
3322 3322 if self.delta_config.debug_delta:
3323 3323 write_debug = transaction._report
3324 3324 deltacomputer = deltautil.deltacomputer(
3325 3325 self, write_debug=write_debug
3326 3326 )
3327 3327
3328 3328 if cachedelta is not None and len(cachedelta) == 2:
3329 3329 # If the cached delta has no information about how it should be
3330 3330 # reused, add the default reuse instruction according to the
3331 3331 # revlog's configuration.
3332 3332 if (
3333 3333 self.delta_config.general_delta
3334 3334 and self.delta_config.lazy_delta_base
3335 3335 ):
3336 3336 delta_base_reuse = DELTA_BASE_REUSE_TRY
3337 3337 else:
3338 3338 delta_base_reuse = DELTA_BASE_REUSE_NO
3339 3339 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3340 3340
3341 3341 revinfo = revlogutils.revisioninfo(
3342 3342 node,
3343 3343 p1,
3344 3344 p2,
3345 3345 btext,
3346 3346 textlen,
3347 3347 cachedelta,
3348 3348 flags,
3349 3349 )
3350 3350
3351 3351 deltainfo = deltacomputer.finddeltainfo(revinfo)
3352 3352
3353 3353 compression_mode = COMP_MODE_INLINE
3354 3354 if self._docket is not None:
3355 3355 default_comp = self._docket.default_compression_header
3356 3356 r = deltautil.delta_compression(default_comp, deltainfo)
3357 3357 compression_mode, deltainfo = r
3358 3358
3359 3359 sidedata_compression_mode = COMP_MODE_INLINE
3360 3360 if sidedata and self.feature_config.has_side_data:
3361 3361 sidedata_compression_mode = COMP_MODE_PLAIN
3362 3362 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3363 3363 sidedata_offset = self._docket.sidedata_end
3364 3364 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3365 3365 if (
3366 3366 h != b'u'
3367 3367 and comp_sidedata[0:1] != b'\0'
3368 3368 and len(comp_sidedata) < len(serialized_sidedata)
3369 3369 ):
3370 3370 assert not h
3371 3371 if (
3372 3372 comp_sidedata[0:1]
3373 3373 == self._docket.default_compression_header
3374 3374 ):
3375 3375 sidedata_compression_mode = COMP_MODE_DEFAULT
3376 3376 serialized_sidedata = comp_sidedata
3377 3377 else:
3378 3378 sidedata_compression_mode = COMP_MODE_INLINE
3379 3379 serialized_sidedata = comp_sidedata
3380 3380 else:
3381 3381 serialized_sidedata = b""
3382 3382 # Don't store the offset if the sidedata is empty; that way
3383 3383 # we can easily detect empty sidedata, and it will be no different
3384 3384 # from sidedata we add manually.
3385 3385 sidedata_offset = 0
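# Informal summary of the sidedata storage mode chosen above (this
# mirrors the code, it does not extend it):
#
#   COMP_MODE_PLAIN   -> kept uncompressed; compression did not help
#   COMP_MODE_DEFAULT -> compressed with the docket's default engine
#   COMP_MODE_INLINE  -> compressed; the chunk's own header byte says
#                        which engine to use when reading it back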
3386 3386
3387 3387 rank = RANK_UNKNOWN
3388 3388 if self.feature_config.compute_rank:
3389 3389 if (p1r, p2r) == (nullrev, nullrev):
3390 3390 rank = 1
3391 3391 elif p1r != nullrev and p2r == nullrev:
3392 3392 rank = 1 + self.fast_rank(p1r)
3393 3393 elif p1r == nullrev and p2r != nullrev:
3394 3394 rank = 1 + self.fast_rank(p2r)
3395 3395 else: # merge node
3396 3396 if rustdagop is not None and self.index.rust_ext_compat:
3397 3397 rank = rustdagop.rank(self.index, p1r, p2r)
3398 3398 else:
3399 3399 pmin, pmax = sorted((p1r, p2r))
3400 3400 rank = 1 + self.fast_rank(pmax)
3401 3401 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
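# Informal invariant behind the rank computed above: rank(r) is the
# number of ancestors of r, r included. A slow pure-Python cross-check
# (sketch only, `naive_rank` is a made-up helper) would be:
#
#   def naive_rank(rl, rev):
#       return sum(1 for _ in rl.ancestors([rev], inclusive=True))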
3402 3402
3403 3403 e = revlogutils.entry(
3404 3404 flags=flags,
3405 3405 data_offset=offset,
3406 3406 data_compressed_length=deltainfo.deltalen,
3407 3407 data_uncompressed_length=textlen,
3408 3408 data_compression_mode=compression_mode,
3409 3409 data_delta_base=deltainfo.base,
3410 3410 link_rev=link,
3411 3411 parent_rev_1=p1r,
3412 3412 parent_rev_2=p2r,
3413 3413 node_id=node,
3414 3414 sidedata_offset=sidedata_offset,
3415 3415 sidedata_compressed_length=len(serialized_sidedata),
3416 3416 sidedata_compression_mode=sidedata_compression_mode,
3417 3417 rank=rank,
3418 3418 )
3419 3419
3420 3420 self.index.append(e)
3421 3421 entry = self.index.entry_binary(curr)
3422 3422 if curr == 0 and self._docket is None:
3423 3423 header = self._format_flags | self._format_version
3424 3424 header = self.index.pack_header(header)
3425 3425 entry = header + entry
3426 3426 self._writeentry(
3427 3427 transaction,
3428 3428 entry,
3429 3429 deltainfo.data,
3430 3430 link,
3431 3431 offset,
3432 3432 serialized_sidedata,
3433 3433 sidedata_offset,
3434 3434 )
3435 3435
3436 3436 rawtext = btext[0]
3437 3437
3438 3438 if alwayscache and rawtext is None:
3439 3439 rawtext = deltacomputer.buildtext(revinfo)
3440 3440
3441 3441 if type(rawtext) == bytes: # only accept immutable objects
3442 3442 self._inner._revisioncache = (node, curr, rawtext)
3443 3443 self._chainbasecache[curr] = deltainfo.chainbase
3444 3444 return curr
3445 3445
3446 3446 def _get_data_offset(self, prev):
3447 3447 """Returns the current offset in the (in-transaction) data file.
3448 3448 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3449 3449 file to store that information: since sidedata can be rewritten to the
3450 3450 end of the data file within a transaction, you can have cases where, for
3451 3451 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3452 3452 to `n - 1`'s sidedata being written after `n`'s data.
3453 3453
3454 3454 TODO cache this in a docket file before getting out of experimental."""
3455 3455 if self._docket is None:
3456 3456 return self.end(prev)
3457 3457 else:
3458 3458 return self._docket.data_end
3459 3459
3460 3460 def _writeentry(
3461 3461 self,
3462 3462 transaction,
3463 3463 entry,
3464 3464 data,
3465 3465 link,
3466 3466 offset,
3467 3467 sidedata,
3468 3468 sidedata_offset,
3469 3469 ):
3470 3470 # Files opened in a+ mode have inconsistent behavior on various
3471 3471 # platforms. Windows requires that a file positioning call be made
3472 3472 # when the file handle transitions between reads and writes. See
3473 3473 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3474 3474 # platforms, Python or the platform itself can be buggy. Some versions
3475 3475 # of Solaris have been observed to not append at the end of the file
3476 3476 # if the file was seeked to before the end. See issue4943 for more.
3477 3477 #
3478 3478 # We work around this issue by inserting a seek() before writing.
3479 3479 # Note: This is likely not necessary on Python 3. However, because
3480 3480 # the file handle is reused for reads and may be seeked there, we need
3481 3481 # to be careful before changing this.
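# In spirit, the inner writer's defensive pattern is (illustrative
# sketch, not the actual implementation):
#
#   fh.seek(0, os.SEEK_END)  # re-anchor the cursor explicitly
#   fh.write(data)           # then append
#
# so a prior read on the same handle cannot leave the cursor mid-file.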
3482 3482 index_end = data_end = sidedata_end = None
3483 3483 if self._docket is not None:
3484 3484 index_end = self._docket.index_end
3485 3485 data_end = self._docket.data_end
3486 3486 sidedata_end = self._docket.sidedata_end
3487 3487
3488 3488 files_end = self._inner.write_entry(
3489 3489 transaction,
3490 3490 entry,
3491 3491 data,
3492 3492 link,
3493 3493 offset,
3494 3494 sidedata,
3495 3495 sidedata_offset,
3496 3496 index_end,
3497 3497 data_end,
3498 3498 sidedata_end,
3499 3499 )
3500 3500 self._enforceinlinesize(transaction)
3501 3501 if self._docket is not None:
3502 3502 self._docket.index_end = files_end[0]
3503 3503 self._docket.data_end = files_end[1]
3504 3504 self._docket.sidedata_end = files_end[2]
3505 3505
3506 3506 nodemaputil.setup_persistent_nodemap(transaction, self)
3507 3507
3508 3508 def addgroup(
3509 3509 self,
3510 3510 deltas,
3511 3511 linkmapper,
3512 3512 transaction,
3513 3513 alwayscache=False,
3514 3514 addrevisioncb=None,
3515 3515 duplicaterevisioncb=None,
3516 3516 debug_info=None,
3517 3517 delta_base_reuse_policy=None,
3518 3518 ):
3519 3519 """
3520 3520 add a delta group
3521 3521
3522 3522 given a set of deltas, add them to the revision log. the
3523 3523 first delta is against its parent, which should be in our
3524 3524 log, the rest are against the previous delta.
3525 3525
3526 3526 If ``addrevisioncb`` is defined, it will be called with arguments of
3527 3527 this revlog and the node that was added.
3528 3528 """
3529 3529
3530 3530 if self._adding_group:
3531 3531 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3532 3532
3533 3533 # read the default delta-base reuse policy from revlog config if the
3534 3534 # group did not specify one.
3535 3535 if delta_base_reuse_policy is None:
3536 3536 if (
3537 3537 self.delta_config.general_delta
3538 3538 and self.delta_config.lazy_delta_base
3539 3539 ):
3540 3540 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3541 3541 else:
3542 3542 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3543 3543
3544 3544 self._adding_group = True
3545 3545 empty = True
3546 3546 try:
3547 3547 with self._writing(transaction):
3548 3548 write_debug = None
3549 3549 if self.delta_config.debug_delta:
3550 3550 write_debug = transaction._report
3551 3551 deltacomputer = deltautil.deltacomputer(
3552 3552 self,
3553 3553 write_debug=write_debug,
3554 3554 debug_info=debug_info,
3555 3555 )
3556 3556 # loop through our set of deltas
3557 3557 for data in deltas:
3558 3558 (
3559 3559 node,
3560 3560 p1,
3561 3561 p2,
3562 3562 linknode,
3563 3563 deltabase,
3564 3564 delta,
3565 3565 flags,
3566 3566 sidedata,
3567 3567 ) = data
3568 3568 link = linkmapper(linknode)
3569 3569 flags = flags or REVIDX_DEFAULT_FLAGS
3570 3570
3571 3571 rev = self.index.get_rev(node)
3572 3572 if rev is not None:
3573 3573 # this can happen if two branches make the same change
3574 3574 self._nodeduplicatecallback(transaction, rev)
3575 3575 if duplicaterevisioncb:
3576 3576 duplicaterevisioncb(self, rev)
3577 3577 empty = False
3578 3578 continue
3579 3579
3580 3580 for p in (p1, p2):
3581 3581 if not self.index.has_node(p):
3582 3582 raise error.LookupError(
3583 3583 p, self.radix, _(b'unknown parent')
3584 3584 )
3585 3585
3586 3586 if not self.index.has_node(deltabase):
3587 3587 raise error.LookupError(
3588 3588 deltabase, self.display_id, _(b'unknown delta base')
3589 3589 )
3590 3590
3591 3591 baserev = self.rev(deltabase)
3592 3592
3593 3593 if baserev != nullrev and self.iscensored(baserev):
3594 3594 # if base is censored, delta must be full replacement in a
3595 3595 # single patch operation
3596 3596 hlen = struct.calcsize(b">lll")
3597 3597 oldlen = self.rawsize(baserev)
3598 3598 newlen = len(delta) - hlen
3599 3599 if delta[:hlen] != mdiff.replacediffheader(
3600 3600 oldlen, newlen
3601 3601 ):
3602 3602 raise error.CensoredBaseError(
3603 3603 self.display_id, self.node(baserev)
3604 3604 )
3605 3605
3606 3606 if not flags and self._peek_iscensored(baserev, delta):
3607 3607 flags |= REVIDX_ISCENSORED
3608 3608
3609 3609 # We assume consumers of addrevisioncb will want to retrieve
3610 3610 # the added revision, which will require a call to
3611 3611 # revision(). revision() will fast path if there is a cache
3612 3612 # hit. So, we tell _addrevision() to always cache in this case.
3613 3613 # We're only using addgroup() in the context of changegroup
3614 3614 # generation so the revision data can always be handled as raw
3615 3615 # by the flagprocessor.
3616 3616 rev = self._addrevision(
3617 3617 node,
3618 3618 None,
3619 3619 transaction,
3620 3620 link,
3621 3621 p1,
3622 3622 p2,
3623 3623 flags,
3624 3624 (baserev, delta, delta_base_reuse_policy),
3625 3625 alwayscache=alwayscache,
3626 3626 deltacomputer=deltacomputer,
3627 3627 sidedata=sidedata,
3628 3628 )
3629 3629
3630 3630 if addrevisioncb:
3631 3631 addrevisioncb(self, rev)
3632 3632 empty = False
3633 3633 finally:
3634 3634 self._adding_group = False
3635 3635 return not empty
3636 3636
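# Minimal usage sketch for addgroup (hypothetical data, illustration
# only): each incoming item is the 8-tuple unpacked in the loop above.
#
#   deltas = [
#       (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
#   ]
#   any_processed = rl.addgroup(deltas, linkmapper, tr)
#
# `linkmapper` maps a linknode to a linkrev; the return value is True
# if the group contained at least one revision.
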
3637 3637 def iscensored(self, rev):
3638 3638 """Check if a file revision is censored."""
3639 3639 if not self.feature_config.censorable:
3640 3640 return False
3641 3641
3642 3642 return self.flags(rev) & REVIDX_ISCENSORED
3643 3643
3644 3644 def _peek_iscensored(self, baserev, delta):
3645 3645 """Quickly check if a delta produces a censored revision."""
3646 3646 if not self.feature_config.censorable:
3647 3647 return False
3648 3648
3649 3649 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3650 3650
3651 3651 def getstrippoint(self, minlink):
3652 3652 """find the minimum rev that must be stripped to strip the linkrev
3653 3653
3654 3654 Returns a tuple containing the minimum rev and a set of all revs that
3655 3655 have linkrevs that will be broken by this strip.
3656 3656 """
3657 3657 return storageutil.resolvestripinfo(
3658 3658 minlink,
3659 3659 len(self) - 1,
3660 3660 self.headrevs(),
3661 3661 self.linkrev,
3662 3662 self.parentrevs,
3663 3663 )
3664 3664
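# Sketch of the return contract (hypothetical revlog `rl`):
#
#   rev, broken = rl.getstrippoint(minlink)
#
# `rev` is the first revision a strip must truncate at; `broken` holds
# the revs whose linkrevs that truncation will invalidate.
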
3665 3665 def strip(self, minlink, transaction):
3666 3666 """truncate the revlog on the first revision with a linkrev >= minlink
3667 3667
3668 3668 This function is called when we're stripping revision minlink and
3669 3669 its descendants from the repository.
3670 3670
3671 3671 We have to remove all revisions with linkrev >= minlink, because
3672 3672 the equivalent changelog revisions will be renumbered after the
3673 3673 strip.
3674 3674
3675 3675 So we truncate the revlog on the first of these revisions, and
3676 3676 trust that the caller has saved the revisions that shouldn't be
3677 3677 removed and that it'll re-add them after this truncation.
3678 3678 """
3679 3679 if len(self) == 0:
3680 3680 return
3681 3681
3682 3682 rev, _ = self.getstrippoint(minlink)
3683 3683 if rev == len(self):
3684 3684 return
3685 3685
3686 3686 # first truncate the files on disk
3687 3687 data_end = self.start(rev)
3688 3688 if not self._inline:
3689 3689 transaction.add(self._datafile, data_end)
3690 3690 end = rev * self.index.entry_size
3691 3691 else:
3692 3692 end = data_end + (rev * self.index.entry_size)
3693 3693
3694 3694 if self._sidedatafile:
3695 3695 sidedata_end = self.sidedata_cut_off(rev)
3696 3696 transaction.add(self._sidedatafile, sidedata_end)
3697 3697
3698 3698 transaction.add(self._indexfile, end)
3699 3699 if self._docket is not None:
3700 3700 # XXX we could leverage the docket while stripping. However, it is
3701 3701 # not powerful enough at the time of this comment
3702 3702 self._docket.index_end = end
3703 3703 self._docket.data_end = data_end
3704 3704 self._docket.sidedata_end = sidedata_end
3705 3705 self._docket.write(transaction, stripping=True)
3706 3706
3707 3707 # then reset internal state in memory to forget those revisions
3708 3708 self._chaininfocache = util.lrucachedict(500)
3709 3709 self._inner.clear_cache()
3710 3710
3711 3711 del self.index[rev:-1]
3712 3712
3713 3713 def checksize(self):
3714 3714 """Check size of index and data files
3715 3715
3716 3716 return a (dd, di) tuple.
3717 3717 - dd: extra bytes for the "data" file
3718 3718 - di: extra bytes for the "index" file
3719 3719
3720 3720 A healthy revlog will return (0, 0).
3721 3721 """
3722 3722 expected = 0
3723 3723 if len(self):
3724 3724 expected = max(0, self.end(len(self) - 1))
3725 3725
3726 3726 try:
3727 3727 with self._datafp() as f:
3728 3728 f.seek(0, io.SEEK_END)
3729 3729 actual = f.tell()
3730 3730 dd = actual - expected
3731 3731 except FileNotFoundError:
3732 3732 dd = 0
3733 3733
3734 3734 try:
3735 3735 f = self.opener(self._indexfile)
3736 3736 f.seek(0, io.SEEK_END)
3737 3737 actual = f.tell()
3738 3738 f.close()
3739 3739 s = self.index.entry_size
3740 3740 i = max(0, actual // s)
3741 3741 di = actual - (i * s)
3742 3742 if self._inline:
3743 3743 databytes = 0
3744 3744 for r in self:
3745 3745 databytes += max(0, self.length(r))
3746 3746 dd = 0
3747 3747 di = actual - len(self) * s - databytes
3748 3748 except FileNotFoundError:
3749 3749 di = 0
3750 3750
3751 3751 return (dd, di)
3752 3752
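# Reading the result (hypothetical repair session): a healthy revlog
# yields (0, 0); for instance dd == 16 would mean 16 trailing bytes in
# the data file that no index entry accounts for.
#
#   dd, di = rl.checksize()
#   assert (dd, di) == (0, 0), 'revlog has trailing garbage'
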
3753 3753 def files(self):
3754 3754 """return list of files that compose this revlog"""
3755 3755 res = [self._indexfile]
3756 3756 if self._docket_file is None:
3757 3757 if not self._inline:
3758 3758 res.append(self._datafile)
3759 3759 else:
3760 3760 res.append(self._docket_file)
3761 3761 res.extend(self._docket.old_index_filepaths(include_empty=False))
3762 3762 if self._docket.data_end:
3763 3763 res.append(self._datafile)
3764 3764 res.extend(self._docket.old_data_filepaths(include_empty=False))
3765 3765 if self._docket.sidedata_end:
3766 3766 res.append(self._sidedatafile)
3767 3767 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3768 3768 return res
3769 3769
3770 3770 def emitrevisions(
3771 3771 self,
3772 3772 nodes,
3773 3773 nodesorder=None,
3774 3774 revisiondata=False,
3775 3775 assumehaveparentrevisions=False,
3776 3776 deltamode=repository.CG_DELTAMODE_STD,
3777 3777 sidedata_helpers=None,
3778 3778 debug_info=None,
3779 3779 ):
3780 3780 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3781 3781 raise error.ProgrammingError(
3782 3782 b'unhandled value for nodesorder: %s' % nodesorder
3783 3783 )
3784 3784
3785 3785 if nodesorder is None and not self.delta_config.general_delta:
3786 3786 nodesorder = b'storage'
3787 3787
3788 3788 if (
3789 3789 not self._storedeltachains
3790 3790 and deltamode != repository.CG_DELTAMODE_PREV
3791 3791 ):
3792 3792 deltamode = repository.CG_DELTAMODE_FULL
3793 3793
3794 3794 return storageutil.emitrevisions(
3795 3795 self,
3796 3796 nodes,
3797 3797 nodesorder,
3798 3798 revlogrevisiondelta,
3799 3799 deltaparentfn=self.deltaparent,
3800 3800 candeltafn=self._candelta,
3801 3801 rawsizefn=self.rawsize,
3802 3802 revdifffn=self.revdiff,
3803 3803 flagsfn=self.flags,
3804 3804 deltamode=deltamode,
3805 3805 revisiondata=revisiondata,
3806 3806 assumehaveparentrevisions=assumehaveparentrevisions,
3807 3807 sidedata_helpers=sidedata_helpers,
3808 3808 debug_info=debug_info,
3809 3809 )
3810 3810
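# Typical call shape (sketch; the nodes must already exist in this
# revlog):
#
#   for delta in rl.emitrevisions(
#       nodes, nodesorder=b'storage', revisiondata=True
#   ):
#       ...  # each item is a revlogrevisiondelta
#
# nodesorder must be b'nodes', b'storage', b'linear' or None.
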
3811 3811 DELTAREUSEALWAYS = b'always'
3812 3812 DELTAREUSESAMEREVS = b'samerevs'
3813 3813 DELTAREUSENEVER = b'never'
3814 3814
3815 3815 DELTAREUSEFULLADD = b'fulladd'
3816 3816
3817 3817 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3818 3818
3819 3819 def clone(
3820 3820 self,
3821 3821 tr,
3822 3822 destrevlog,
3823 3823 addrevisioncb=None,
3824 3824 deltareuse=DELTAREUSESAMEREVS,
3825 3825 forcedeltabothparents=None,
3826 3826 sidedata_helpers=None,
3827 3827 ):
3828 3828 """Copy this revlog to another, possibly with format changes.
3829 3829
3830 3830 The destination revlog will contain the same revisions and nodes.
3831 3831 However, it may not be bit-for-bit identical due to e.g. delta encoding
3832 3832 differences.
3833 3833
3834 3834 The ``deltareuse`` argument controls how deltas from the existing revlog
3835 3835 are preserved in the destination revlog. The argument can have the
3836 3836 following values:
3837 3837
3838 3838 DELTAREUSEALWAYS
3839 3839 Deltas will always be reused (if possible), even if the destination
3840 3840 revlog would not select the same revisions for the delta. This is the
3841 3841 fastest mode of operation.
3842 3842 DELTAREUSESAMEREVS
3843 3843 Deltas will be reused if the destination revlog would pick the same
3844 3844 revisions for the delta. This mode strikes a balance between speed
3845 3845 and optimization.
3846 3846 DELTAREUSENEVER
3847 3847 Deltas will never be reused. This is the slowest mode of execution.
3848 3848 This mode can be used to recompute deltas (e.g. if the diff/delta
3849 3849 algorithm changes).
3850 3850 DELTAREUSEFULLADD
3851 3851 Revisions will be re-added as if they were new content. This is
3852 3852 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3853 3853 e.g. large file detection and handling.
3854 3854
3855 3855 Delta computation can be slow, so the choice of delta reuse policy can
3856 3856 significantly affect run time.
3857 3857
3858 3858 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3859 3859 two extremes. Deltas will be reused if they are appropriate. But if the
3860 3860 delta could choose a better revision, it will do so. This means if you
3861 3861 are converting a non-generaldelta revlog to a generaldelta revlog,
3862 3862 deltas will be recomputed if the delta's parent isn't a parent of the
3863 3863 revision.
3864 3864
3865 3865 In addition to the delta policy, the ``forcedeltabothparents``
3866 3866 argument controls whether to force computing deltas against both parents
3867 3867 for merges. When None, the destination's existing configuration is kept.
3868 3868
3869 3869 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3870 3870 `sidedata_helpers`.
3871 3871 """
3872 3872 if deltareuse not in self.DELTAREUSEALL:
3873 3873 raise ValueError(
3874 3874 _(b'value for deltareuse invalid: %s') % deltareuse
3875 3875 )
3876 3876
3877 3877 if len(destrevlog):
3878 3878 raise ValueError(_(b'destination revlog is not empty'))
3879 3879
3880 3880 if getattr(self, 'filteredrevs', None):
3881 3881 raise ValueError(_(b'source revlog has filtered revisions'))
3882 3882 if getattr(destrevlog, 'filteredrevs', None):
3883 3883 raise ValueError(_(b'destination revlog has filtered revisions'))
3884 3884
3885 3885 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3886 3886 # if possible.
3887 3887 old_delta_config = destrevlog.delta_config
3888 3888 destrevlog.delta_config = destrevlog.delta_config.copy()
3889 3889
3890 3890 try:
3891 3891 if deltareuse == self.DELTAREUSEALWAYS:
3892 3892 destrevlog.delta_config.lazy_delta_base = True
3893 3893 destrevlog.delta_config.lazy_delta = True
3894 3894 elif deltareuse == self.DELTAREUSESAMEREVS:
3895 3895 destrevlog.delta_config.lazy_delta_base = False
3896 3896 destrevlog.delta_config.lazy_delta = True
3897 3897 elif deltareuse == self.DELTAREUSENEVER:
3898 3898 destrevlog.delta_config.lazy_delta_base = False
3899 3899 destrevlog.delta_config.lazy_delta = False
3900 3900
3901 3901 delta_both_parents = (
3902 3902 forcedeltabothparents or old_delta_config.delta_both_parents
3903 3903 )
3904 3904 destrevlog.delta_config.delta_both_parents = delta_both_parents
3905 3905
3906 3906 with self.reading(), destrevlog._writing(tr):
3907 3907 self._clone(
3908 3908 tr,
3909 3909 destrevlog,
3910 3910 addrevisioncb,
3911 3911 deltareuse,
3912 3912 forcedeltabothparents,
3913 3913 sidedata_helpers,
3914 3914 )
3915 3915
3916 3916 finally:
3917 3917 destrevlog.delta_config = old_delta_config
3918 3918
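# Usage sketch (hypothetical `src`, `dst` and transaction `tr`): a
# format-changing copy that recomputes every delta could look like:
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#
# The destination must be empty and neither revlog may have filtered
# revisions, as enforced above.
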
3919 3919 def _clone(
3920 3920 self,
3921 3921 tr,
3922 3922 destrevlog,
3923 3923 addrevisioncb,
3924 3924 deltareuse,
3925 3925 forcedeltabothparents,
3926 3926 sidedata_helpers,
3927 3927 ):
3928 3928 """perform the core duty of `revlog.clone` after parameter processing"""
3929 3929 write_debug = None
3930 3930 if self.delta_config.debug_delta:
3931 3931 write_debug = tr._report
3932 3932 deltacomputer = deltautil.deltacomputer(
3933 3933 destrevlog,
3934 3934 write_debug=write_debug,
3935 3935 )
3936 3936 index = self.index
3937 3937 for rev in self:
3938 3938 entry = index[rev]
3939 3939
3940 3940 # Some classes override linkrev to take filtered revs into
3941 3941 # account. Use raw entry from index.
3942 3942 flags = entry[0] & 0xFFFF
3943 3943 linkrev = entry[4]
3944 3944 p1 = index[entry[5]][7]
3945 3945 p2 = index[entry[6]][7]
3946 3946 node = entry[7]
3947 3947
3948 3948 # (Possibly) reuse the delta from the revlog if allowed and
3949 3949 # the revlog chunk is a delta.
3950 3950 cachedelta = None
3951 3951 rawtext = None
3952 3952 if deltareuse == self.DELTAREUSEFULLADD:
3953 3953 text = self._revisiondata(rev)
3954 3954 sidedata = self.sidedata(rev)
3955 3955
3956 3956 if sidedata_helpers is not None:
3957 3957 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3958 3958 self, sidedata_helpers, sidedata, rev
3959 3959 )
3960 3960 flags = flags | new_flags[0] & ~new_flags[1]
3961 3961
3962 3962 destrevlog.addrevision(
3963 3963 text,
3964 3964 tr,
3965 3965 linkrev,
3966 3966 p1,
3967 3967 p2,
3968 3968 cachedelta=cachedelta,
3969 3969 node=node,
3970 3970 flags=flags,
3971 3971 deltacomputer=deltacomputer,
3972 3972 sidedata=sidedata,
3973 3973 )
3974 3974 else:
3975 3975 if destrevlog.delta_config.lazy_delta:
3976 3976 dp = self.deltaparent(rev)
3977 3977 if dp != nullrev:
3978 3978 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3979 3979
3980 3980 sidedata = None
3981 3981 if not cachedelta:
3982 3982 try:
3983 3983 rawtext = self._revisiondata(rev)
3984 3984 except error.CensoredNodeError as censored:
3985 3985 assert flags & REVIDX_ISCENSORED
3986 3986 rawtext = censored.tombstone
3987 3987 sidedata = self.sidedata(rev)
3988 3988 if sidedata is None:
3989 3989 sidedata = self.sidedata(rev)
3990 3990
3991 3991 if sidedata_helpers is not None:
3992 3992 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3993 3993 self, sidedata_helpers, sidedata, rev
3994 3994 )
3995 3995 flags = flags | new_flags[0] & ~new_flags[1]
3996 3996
3997 3997 destrevlog._addrevision(
3998 3998 node,
3999 3999 rawtext,
4000 4000 tr,
4001 4001 linkrev,
4002 4002 p1,
4003 4003 p2,
4004 4004 flags,
4005 4005 cachedelta,
4006 4006 deltacomputer=deltacomputer,
4007 4007 sidedata=sidedata,
4008 4008 )
4009 4009
4010 4010 if addrevisioncb:
4011 4011 addrevisioncb(self, rev, node)
4012 4012
4013 4013 def censorrevision(self, tr, censor_nodes, tombstone=b''):
4014 4014 if self._format_version == REVLOGV0:
4015 4015 raise error.RevlogError(
4016 4016 _(b'cannot censor with version %d revlogs')
4017 4017 % self._format_version
4018 4018 )
4019 4019 elif self._format_version == REVLOGV1:
4020 4020 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
4021 4021 else:
4022 4022 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
4023 4023
4024 4024 def verifyintegrity(self, state) -> Iterable[revlogproblem]:
4025 4025 """Verifies the integrity of the revlog.
4026 4026
4027 4027 Yields ``revlogproblem`` instances describing problems that are
4028 4028 found.
4029 4029 """
4030 4030 dd, di = self.checksize()
4031 4031 if dd:
4032 4032 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
4033 4033 if di:
4034 4034 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
4035 4035
4036 4036 version = self._format_version
4037 4037
4038 4038 # The verifier tells us what version revlog we should be.
4039 4039 if version != state[b'expectedversion']:
4040 4040 yield revlogproblem(
4041 4041 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
4042 4042 % (self.display_id, version, state[b'expectedversion'])
4043 4043 )
4044 4044
4045 4045 state[b'skipread'] = set()
4046 4046 state[b'safe_renamed'] = set()
4047 4047
4048 4048 for rev in self:
4049 4049 node = self.node(rev)
4050 4050
4051 4051 # Verify contents. 4 cases to care about:
4052 4052 #
4053 4053 # common: the most common case
4054 4054 # rename: with a rename
4055 4055 # meta: file content starts with b'\1\n', the metadata
4056 4056 # header defined in filelog.py, but without a rename
4057 4057 # ext: content stored externally
4058 4058 #
4059 4059 # More formally, their differences are shown below:
4060 4060 #
4061 4061 # | common | rename | meta | ext
4062 4062 # -------------------------------------------------------
4063 4063 # flags() | 0 | 0 | 0 | not 0
4064 4064 # renamed() | False | True | False | ?
4065 4065 # rawtext[0:2]=='\1\n'| False | True | True | ?
4066 4066 #
4067 4067 # "rawtext" means the raw text stored in revlog data, which
4068 4068 # could be retrieved by "rawdata(rev)". "text"
4069 4069 # mentioned below is "revision(rev)".
4070 4070 #
4071 4071 # There are 3 different lengths stored physically:
4072 4072 # 1. L1: rawsize, stored in revlog index
4073 4073 # 2. L2: len(rawtext), stored in revlog data
4074 4074 # 3. L3: len(text), stored in revlog data if flags==0, or
4075 4075 # possibly somewhere else if flags!=0
4076 4076 #
4077 4077 # L1 should be equal to L2. L3 could be different from them.
4078 4078 # "text" may or may not affect commit hash depending on flag
4079 4079 # processors (see flagutil.addflagprocessor).
4080 4080 #
4081 4081 # | common | rename | meta | ext
4082 4082 # -------------------------------------------------
4083 4083 # rawsize() | L1 | L1 | L1 | L1
4084 4084 # size() | L1 | L2-LM | L1(*) | L1 (?)
4085 4085 # len(rawtext) | L2 | L2 | L2 | L2
4086 4086 # len(text) | L2 | L2 | L2 | L3
4087 4087 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
4088 4088 #
4089 4089 # LM: length of metadata, depending on rawtext
4090 4090 # (*): not ideal, see comment in filelog.size
4091 4091 # (?): could be "- len(meta)" if the resolved content has
4092 4092 # rename metadata
4093 4093 #
4094 4094 # Checks needed to be done:
4095 4095 # 1. length check: L1 == L2, in all cases.
4096 4096 # 2. hash check: depending on flag processor, we may need to
4097 4097 # use either "text" (external), or "rawtext" (in revlog).
4098 4098
4099 4099 try:
4100 4100 skipflags = state.get(b'skipflags', 0)
4101 4101 if skipflags:
4102 4102 skipflags &= self.flags(rev)
4103 4103
4104 4104 _verify_revision(self, skipflags, state, node)
4105 4105
4106 4106 l1 = self.rawsize(rev)
4107 4107 l2 = len(self.rawdata(node))
4108 4108
4109 4109 if l1 != l2:
4110 4110 yield revlogproblem(
4111 4111 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
4112 4112 node=node,
4113 4113 )
4114 4114
4115 4115 except error.CensoredNodeError:
4116 4116 if state[b'erroroncensored']:
4117 4117 yield revlogproblem(
4118 4118 error=_(b'censored file data'), node=node
4119 4119 )
4120 4120 state[b'skipread'].add(node)
4121 4121 except Exception as e:
4122 4122 yield revlogproblem(
4123 4123 error=_(b'unpacking %s: %s')
4124 4124 % (short(node), stringutil.forcebytestr(e)),
4125 4125 node=node,
4126 4126 )
4127 4127 state[b'skipread'].add(node)
4128 4128
4129 4129 def storageinfo(
4130 4130 self,
4131 4131 exclusivefiles=False,
4132 4132 sharedfiles=False,
4133 4133 revisionscount=False,
4134 4134 trackedsize=False,
4135 4135 storedsize=False,
4136 4136 ):
4137 4137 d = {}
4138 4138
4139 4139 if exclusivefiles:
4140 4140 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4141 4141 if not self._inline:
4142 4142 d[b'exclusivefiles'].append((self.opener, self._datafile))
4143 4143
4144 4144 if sharedfiles:
4145 4145 d[b'sharedfiles'] = []
4146 4146
4147 4147 if revisionscount:
4148 4148 d[b'revisionscount'] = len(self)
4149 4149
4150 4150 if trackedsize:
4151 4151 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4152 4152
4153 4153 if storedsize:
4154 4154 d[b'storedsize'] = sum(
4155 4155 self.opener.stat(path).st_size for path in self.files()
4156 4156 )
4157 4157
4158 4158 return d
4159 4159
4160 4160 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4161 4161 if not self.feature_config.has_side_data:
4162 4162 return
4163 4163 # revlog formats with sidedata support do not support inline
4164 4164 assert not self._inline
4165 4165 if not helpers[1] and not helpers[2]:
4166 4166 # Nothing to generate or remove
4167 4167 return
4168 4168
4169 4169 new_entries = []
4170 4170 # append the new sidedata
4171 4171 with self._writing(transaction):
4172 4172 ifh, dfh, sdfh = self._inner._writinghandles
4173 4173 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4174 4174
4175 4175 current_offset = sdfh.tell()
4176 4176 for rev in range(startrev, endrev + 1):
4177 4177 entry = self.index[rev]
4178 4178 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4179 4179 store=self,
4180 4180 sidedata_helpers=helpers,
4181 4181 sidedata={},
4182 4182 rev=rev,
4183 4183 )
4184 4184
4185 4185 serialized_sidedata = sidedatautil.serialize_sidedata(
4186 4186 new_sidedata
4187 4187 )
4188 4188
4189 4189 sidedata_compression_mode = COMP_MODE_INLINE
4190 4190 if serialized_sidedata and self.feature_config.has_side_data:
4191 4191 sidedata_compression_mode = COMP_MODE_PLAIN
4192 4192 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4193 4193 if (
4194 4194 h != b'u'
4195 4195 and comp_sidedata[0:1] != b'\0'
4196 4196 and len(comp_sidedata) < len(serialized_sidedata)
4197 4197 ):
4198 4198 assert not h
4199 4199 if (
4200 4200 comp_sidedata[0:1]
4201 4201 == self._docket.default_compression_header
4202 4202 ):
4203 4203 sidedata_compression_mode = COMP_MODE_DEFAULT
4204 4204 serialized_sidedata = comp_sidedata
4205 4205 else:
4206 4206 sidedata_compression_mode = COMP_MODE_INLINE
4207 4207 serialized_sidedata = comp_sidedata
4208 4208 if entry[8] != 0 or entry[9] != 0:
4209 4209 # rewriting entries that already have sidedata is not
4210 4210 # supported yet, because it introduces garbage data in the
4211 4211 # revlog.
4212 4212 msg = b"rewriting existing sidedata is not supported yet"
4213 4213 raise error.Abort(msg)
4214 4214
4215 4215 # Apply (potential) flags to add and to remove after running
4216 4216 # the sidedata helpers
4217 4217 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4218 4218 entry_update = (
4219 4219 current_offset,
4220 4220 len(serialized_sidedata),
4221 4221 new_offset_flags,
4222 4222 sidedata_compression_mode,
4223 4223 )
4224 4224
4225 4225 # the sidedata computation might have moved the file cursors around
4226 4226 sdfh.seek(current_offset, os.SEEK_SET)
4227 4227 sdfh.write(serialized_sidedata)
4228 4228 new_entries.append(entry_update)
4229 4229 current_offset += len(serialized_sidedata)
4230 4230 self._docket.sidedata_end = sdfh.tell()
4231 4231
4232 4232 # rewrite the new index entries
4233 4233 ifh.seek(startrev * self.index.entry_size)
4234 4234 for i, e in enumerate(new_entries):
4235 4235 rev = startrev + i
4236 4236 self.index.replace_sidedata_info(
4237 4237 rev, *e
4238 4238 ) # pytype: disable=attribute-error
4239 4239 packed = self.index.entry_binary(rev)
4240 4240 if rev == 0 and self._docket is None:
4241 4241 header = self._format_flags | self._format_version
4242 4242 header = self.index.pack_header(header)
4243 4243 packed = header + packed
4244 4244 ifh.write(packed)