typing: add a few type hints to `mercurial/revlog.py`...
Matt Harbison
r52570:df6ce326 default
@@ -1,4091 +1,4098 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import functools
20 20 import io
21 21 import os
22 22 import struct
23 23 import weakref
24 24 import zlib
25 25
26 from typing import (
27 Optional,
28 Tuple,
29 )
30
26 31 # import stuff from node for others to import from revlog
27 32 from .node import (
28 33 bin,
29 34 hex,
30 35 nullrev,
31 36 sha1nodeconstants,
32 37 short,
33 38 wdirrev,
34 39 )
35 40 from .i18n import _
36 41 from .revlogutils.constants import (
37 42 ALL_KINDS,
38 43 CHANGELOGV2,
39 44 COMP_MODE_DEFAULT,
40 45 COMP_MODE_INLINE,
41 46 COMP_MODE_PLAIN,
42 47 DELTA_BASE_REUSE_NO,
43 48 DELTA_BASE_REUSE_TRY,
44 49 ENTRY_RANK,
45 50 FEATURES_BY_VERSION,
46 51 FLAG_GENERALDELTA,
47 52 FLAG_INLINE_DATA,
48 53 INDEX_HEADER,
49 54 KIND_CHANGELOG,
50 55 KIND_FILELOG,
51 56 RANK_UNKNOWN,
52 57 REVLOGV0,
53 58 REVLOGV1,
54 59 REVLOGV1_FLAGS,
55 60 REVLOGV2,
56 61 REVLOGV2_FLAGS,
57 62 REVLOG_DEFAULT_FLAGS,
58 63 REVLOG_DEFAULT_FORMAT,
59 64 REVLOG_DEFAULT_VERSION,
60 65 SUPPORTED_FLAGS,
61 66 )
62 67 from .revlogutils.flagutil import (
63 68 REVIDX_DEFAULT_FLAGS,
64 69 REVIDX_ELLIPSIS,
65 70 REVIDX_EXTSTORED,
66 71 REVIDX_FLAGS_ORDER,
67 72 REVIDX_HASCOPIESINFO,
68 73 REVIDX_ISCENSORED,
69 74 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 75 )
71 76 from .thirdparty import attr
72 77 from . import (
73 78 ancestor,
74 79 dagop,
75 80 error,
76 81 mdiff,
77 82 policy,
78 83 pycompat,
79 84 revlogutils,
80 85 templatefilters,
81 86 util,
82 87 )
83 88 from .interfaces import (
84 89 repository,
85 90 util as interfaceutil,
86 91 )
87 92 from .revlogutils import (
88 93 deltas as deltautil,
89 94 docket as docketutil,
90 95 flagutil,
91 96 nodemap as nodemaputil,
92 97 randomaccessfile,
93 98 revlogv0,
94 99 rewrite,
95 100 sidedata as sidedatautil,
96 101 )
97 102 from .utils import (
98 103 storageutil,
99 104 stringutil,
100 105 )
101 106
102 107 # blanked usage of all the names to prevent pyflakes constraints
103 108 # We need these names available in the module for extensions.
104 109
105 110 REVLOGV0
106 111 REVLOGV1
107 112 REVLOGV2
108 113 CHANGELOGV2
109 114 FLAG_INLINE_DATA
110 115 FLAG_GENERALDELTA
111 116 REVLOG_DEFAULT_FLAGS
112 117 REVLOG_DEFAULT_FORMAT
113 118 REVLOG_DEFAULT_VERSION
114 119 REVLOGV1_FLAGS
115 120 REVLOGV2_FLAGS
116 121 REVIDX_ISCENSORED
117 122 REVIDX_ELLIPSIS
118 123 REVIDX_HASCOPIESINFO
119 124 REVIDX_EXTSTORED
120 125 REVIDX_DEFAULT_FLAGS
121 126 REVIDX_FLAGS_ORDER
122 127 REVIDX_RAWTEXT_CHANGING_FLAGS
123 128
124 129 parsers = policy.importmod('parsers')
125 130 rustancestor = policy.importrust('ancestor')
126 131 rustdagop = policy.importrust('dagop')
127 132 rustrevlog = policy.importrust('revlog')
128 133
129 134 # Aliased for performance.
130 135 _zlibdecompress = zlib.decompress
131 136
132 137 # max size of inline data embedded into a revlog
133 138 _maxinline = 131072
134 139
135 140
136 141 # Flag processors for REVIDX_ELLIPSIS.
137 142 def ellipsisreadprocessor(rl, text):
138 143 return text, False
139 144
140 145
141 146 def ellipsiswriteprocessor(rl, text):
142 147 return text, False
143 148
144 149
145 150 def ellipsisrawprocessor(rl, text):
146 151 return False
147 152
148 153
149 154 ellipsisprocessor = (
150 155 ellipsisreadprocessor,
151 156 ellipsiswriteprocessor,
152 157 ellipsisrawprocessor,
153 158 )
154 159
155 160
156 161 def _verify_revision(rl, skipflags, state, node):
157 162 """Verify the integrity of the given revlog ``node`` while providing a hook
158 163 point for extensions to influence the operation."""
159 164 if skipflags:
160 165 state[b'skipread'].add(node)
161 166 else:
162 167 # Side-effect: read content and verify hash.
163 168 rl.revision(node)
164 169
165 170
166 171 # True if a fast implementation for persistent-nodemap is available
167 172 #
168 173 # We also consider that we have a "fast" implementation in "pure" python because
169 174 # people using pure don't really have performance considerations (and a
170 175 # wheelbarrow of other slowness sources)
171 176 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
172 177 parsers, 'BaseIndexObject'
173 178 )
174 179
175 180
176 181 @interfaceutil.implementer(repository.irevisiondelta)
177 182 @attr.s(slots=True)
178 183 class revlogrevisiondelta:
179 184 node = attr.ib()
180 185 p1node = attr.ib()
181 186 p2node = attr.ib()
182 187 basenode = attr.ib()
183 188 flags = attr.ib()
184 189 baserevisionsize = attr.ib()
185 190 revision = attr.ib()
186 191 delta = attr.ib()
187 192 sidedata = attr.ib()
188 193 protocol_flags = attr.ib()
189 194 linknode = attr.ib(default=None)
190 195
191 196
192 197 @interfaceutil.implementer(repository.iverifyproblem)
193 198 @attr.s(frozen=True)
194 199 class revlogproblem:
195 200 warning = attr.ib(default=None)
196 201 error = attr.ib(default=None)
197 202 node = attr.ib(default=None)
198 203
199 204
200 205 def parse_index_v1(data, inline):
201 206 # call the C implementation to parse the index data
202 207 index, cache = parsers.parse_index2(data, inline)
203 208 return index, cache
204 209
205 210
206 211 def parse_index_v2(data, inline):
207 212 # call the C implementation to parse the index data
208 213 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
209 214 return index, cache
210 215
211 216
212 217 def parse_index_cl_v2(data, inline):
213 218 # call the C implementation to parse the index data
214 219 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
215 220 return index, cache
216 221
217 222
218 223 if hasattr(parsers, 'parse_index_devel_nodemap'):
219 224
220 225 def parse_index_v1_nodemap(data, inline):
221 226 index, cache = parsers.parse_index_devel_nodemap(data, inline)
222 227 return index, cache
223 228
224 229
225 230 else:
226 231 parse_index_v1_nodemap = None
227 232
228 233
229 234 def parse_index_v1_rust(data, inline, default_header):
230 235 cache = (0, data) if inline else None
231 236 return rustrevlog.Index(data, default_header), cache
232 237
233 238
234 239 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
235 240 # signed integer)
236 241 _maxentrysize = 0x7FFFFFFF
237 242
238 243 FILE_TOO_SHORT_MSG = _(
239 244 b'cannot read from revlog %s;'
240 245 b' expected %d bytes from offset %d, data size is %d'
241 246 )
242 247
243 248 hexdigits = b'0123456789abcdefABCDEF'
244 249
245 250
246 251 class _Config:
247 252 def copy(self):
248 253 return self.__class__(**self.__dict__)
249 254
250 255
251 256 @attr.s()
252 257 class FeatureConfig(_Config):
253 258 """Hold configuration values about the available revlog features"""
254 259
255 260 # the default compression engine
256 261 compression_engine = attr.ib(default=b'zlib')
257 262 # compression engine options
258 263 compression_engine_options = attr.ib(default=attr.Factory(dict))
259 264
260 265 # can we use censor on this revlog
261 266 censorable = attr.ib(default=False)
262 267 # does this revlog use the "side data" feature
263 268 has_side_data = attr.ib(default=False)
264 269 # might remove rank configuration once the computation has no impact
265 270 compute_rank = attr.ib(default=False)
266 271 # parent order is supposed to be semantically irrelevant, so we
267 272 # normally resort parents to ensure that the first parent is non-null,
268 273 # if there is a non-null parent at all.
269 274 # filelog abuses the parent order as flag to mark some instances of
270 275 # filelog abuses the parent order as a flag to mark some instances of
271 276 canonical_parent_order = attr.ib(default=False)
272 277 # can ellipsis commit be used
273 278 enable_ellipsis = attr.ib(default=False)
274 279
275 280 def copy(self):
276 281 new = super().copy()
277 282 new.compression_engine_options = self.compression_engine_options.copy()
278 283 return new
279 284
280 285
281 286 @attr.s()
282 287 class DataConfig(_Config):
283 288 """Hold configuration values about how the revlog data are read"""
284 289
285 290 # should we try to open the "pending" version of the revlog
286 291 try_pending = attr.ib(default=False)
287 292 # should we try to open the "split" version of the revlog
288 293 try_split = attr.ib(default=False)
289 294 # When True, indexfile should be opened with checkambig=True at writing,
290 295 # to avoid file stat ambiguity.
291 296 check_ambig = attr.ib(default=False)
292 297
293 298 # If true, use mmap instead of reading to deal with large index
294 299 mmap_large_index = attr.ib(default=False)
295 300 # how much data is large
296 301 mmap_index_threshold = attr.ib(default=None)
297 302 # How much data to read and cache into the raw revlog data cache.
298 303 chunk_cache_size = attr.ib(default=65536)
299 304
300 305 # The size of the uncompressed cache compared to the largest revision seen.
301 306 uncompressed_cache_factor = attr.ib(default=None)
302 307
303 308 # The number of chunks cached
304 309 uncompressed_cache_count = attr.ib(default=None)
305 310
306 311 # Allow sparse reading of the revlog data
307 312 with_sparse_read = attr.ib(default=False)
308 313 # minimal density of a sparse read chunk
309 314 sr_density_threshold = attr.ib(default=0.50)
310 315 # minimal size of data we skip when performing sparse read
311 316 sr_min_gap_size = attr.ib(default=262144)
312 317
313 318 # are deltas encoded against arbitrary bases.
314 319 generaldelta = attr.ib(default=False)
315 320
316 321
317 322 @attr.s()
318 323 class DeltaConfig(_Config):
319 324 """Hold configuration values about how new deltas are computed
320 325 
321 326 Some attributes are duplicated from DataConfig to help keep each object
322 327 self-contained.
323 328 """
324 329
325 330 # can deltas be encoded against arbitrary bases.
326 331 general_delta = attr.ib(default=False)
327 332 # Allow sparse writing of the revlog data
328 333 sparse_revlog = attr.ib(default=False)
329 334 # maximum length of a delta chain
330 335 max_chain_len = attr.ib(default=None)
331 336 # Maximum distance between delta chain base start and end
332 337 max_deltachain_span = attr.ib(default=-1)
333 338 # If `upper_bound_comp` is not None, this is the expected maximal gain from
334 339 # compression for the data content.
335 340 upper_bound_comp = attr.ib(default=None)
336 341 # Should we try a delta against both parents
337 342 delta_both_parents = attr.ib(default=True)
338 343 # Test delta base candidate group by chunk of this maximal size.
339 344 candidate_group_chunk_size = attr.ib(default=0)
340 345 # Should we display debug information about delta computation
341 346 debug_delta = attr.ib(default=False)
342 347 # trust incoming delta by default
343 348 lazy_delta = attr.ib(default=True)
344 349 # trust the base of incoming delta by default
345 350 lazy_delta_base = attr.ib(default=False)
346 351
347 352
348 353 class _InnerRevlog:
349 354 """An inner layer of the revlog object
350 355
351 356 That layer exist to be able to delegate some operation to Rust, its
352 357 boundaries are arbitrary and based on what we can delegate to Rust.
353 358 """
354 359
355 360 def __init__(
356 361 self,
357 362 opener,
358 363 index,
359 364 index_file,
360 365 data_file,
361 366 sidedata_file,
362 367 inline,
363 368 data_config,
364 369 delta_config,
365 370 feature_config,
366 371 chunk_cache,
367 372 default_compression_header,
368 373 ):
369 374 self.opener = opener
370 375 self.index = index
371 376
372 377 self.index_file = index_file
373 378 self.data_file = data_file
374 379 self.sidedata_file = sidedata_file
375 380 self.inline = inline
376 381 self.data_config = data_config
377 382 self.delta_config = delta_config
378 383 self.feature_config = feature_config
379 384
380 385 # used during diverted write.
381 386 self._orig_index_file = None
382 387
383 388 self._default_compression_header = default_compression_header
384 389
385 390 # index
386 391
387 392 # 3-tuple of file handles being used for active writing.
388 393 self._writinghandles = None
389 394
390 395 self._segmentfile = randomaccessfile.randomaccessfile(
391 396 self.opener,
392 397 (self.index_file if self.inline else self.data_file),
393 398 self.data_config.chunk_cache_size,
394 399 chunk_cache,
395 400 )
396 401 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
397 402 self.opener,
398 403 self.sidedata_file,
399 404 self.data_config.chunk_cache_size,
400 405 )
401 406
402 407 # revlog header -> revlog compressor
403 408 self._decompressors = {}
404 409 # 3-tuple of (node, rev, text) for a raw revision.
405 410 self._revisioncache = None
406 411
407 412 # cache some uncompressed chunks
408 413 # rev → uncompressed_chunk
409 414 #
410 415 # the max cost is dynamically updated to be proportional to the
411 416 # size of revisions we actually encounter.
412 417 self._uncompressed_chunk_cache = None
413 418 if self.data_config.uncompressed_cache_factor is not None:
414 419 self._uncompressed_chunk_cache = util.lrucachedict(
415 420 self.data_config.uncompressed_cache_count,
416 421 maxcost=65536, # some arbitrary initial value
417 422 )
418 423
419 424 self._delay_buffer = None
420 425
421 426 def __len__(self):
422 427 return len(self.index)
423 428
424 429 def clear_cache(self):
425 430 assert not self.is_delaying
426 431 self._revisioncache = None
427 432 if self._uncompressed_chunk_cache is not None:
428 433 self._uncompressed_chunk_cache.clear()
429 434 self._segmentfile.clear_cache()
430 435 self._segmentfile_sidedata.clear_cache()
431 436
432 437 @property
433 438 def canonical_index_file(self):
434 439 if self._orig_index_file is not None:
435 440 return self._orig_index_file
436 441 return self.index_file
437 442
438 443 @property
439 444 def is_delaying(self):
440 445 """is the revlog currently delaying the visibility of written data?
441 446
442 447 The delaying mechanism can be either in-memory or written on disk in a
443 448 side-file."""
444 449 return (self._delay_buffer is not None) or (
445 450 self._orig_index_file is not None
446 451 )
447 452
448 453 # Derived from index values.
449 454
450 455 def start(self, rev):
451 456 """the offset of the data chunk for this revision"""
452 457 return int(self.index[rev][0] >> 16)
453 458
454 459 def length(self, rev):
455 460 """the length of the data chunk for this revision"""
456 461 return self.index[rev][1]
457 462
458 463 def end(self, rev):
459 464 """the end of the data chunk for this revision"""
460 465 return self.start(rev) + self.length(rev)
461 466
462 467 def deltaparent(self, rev):
463 468 """return deltaparent of the given revision"""
464 469 base = self.index[rev][3]
465 470 if base == rev:
466 471 return nullrev
467 472 elif self.delta_config.general_delta:
468 473 return base
469 474 else:
470 475 return rev - 1
471 476
472 477 def issnapshot(self, rev):
473 478 """tells whether rev is a snapshot"""
474 479 if not self.delta_config.sparse_revlog:
475 480 return self.deltaparent(rev) == nullrev
476 481 elif hasattr(self.index, 'issnapshot'):
477 482 # directly assign the method to cache the testing and access
478 483 self.issnapshot = self.index.issnapshot
479 484 return self.issnapshot(rev)
480 485 if rev == nullrev:
481 486 return True
482 487 entry = self.index[rev]
483 488 base = entry[3]
484 489 if base == rev:
485 490 return True
486 491 if base == nullrev:
487 492 return True
488 493 p1 = entry[5]
489 494 while self.length(p1) == 0:
490 495 b = self.deltaparent(p1)
491 496 if b == p1:
492 497 break
493 498 p1 = b
494 499 p2 = entry[6]
495 500 while self.length(p2) == 0:
496 501 b = self.deltaparent(p2)
497 502 if b == p2:
498 503 break
499 504 p2 = b
500 505 if base == p1 or base == p2:
501 506 return False
502 507 return self.issnapshot(base)
503 508
504 509 def _deltachain(self, rev, stoprev=None):
505 510 """Obtain the delta chain for a revision.
506 511
507 512 ``stoprev`` specifies a revision to stop at. If not specified, we
508 513 stop at the base of the chain.
509 514
510 515 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
511 516 revs in ascending order and ``stopped`` is a bool indicating whether
512 517 ``stoprev`` was hit.
513 518 """
514 519 generaldelta = self.delta_config.general_delta
515 520 # Try C implementation.
516 521 try:
517 522 return self.index.deltachain(rev, stoprev, generaldelta)
518 523 except AttributeError:
519 524 pass
520 525
521 526 chain = []
522 527
523 528 # Alias to prevent attribute lookup in tight loop.
524 529 index = self.index
525 530
526 531 iterrev = rev
527 532 e = index[iterrev]
528 533 while iterrev != e[3] and iterrev != stoprev:
529 534 chain.append(iterrev)
530 535 if generaldelta:
531 536 iterrev = e[3]
532 537 else:
533 538 iterrev -= 1
534 539 e = index[iterrev]
535 540
536 541 if iterrev == stoprev:
537 542 stopped = True
538 543 else:
539 544 chain.append(iterrev)
540 545 stopped = False
541 546
542 547 chain.reverse()
543 548 return chain, stopped
544 549
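# A minimal, self-contained sketch of the pure-Python delta-chain walk above,
# assuming a toy index where each entry is just (base_rev,); the real revlog
# index entries carry many more fields, and there is no stoprev handling here.
def toy_deltachain(index, rev, generaldelta=True):
    chain = []
    iterrev = rev
    base = index[iterrev][0]
    while iterrev != base:
        chain.append(iterrev)
        iterrev = base if generaldelta else iterrev - 1
        base = index[iterrev][0]
    chain.append(iterrev)
    chain.reverse()
    return chain

# e.g. rev 3 deltas against 2, 2 against 0, and 0 is a full snapshot (base == rev):
assert toy_deltachain([(0,), (0,), (0,), (2,)], 3) == [0, 2, 3]
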
545 550 @util.propertycache
546 551 def _compressor(self):
547 552 engine = util.compengines[self.feature_config.compression_engine]
548 553 return engine.revlogcompressor(
549 554 self.feature_config.compression_engine_options
550 555 )
551 556
552 557 @util.propertycache
553 558 def _decompressor(self):
554 559 """the default decompressor"""
555 560 if self._default_compression_header is None:
556 561 return None
557 562 t = self._default_compression_header
558 563 c = self._get_decompressor(t)
559 564 return c.decompress
560 565
561 def _get_decompressor(self, t):
566 def _get_decompressor(self, t: bytes):
562 567 try:
563 568 compressor = self._decompressors[t]
564 569 except KeyError:
565 570 try:
566 571 engine = util.compengines.forrevlogheader(t)
567 572 compressor = engine.revlogcompressor(
568 573 self.feature_config.compression_engine_options
569 574 )
570 575 self._decompressors[t] = compressor
571 576 except KeyError:
572 577 raise error.RevlogError(
573 578 _(b'unknown compression type %s') % binascii.hexlify(t)
574 579 )
575 580 return compressor
576 581
577 def compress(self, data):
582 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
578 583 """Generate a possibly-compressed representation of data."""
579 584 if not data:
580 585 return b'', data
581 586
582 587 compressed = self._compressor.compress(data)
583 588
584 589 if compressed:
585 590 # The revlog compressor added the header in the returned data.
586 591 return b'', compressed
587 592
588 593 if data[0:1] == b'\0':
589 594 return b'', data
590 595 return b'u', data
591 596
592 def decompress(self, data):
597 def decompress(self, data: bytes):
593 598 """Decompress a revlog chunk.
594 599
595 600 The chunk is expected to begin with a header identifying the
596 601 format type so it can be routed to an appropriate decompressor.
597 602 """
598 603 if not data:
599 604 return data
600 605
601 606 # Revlogs are read much more frequently than they are written and many
602 607 # chunks only take microseconds to decompress, so performance is
603 608 # important here.
604 609 #
605 610 # We can make a few assumptions about revlogs:
606 611 #
607 612 # 1) the majority of chunks will be compressed (as opposed to inline
608 613 # raw data).
609 614 # 2) decompressing *any* data will likely be at least 10x slower than
610 615 # returning raw inline data.
611 616 # 3) we want to prioritize common and officially supported compression
612 617 # engines
613 618 #
614 619 # It follows that we want to optimize for "decompress compressed data
615 620 # when encoded with common and officially supported compression engines"
616 621 # case over "raw data" and "data encoded by less common or non-official
617 622 # compression engines." That is why we have the inline lookup first
618 623 # followed by the compengines lookup.
619 624 #
620 625 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
621 626 # compressed chunks. And this matters for changelog and manifest reads.
622 627 t = data[0:1]
623 628
624 629 if t == b'x':
625 630 try:
626 631 return _zlibdecompress(data)
627 632 except zlib.error as e:
628 633 raise error.RevlogError(
629 634 _(b'revlog decompress error: %s')
630 635 % stringutil.forcebytestr(e)
631 636 )
632 637 # '\0' is more common than 'u' so it goes first.
633 638 elif t == b'\0':
634 639 return data
635 640 elif t == b'u':
636 641 return util.buffer(data, 1)
637 642
638 643 compressor = self._get_decompressor(t)
639 644
640 645 return compressor.decompress(data)
641 646
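# A rough sketch of the chunk header convention handled by compress() and
# decompress() above, using only zlib: a zlib stream starts with 'x', data that
# does not benefit from compression is stored with an explicit 'u' prefix, and
# data whose first byte is already '\0' is unambiguous and stored as-is. This
# mirrors the dispatch logic only; the real code returns (header, data) pairs
# and supports pluggable compression engines.
import zlib

def toy_compress(data: bytes) -> bytes:
    if not data:
        return data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed          # begins with 'x' (zlib header byte)
    if data[0:1] == b'\0':
        return data                # leading NUL is unambiguous, store as-is
    return b'u' + data             # explicit "uncompressed" prefix

def toy_decompress(chunk: bytes) -> bytes:
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

assert toy_decompress(toy_compress(b'some revision text' * 10)) == b'some revision text' * 10
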
642 647 @contextlib.contextmanager
643 648 def reading(self):
644 649 """Context manager that keeps data and sidedata files open for reading"""
645 650 if len(self.index) == 0:
646 651 yield # nothing to be read
647 652 elif self._delay_buffer is not None and self.inline:
648 653 msg = "revlog with delayed write should not be inline"
649 654 raise error.ProgrammingError(msg)
650 655 else:
651 656 with self._segmentfile.reading():
652 657 with self._segmentfile_sidedata.reading():
653 658 yield
654 659
655 660 @property
656 661 def is_writing(self):
657 662 """True if a writing context is open"""
658 663 return self._writinghandles is not None
659 664
660 665 @property
661 666 def is_open(self):
662 667 """True if any file handle is being held
663 668
664 669 Used for assert and debug in the python code"""
665 670 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
666 671
667 672 @contextlib.contextmanager
668 673 def writing(self, transaction, data_end=None, sidedata_end=None):
669 674 """Open the revlog files for writing
670 675
671 676 Adding content to a revlog should be done within such a context.
672 677 """
673 678 if self.is_writing:
674 679 yield
675 680 else:
676 681 ifh = dfh = sdfh = None
677 682 try:
678 683 r = len(self.index)
679 684 # opening the data file.
680 685 dsize = 0
681 686 if r:
682 687 dsize = self.end(r - 1)
683 688 dfh = None
684 689 if not self.inline:
685 690 try:
686 691 dfh = self.opener(self.data_file, mode=b"r+")
687 692 if data_end is None:
688 693 dfh.seek(0, os.SEEK_END)
689 694 else:
690 695 dfh.seek(data_end, os.SEEK_SET)
691 696 except FileNotFoundError:
692 697 dfh = self.opener(self.data_file, mode=b"w+")
693 698 transaction.add(self.data_file, dsize)
694 699 if self.sidedata_file is not None:
695 700 assert sidedata_end is not None
696 701 # revlog-v2 does not inline, help Pytype
697 702 assert dfh is not None
698 703 try:
699 704 sdfh = self.opener(self.sidedata_file, mode=b"r+")
700 705 dfh.seek(sidedata_end, os.SEEK_SET)
701 706 except FileNotFoundError:
702 707 sdfh = self.opener(self.sidedata_file, mode=b"w+")
703 708 transaction.add(self.sidedata_file, sidedata_end)
704 709
705 710 # opening the index file.
706 711 isize = r * self.index.entry_size
707 712 ifh = self.__index_write_fp()
708 713 if self.inline:
709 714 transaction.add(self.index_file, dsize + isize)
710 715 else:
711 716 transaction.add(self.index_file, isize)
712 717 # exposing all file handles for writing.
713 718 self._writinghandles = (ifh, dfh, sdfh)
714 719 self._segmentfile.writing_handle = ifh if self.inline else dfh
715 720 self._segmentfile_sidedata.writing_handle = sdfh
716 721 yield
717 722 finally:
718 723 self._writinghandles = None
719 724 self._segmentfile.writing_handle = None
720 725 self._segmentfile_sidedata.writing_handle = None
721 726 if dfh is not None:
722 727 dfh.close()
723 728 if sdfh is not None:
724 729 sdfh.close()
725 730 # closing the index file last to avoid exposing references to
726 731 # potentially unflushed data content.
727 732 if ifh is not None:
728 733 ifh.close()
729 734
730 735 def __index_write_fp(self, index_end=None):
731 736 """internal method to open the index file for writing
732 737
733 738 You should not use this directly; use `_writing` instead
734 739 """
735 740 try:
736 741 if self._delay_buffer is None:
737 742 f = self.opener(
738 743 self.index_file,
739 744 mode=b"r+",
740 745 checkambig=self.data_config.check_ambig,
741 746 )
742 747 else:
743 748 # check_ambig affects the way we open the file for writing; however,
744 749 # here we do not actually open a file for writing, as writes
745 750 # will append to a delay_buffer. So check_ambig is not
746 751 # meaningful and is unneeded here.
747 752 f = randomaccessfile.appender(
748 753 self.opener, self.index_file, b"r+", self._delay_buffer
749 754 )
750 755 if index_end is None:
751 756 f.seek(0, os.SEEK_END)
752 757 else:
753 758 f.seek(index_end, os.SEEK_SET)
754 759 return f
755 760 except FileNotFoundError:
756 761 if self._delay_buffer is None:
757 762 return self.opener(
758 763 self.index_file,
759 764 mode=b"w+",
760 765 checkambig=self.data_config.check_ambig,
761 766 )
762 767 else:
763 768 return randomaccessfile.appender(
764 769 self.opener, self.index_file, b"w+", self._delay_buffer
765 770 )
766 771
767 772 def __index_new_fp(self):
768 773 """internal method to create a new index file for writing
769 774
770 775 You should not use this unless you are upgrading from an inline revlog
771 776 """
772 777 return self.opener(
773 778 self.index_file,
774 779 mode=b"w",
775 780 checkambig=self.data_config.check_ambig,
776 781 )
777 782
778 783 def split_inline(self, tr, header, new_index_file_path=None):
779 784 """split the data of an inline revlog into an index and a data file"""
780 785 assert self._delay_buffer is None
781 786 existing_handles = False
782 787 if self._writinghandles is not None:
783 788 existing_handles = True
784 789 fp = self._writinghandles[0]
785 790 fp.flush()
786 791 fp.close()
787 792 # We can't use the cached file handle after close(). So prevent
788 793 # its usage.
789 794 self._writinghandles = None
790 795 self._segmentfile.writing_handle = None
791 796 # No need to deal with sidedata writing handle as it is only
792 797 # relevant with revlog-v2 which is never inline, not reaching
793 798 # this code
794 799
795 800 new_dfh = self.opener(self.data_file, mode=b"w+")
796 801 new_dfh.truncate(0) # drop any potentially existing data
797 802 try:
798 803 with self.reading():
799 804 for r in range(len(self.index)):
800 805 new_dfh.write(self.get_segment_for_revs(r, r)[1])
801 806 new_dfh.flush()
802 807
803 808 if new_index_file_path is not None:
804 809 self.index_file = new_index_file_path
805 810 with self.__index_new_fp() as fp:
806 811 self.inline = False
807 812 for i in range(len(self.index)):
808 813 e = self.index.entry_binary(i)
809 814 if i == 0:
810 815 packed_header = self.index.pack_header(header)
811 816 e = packed_header + e
812 817 fp.write(e)
813 818
814 819 # If we don't use side-write, the temp file replaces the real
815 820 # index when we exit the context manager
816 821
817 822 self._segmentfile = randomaccessfile.randomaccessfile(
818 823 self.opener,
819 824 self.data_file,
820 825 self.data_config.chunk_cache_size,
821 826 )
822 827
823 828 if existing_handles:
824 829 # switched from inline to conventional; reopen the index
825 830 ifh = self.__index_write_fp()
826 831 self._writinghandles = (ifh, new_dfh, None)
827 832 self._segmentfile.writing_handle = new_dfh
828 833 new_dfh = None
829 834 # No need to deal with sidedata writing handle as it is only
830 835 # relevant with revlog-v2 which is never inline, not reaching
831 836 # this code
832 837 finally:
833 838 if new_dfh is not None:
834 839 new_dfh.close()
835 840 return self.index_file
836 841
837 842 def get_segment_for_revs(self, startrev, endrev):
838 843 """Obtain a segment of raw data corresponding to a range of revisions.
839 844
840 845 Accepts the start and end revisions and an optional already-open
841 846 file handle to be used for reading. If the file handle is read, its
842 847 seek position will not be preserved.
843 848
844 849 Requests for data may be satisfied by a cache.
845 850
846 851 Returns a 2-tuple of (offset, data) for the requested range of
847 852 revisions. Offset is the integer offset from the beginning of the
848 853 revlog and data is a str or buffer of the raw byte data.
849 854
850 855 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
851 856 to determine where each revision's data begins and ends.
852 857
853 858 API: we should consider making this a private part of the InnerRevlog
854 859 at some point.
855 860 """
856 861 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
857 862 # (functions are expensive).
858 863 index = self.index
859 864 istart = index[startrev]
860 865 start = int(istart[0] >> 16)
861 866 if startrev == endrev:
862 867 end = start + istart[1]
863 868 else:
864 869 iend = index[endrev]
865 870 end = int(iend[0] >> 16) + iend[1]
866 871
867 872 if self.inline:
868 873 start += (startrev + 1) * self.index.entry_size
869 874 end += (endrev + 1) * self.index.entry_size
870 875 length = end - start
871 876
872 877 return start, self._segmentfile.read_chunk(start, length)
873 878
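# A small sketch of the inline-offset arithmetic used just above, assuming a
# toy index of (data_offset, data_length) pairs: in an inline revlog every data
# chunk is preceded by its fixed-size index entry, so the physical offset of
# revision `rev` is shifted by (rev + 1) * entry_size. The entry size of 64 is
# only a stand-in for self.index.entry_size.
def toy_physical_range(index, startrev, endrev, inline, entry_size=64):
    start = index[startrev][0]
    end = index[endrev][0] + index[endrev][1]
    if inline:
        start += (startrev + 1) * entry_size
        end += (endrev + 1) * entry_size
    return start, end - start

# revision 1 holds 20 bytes of data starting at logical offset 10
offset, length = toy_physical_range([(0, 10), (10, 20)], 1, 1, inline=True)
assert (offset, length) == (10 + 2 * 64, 20)
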
874 879 def _chunk(self, rev):
875 880 """Obtain a single decompressed chunk for a revision.
876 881
877 882 Accepts an integer revision and an optional already-open file handle
878 883 to be used for reading. If used, the seek position of the file will not
879 884 be preserved.
880 885
881 886 Returns a str holding uncompressed data for the requested revision.
882 887 """
883 888 if self._uncompressed_chunk_cache is not None:
884 889 uncomp = self._uncompressed_chunk_cache.get(rev)
885 890 if uncomp is not None:
886 891 return uncomp
887 892
888 893 compression_mode = self.index[rev][10]
889 894 data = self.get_segment_for_revs(rev, rev)[1]
890 895 if compression_mode == COMP_MODE_PLAIN:
891 896 uncomp = data
892 897 elif compression_mode == COMP_MODE_DEFAULT:
893 898 uncomp = self._decompressor(data)
894 899 elif compression_mode == COMP_MODE_INLINE:
895 900 uncomp = self.decompress(data)
896 901 else:
897 902 msg = b'unknown compression mode %d'
898 903 msg %= compression_mode
899 904 raise error.RevlogError(msg)
900 905 if self._uncompressed_chunk_cache is not None:
901 906 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
902 907 return uncomp
903 908
904 909 def _chunks(self, revs, targetsize=None):
905 910 """Obtain decompressed chunks for the specified revisions.
906 911
907 912 Accepts an iterable of numeric revisions that are assumed to be in
908 913 ascending order. Also accepts an optional already-open file handle
909 914 to be used for reading. If used, the seek position of the file will
910 915 not be preserved.
911 916
912 917 This function is similar to calling ``self._chunk()`` multiple times,
913 918 but is faster.
914 919
915 920 Returns a list with decompressed data for each requested revision.
916 921 """
917 922 if not revs:
918 923 return []
919 924 start = self.start
920 925 length = self.length
921 926 inline = self.inline
922 927 iosize = self.index.entry_size
923 928 buffer = util.buffer
924 929
925 930 fetched_revs = []
926 931 fadd = fetched_revs.append
927 932
928 933 chunks = []
929 934 ladd = chunks.append
930 935
931 936 if self._uncompressed_chunk_cache is None:
932 937 fetched_revs = revs
933 938 else:
934 939 for rev in revs:
935 940 cached_value = self._uncompressed_chunk_cache.get(rev)
936 941 if cached_value is None:
937 942 fadd(rev)
938 943 else:
939 944 ladd((rev, cached_value))
940 945
941 946 if not fetched_revs:
942 947 slicedchunks = ()
943 948 elif not self.data_config.with_sparse_read:
944 949 slicedchunks = (fetched_revs,)
945 950 else:
946 951 slicedchunks = deltautil.slicechunk(
947 952 self,
948 953 fetched_revs,
949 954 targetsize=targetsize,
950 955 )
951 956
952 957 for revschunk in slicedchunks:
953 958 firstrev = revschunk[0]
954 959 # Skip trailing revisions with empty diff
955 960 for lastrev in revschunk[::-1]:
956 961 if length(lastrev) != 0:
957 962 break
958 963
959 964 try:
960 965 offset, data = self.get_segment_for_revs(firstrev, lastrev)
961 966 except OverflowError:
962 967 # issue4215 - we can't cache a run of chunks greater than
963 968 # 2G on Windows
964 969 for rev in revschunk:
965 970 ladd((rev, self._chunk(rev)))
966 971
967 972 decomp = self.decompress
968 973 # self._decompressor might be None, but will not be used in that case
969 974 def_decomp = self._decompressor
970 975 for rev in revschunk:
971 976 chunkstart = start(rev)
972 977 if inline:
973 978 chunkstart += (rev + 1) * iosize
974 979 chunklength = length(rev)
975 980 comp_mode = self.index[rev][10]
976 981 c = buffer(data, chunkstart - offset, chunklength)
977 982 if comp_mode == COMP_MODE_PLAIN:
978 983 c = c
979 984 elif comp_mode == COMP_MODE_INLINE:
980 985 c = decomp(c)
981 986 elif comp_mode == COMP_MODE_DEFAULT:
982 987 c = def_decomp(c)
983 988 else:
984 989 msg = b'unknown compression mode %d'
985 990 msg %= comp_mode
986 991 raise error.RevlogError(msg)
987 992 ladd((rev, c))
988 993 if self._uncompressed_chunk_cache is not None:
989 994 self._uncompressed_chunk_cache.insert(rev, c, len(c))
990 995
991 996 chunks.sort()
992 997 return [x[1] for x in chunks]
993 998
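# A condensed sketch of the caching strategy in _chunks() above: serve what we
# can from the cache, fetch the rest, then sort by revision number so the
# result matches the (ascending) input order. The `fetch` callable and the
# plain dict cache are stand-ins for the real segment reads and lrucachedict.
def toy_chunks(revs, cache, fetch):
    chunks = [(rev, cache[rev]) for rev in revs if rev in cache]
    missing = [rev for rev in revs if rev not in cache]
    for rev in missing:
        data = fetch(rev)
        cache[rev] = data
        chunks.append((rev, data))
    chunks.sort()
    return [data for _, data in chunks]

cache = {2: b'two'}
assert toy_chunks([1, 2, 3], cache, lambda r: b'rev%d' % r) == [b'rev1', b'two', b'rev3']
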
994 999 def raw_text(self, node, rev):
995 1000 """return the possibly unvalidated rawtext for a revision
996 1001
997 1002 returns (rev, rawtext, validated)
998 1003 """
999 1004
1000 1005 # revision in the cache (could be useful to apply delta)
1001 1006 cachedrev = None
1002 1007 # An intermediate text to apply deltas to
1003 1008 basetext = None
1004 1009
1005 1010 # Check if we have the entry in cache
1006 1011 # The cache entry looks like (node, rev, rawtext)
1007 1012 if self._revisioncache:
1008 1013 cachedrev = self._revisioncache[1]
1009 1014
1010 1015 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1011 1016 if stopped:
1012 1017 basetext = self._revisioncache[2]
1013 1018
1014 1019 # drop cache to save memory, the caller is expected to
1015 1020 # update self._inner._revisioncache after validating the text
1016 1021 self._revisioncache = None
1017 1022
1018 1023 targetsize = None
1019 1024 rawsize = self.index[rev][2]
1020 1025 if 0 <= rawsize:
1021 1026 targetsize = 4 * rawsize
1022 1027
1023 1028 if self._uncompressed_chunk_cache is not None:
1024 1029 # dynamically update the uncompressed_chunk_cache size to the
1025 1030 # largest revision we saw in this revlog.
1026 1031 factor = self.data_config.uncompressed_cache_factor
1027 1032 candidate_size = rawsize * factor
1028 1033 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1029 1034 self._uncompressed_chunk_cache.maxcost = candidate_size
1030 1035
1031 1036 bins = self._chunks(chain, targetsize=targetsize)
1032 1037 if basetext is None:
1033 1038 basetext = bytes(bins[0])
1034 1039 bins = bins[1:]
1035 1040
1036 1041 rawtext = mdiff.patches(basetext, bins)
1037 1042 del basetext # let us have a chance to free memory early
1038 1043 return (rev, rawtext, False)
1039 1044
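# A toy illustration of how raw_text() rebuilds a revision: start from the full
# text at the base of the delta chain and apply each delta in order. Real revlog
# deltas are binary hunks applied by mdiff.patches(); here a "delta" is just a
# sorted list of (start, end, replacement) edits, which is the same general idea.
def toy_apply_delta(text, delta):
    out, pos = [], 0
    for start, end, replacement in delta:
        out.append(text[pos:start])
        out.append(replacement)
        pos = end
    out.append(text[pos:])
    return b''.join(out)

def toy_rebuild(basetext, deltas):
    text = basetext
    for delta in deltas:
        text = toy_apply_delta(text, delta)
    return text

base = b'abcdef'
deltas = [[(2, 4, b'XY')],        # abcdef -> abXYef
          [(0, 2, b'zz')]]        # abXYef -> zzXYef
assert toy_rebuild(base, deltas) == b'zzXYef'
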
1040 1045 def sidedata(self, rev, sidedata_end):
1041 1046 """Return the sidedata for a given revision number."""
1042 1047 index_entry = self.index[rev]
1043 1048 sidedata_offset = index_entry[8]
1044 1049 sidedata_size = index_entry[9]
1045 1050
1046 1051 if self.inline:
1047 1052 sidedata_offset += self.index.entry_size * (1 + rev)
1048 1053 if sidedata_size == 0:
1049 1054 return {}
1050 1055
1051 1056 if sidedata_end < sidedata_offset + sidedata_size:
1052 1057 filename = self.sidedata_file
1053 1058 end = sidedata_end
1054 1059 offset = sidedata_offset
1055 1060 length = sidedata_size
1056 1061 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1057 1062 raise error.RevlogError(m)
1058 1063
1059 1064 comp_segment = self._segmentfile_sidedata.read_chunk(
1060 1065 sidedata_offset, sidedata_size
1061 1066 )
1062 1067
1063 1068 comp = self.index[rev][11]
1064 1069 if comp == COMP_MODE_PLAIN:
1065 1070 segment = comp_segment
1066 1071 elif comp == COMP_MODE_DEFAULT:
1067 1072 segment = self._decompressor(comp_segment)
1068 1073 elif comp == COMP_MODE_INLINE:
1069 1074 segment = self.decompress(comp_segment)
1070 1075 else:
1071 1076 msg = b'unknown compression mode %d'
1072 1077 msg %= comp
1073 1078 raise error.RevlogError(msg)
1074 1079
1075 1080 sidedata = sidedatautil.deserialize_sidedata(segment)
1076 1081 return sidedata
1077 1082
1078 1083 def write_entry(
1079 1084 self,
1080 1085 transaction,
1081 1086 entry,
1082 1087 data,
1083 1088 link,
1084 1089 offset,
1085 1090 sidedata,
1086 1091 sidedata_offset,
1087 1092 index_end,
1088 1093 data_end,
1089 1094 sidedata_end,
1090 1095 ):
1091 1096 # Files opened in a+ mode have inconsistent behavior on various
1092 1097 # platforms. Windows requires that a file positioning call be made
1093 1098 # when the file handle transitions between reads and writes. See
1094 1099 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1095 1100 # platforms, Python or the platform itself can be buggy. Some versions
1096 1101 # of Solaris have been observed to not append at the end of the file
1097 1102 # if the file was seeked to before the end. See issue4943 for more.
1098 1103 #
1099 1104 # We work around this issue by inserting a seek() before writing.
1100 1105 # Note: This is likely not necessary on Python 3. However, because
1101 1106 # the file handle is reused for reads and may be seeked there, we need
1102 1107 # to be careful before changing this.
1103 1108 if self._writinghandles is None:
1104 1109 msg = b'adding revision outside `revlog._writing` context'
1105 1110 raise error.ProgrammingError(msg)
1106 1111 ifh, dfh, sdfh = self._writinghandles
1107 1112 if index_end is None:
1108 1113 ifh.seek(0, os.SEEK_END)
1109 1114 else:
1110 1115 ifh.seek(index_end, os.SEEK_SET)
1111 1116 if dfh:
1112 1117 if data_end is None:
1113 1118 dfh.seek(0, os.SEEK_END)
1114 1119 else:
1115 1120 dfh.seek(data_end, os.SEEK_SET)
1116 1121 if sdfh:
1117 1122 sdfh.seek(sidedata_end, os.SEEK_SET)
1118 1123
1119 1124 curr = len(self.index) - 1
1120 1125 if not self.inline:
1121 1126 transaction.add(self.data_file, offset)
1122 1127 if self.sidedata_file:
1123 1128 transaction.add(self.sidedata_file, sidedata_offset)
1124 1129 transaction.add(self.canonical_index_file, curr * len(entry))
1125 1130 if data[0]:
1126 1131 dfh.write(data[0])
1127 1132 dfh.write(data[1])
1128 1133 if sidedata:
1129 1134 sdfh.write(sidedata)
1130 1135 if self._delay_buffer is None:
1131 1136 ifh.write(entry)
1132 1137 else:
1133 1138 self._delay_buffer.append(entry)
1134 1139 elif self._delay_buffer is not None:
1135 1140 msg = b'invalid delayed write on inline revlog'
1136 1141 raise error.ProgrammingError(msg)
1137 1142 else:
1138 1143 offset += curr * self.index.entry_size
1139 1144 transaction.add(self.canonical_index_file, offset)
1140 1145 assert not sidedata
1141 1146 ifh.write(entry)
1142 1147 ifh.write(data[0])
1143 1148 ifh.write(data[1])
1144 1149 return (
1145 1150 ifh.tell(),
1146 1151 dfh.tell() if dfh else None,
1147 1152 sdfh.tell() if sdfh else None,
1148 1153 )
1149 1154
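# A minimal sketch of the "seek before write" workaround described in the
# comment at the top of write_entry(): when a handle is shared between reads
# and appends, explicitly reposition it at the intended write offset before
# writing instead of trusting implicit append behaviour.
import io, os

def append_at_end(fh: io.BufferedRandom, data: bytes) -> int:
    fh.seek(0, os.SEEK_END)   # reposition explicitly; do not rely on 'a' mode
    fh.write(data)
    return fh.tell()          # new end offset, as the method above returns
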
1150 1155 def _divert_index(self):
1151 1156 index_file = self.index_file
1152 1157 # when we encounter a legacy inline-changelog, split it. However it is
1153 1158 # important to use the expected filename for pending content
1154 1159 # (<radix>.a) otherwise hooks won't be seeing the content of the
1155 1160 # pending transaction.
1156 1161 if index_file.endswith(b'.s'):
1157 1162 index_file = self.index_file[:-2]
1158 1163 return index_file + b'.a'
1159 1164
1160 1165 def delay(self):
1161 1166 assert not self.is_open
1162 1167 if self.inline:
1163 1168 msg = "revlog with delayed write should not be inline"
1164 1169 raise error.ProgrammingError(msg)
1165 1170 if self._delay_buffer is not None or self._orig_index_file is not None:
1166 1171 # delay or divert already in place
1167 1172 return None
1168 1173 elif len(self.index) == 0:
1169 1174 self._orig_index_file = self.index_file
1170 1175 self.index_file = self._divert_index()
1171 1176 assert self._orig_index_file is not None
1172 1177 assert self.index_file is not None
1173 1178 if self.opener.exists(self.index_file):
1174 1179 self.opener.unlink(self.index_file)
1175 1180 return self.index_file
1176 1181 else:
1177 1182 self._delay_buffer = []
1178 1183 return None
1179 1184
1180 1185 def write_pending(self):
1181 1186 assert not self.is_open
1182 1187 if self.inline:
1183 1188 msg = "revlog with delayed write should not be inline"
1184 1189 raise error.ProgrammingError(msg)
1185 1190 if self._orig_index_file is not None:
1186 1191 return None, True
1187 1192 any_pending = False
1188 1193 pending_index_file = self._divert_index()
1189 1194 if self.opener.exists(pending_index_file):
1190 1195 self.opener.unlink(pending_index_file)
1191 1196 util.copyfile(
1192 1197 self.opener.join(self.index_file),
1193 1198 self.opener.join(pending_index_file),
1194 1199 )
1195 1200 if self._delay_buffer:
1196 1201 with self.opener(pending_index_file, b'r+') as ifh:
1197 1202 ifh.seek(0, os.SEEK_END)
1198 1203 ifh.write(b"".join(self._delay_buffer))
1199 1204 any_pending = True
1200 1205 self._delay_buffer = None
1201 1206 self._orig_index_file = self.index_file
1202 1207 self.index_file = pending_index_file
1203 1208 return self.index_file, any_pending
1204 1209
1205 1210 def finalize_pending(self):
1206 1211 assert not self.is_open
1207 1212 if self.inline:
1208 1213 msg = "revlog with delayed write should not be inline"
1209 1214 raise error.ProgrammingError(msg)
1210 1215
1211 1216 delay = self._delay_buffer is not None
1212 1217 divert = self._orig_index_file is not None
1213 1218
1214 1219 if delay and divert:
1215 1220 assert False, "unreachable"
1216 1221 elif delay:
1217 1222 if self._delay_buffer:
1218 1223 with self.opener(self.index_file, b'r+') as ifh:
1219 1224 ifh.seek(0, os.SEEK_END)
1220 1225 ifh.write(b"".join(self._delay_buffer))
1221 1226 self._delay_buffer = None
1222 1227 elif divert:
1223 1228 if self.opener.exists(self.index_file):
1224 1229 self.opener.rename(
1225 1230 self.index_file,
1226 1231 self._orig_index_file,
1227 1232 checkambig=True,
1228 1233 )
1229 1234 self.index_file = self._orig_index_file
1230 1235 self._orig_index_file = None
1231 1236 else:
1232 1237 msg = b"neither delay nor divert found on this revlog"
1233 1238 raise error.ProgrammingError(msg)
1234 1239 return self.canonical_index_file
1235 1240
1236 1241
1237 1242 class revlog:
1238 1243 """
1239 1244 the underlying revision storage object
1240 1245
1241 1246 A revlog consists of two parts, an index and the revision data.
1242 1247
1243 1248 The index is a file with a fixed record size containing
1244 1249 information on each revision, including its nodeid (hash), the
1245 1250 nodeids of its parents, the position and offset of its data within
1246 1251 the data file, and the revision it's based on. Finally, each entry
1247 1252 contains a linkrev entry that can serve as a pointer to external
1248 1253 data.
1249 1254
1250 1255 The revision data itself is a linear collection of data chunks.
1251 1256 Each chunk represents a revision and is usually represented as a
1252 1257 delta against the previous chunk. To bound lookup time, runs of
1253 1258 deltas are limited to about 2 times the length of the original
1254 1259 version data. This makes retrieval of a version proportional to
1255 1260 its size, or O(1) relative to the number of revisions.
1256 1261
1257 1262 Both pieces of the revlog are written to in an append-only
1258 1263 fashion, which means we never need to rewrite a file to insert or
1259 1264 remove data, and can use some simple techniques to avoid the need
1260 1265 for locking while reading.
1261 1266
1262 1267 If checkambig, indexfile is opened with checkambig=True at
1263 1268 writing, to avoid file stat ambiguity.
1264 1269
1265 1270 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1266 1271 index will be mmapped rather than read if it is larger than the
1267 1272 configured threshold.
1268 1273
1269 1274 If censorable is True, the revlog can have censored revisions.
1270 1275
1271 1276 If `upperboundcomp` is not None, this is the expected maximal gain from
1272 1277 compression for the data content.
1273 1278
1274 1279 `concurrencychecker` is an optional function that receives 3 arguments: a
1275 1280 file handle, a filename, and an expected position. It should check whether
1276 1281 the current position in the file handle is valid, and log/warn/fail (by
1277 1282 raising).
1278 1283
1279 1284 See mercurial/revlogutils/constants.py for details about the content of an
1280 1285 index entry.
1281 1286 """
1282 1287
1283 1288 _flagserrorclass = error.RevlogError
1284 1289
1285 1290 @staticmethod
1286 1291 def is_inline_index(header_bytes):
1287 1292 """Determine if a revlog is inline from the initial bytes of the index"""
1288 1293 if len(header_bytes) == 0:
1289 1294 return True
1290 1295
1291 1296 header = INDEX_HEADER.unpack(header_bytes)[0]
1292 1297
1293 1298 _format_flags = header & ~0xFFFF
1294 1299 _format_version = header & 0xFFFF
1295 1300
1296 1301 features = FEATURES_BY_VERSION[_format_version]
1297 1302 return features[b'inline'](_format_flags)
1298 1303
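# A small sketch of the header decoding used by is_inline_index() and
# _loadindex(): the first 4 bytes of an index pack both the format version
# (low 16 bits) and feature flags (high 16 bits). The flag values below are
# illustrative placeholders, not the authoritative revlog constants.
import struct

TOY_FLAG_INLINE = 1 << 16
TOY_FLAG_GENERALDELTA = 1 << 17

def toy_parse_header(header_bytes: bytes):
    if not header_bytes:
        return None, None          # empty index: nothing to decode yet
    (header,) = struct.unpack('>I', header_bytes[:4])
    version = header & 0xFFFF
    flags = header & ~0xFFFF
    return version, flags

version, flags = toy_parse_header(struct.pack('>I', TOY_FLAG_INLINE | 1))
assert version == 1 and bool(flags & TOY_FLAG_INLINE)
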
1304 _docket_file: Optional[bytes]
1305
1299 1306 def __init__(
1300 1307 self,
1301 1308 opener,
1302 1309 target,
1303 1310 radix,
1304 1311 postfix=None, # only exist for `tmpcensored` now
1305 1312 checkambig=False,
1306 1313 mmaplargeindex=False,
1307 1314 censorable=False,
1308 1315 upperboundcomp=None,
1309 1316 persistentnodemap=False,
1310 1317 concurrencychecker=None,
1311 1318 trypending=False,
1312 1319 try_split=False,
1313 1320 canonical_parent_order=True,
1314 1321 data_config=None,
1315 1322 delta_config=None,
1316 1323 feature_config=None,
1317 1324 may_inline=True, # may inline new revlog
1318 1325 ):
1319 1326 """
1320 1327 create a revlog object
1321 1328
1322 1329 opener is a function that abstracts the file opening operation
1323 1330 and can be used to implement COW semantics or the like.
1324 1331
1325 1332 `target`: a (KIND, ID) tuple that identifies the content stored in
1326 1333 this revlog. It helps the rest of the code to understand what the revlog
1327 1334 is about without having to resort to heuristics and index filename
1328 1335 analysis. Note that this must reliably be set by normal code, but
1329 1336 that test, debug, or performance measurement code might not set this to
1330 1337 an accurate value.
1331 1338 """
1332 1339
1333 1340 self.radix = radix
1334 1341
1335 1342 self._docket_file = None
1336 1343 self._indexfile = None
1337 1344 self._datafile = None
1338 1345 self._sidedatafile = None
1339 1346 self._nodemap_file = None
1340 1347 self.postfix = postfix
1341 1348 self._trypending = trypending
1342 1349 self._try_split = try_split
1343 1350 self._may_inline = may_inline
1344 1351 self.opener = opener
1345 1352 if persistentnodemap:
1346 1353 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1347 1354
1348 1355 assert target[0] in ALL_KINDS
1349 1356 assert len(target) == 2
1350 1357 self.target = target
1351 1358 if feature_config is not None:
1352 1359 self.feature_config = feature_config.copy()
1353 1360 elif b'feature-config' in self.opener.options:
1354 1361 self.feature_config = self.opener.options[b'feature-config'].copy()
1355 1362 else:
1356 1363 self.feature_config = FeatureConfig()
1357 1364 self.feature_config.censorable = censorable
1358 1365 self.feature_config.canonical_parent_order = canonical_parent_order
1359 1366 if data_config is not None:
1360 1367 self.data_config = data_config.copy()
1361 1368 elif b'data-config' in self.opener.options:
1362 1369 self.data_config = self.opener.options[b'data-config'].copy()
1363 1370 else:
1364 1371 self.data_config = DataConfig()
1365 1372 self.data_config.check_ambig = checkambig
1366 1373 self.data_config.mmap_large_index = mmaplargeindex
1367 1374 if delta_config is not None:
1368 1375 self.delta_config = delta_config.copy()
1369 1376 elif b'delta-config' in self.opener.options:
1370 1377 self.delta_config = self.opener.options[b'delta-config'].copy()
1371 1378 else:
1372 1379 self.delta_config = DeltaConfig()
1373 1380 self.delta_config.upper_bound_comp = upperboundcomp
1374 1381
1375 1382 # Maps rev to chain base rev.
1376 1383 self._chainbasecache = util.lrucachedict(100)
1377 1384
1378 1385 self.index = None
1379 1386 self._docket = None
1380 1387 self._nodemap_docket = None
1381 1388 # Mapping of partial identifiers to full nodes.
1382 1389 self._pcache = {}
1383 1390
1384 1391 # other optional features
1385 1392
1386 1393 # Make copy of flag processors so each revlog instance can support
1387 1394 # custom flags.
1388 1395 self._flagprocessors = dict(flagutil.flagprocessors)
1389 1396 # prevent nesting of addgroup
1390 1397 self._adding_group = None
1391 1398
1392 1399 chunk_cache = self._loadindex()
1393 1400 self._load_inner(chunk_cache)
1394 1401 self._concurrencychecker = concurrencychecker
1395 1402
1396 1403 def _init_opts(self):
1397 1404 """process options (from above/config) to set up the associated default revlog mode
1398 1405 
1399 1406 These values might be affected when actually reading on-disk information.
1400 1407
1401 1408 The relevant values are returned for use in _loadindex().
1402 1409
1403 1410 * newversionflags:
1404 1411 version header to use if we need to create a new revlog
1405 1412
1406 1413 * mmapindexthreshold:
1407 1414 minimal index size at which to start using mmap
1408 1415
1409 1416 * force_nodemap:
1410 1417 force the usage of a "development" version of the nodemap code
1411 1418 """
1412 1419 opts = self.opener.options
1413 1420
1414 1421 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1415 1422 new_header = CHANGELOGV2
1416 1423 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1417 1424 self.feature_config.compute_rank = compute_rank
1418 1425 elif b'revlogv2' in opts:
1419 1426 new_header = REVLOGV2
1420 1427 elif b'revlogv1' in opts:
1421 1428 new_header = REVLOGV1
1422 1429 if self._may_inline:
1423 1430 new_header |= FLAG_INLINE_DATA
1424 1431 if b'generaldelta' in opts:
1425 1432 new_header |= FLAG_GENERALDELTA
1426 1433 elif b'revlogv0' in self.opener.options:
1427 1434 new_header = REVLOGV0
1428 1435 else:
1429 1436 new_header = REVLOG_DEFAULT_VERSION
1430 1437
1431 1438 mmapindexthreshold = None
1432 1439 if self.data_config.mmap_large_index:
1433 1440 mmapindexthreshold = self.data_config.mmap_index_threshold
1434 1441 if self.feature_config.enable_ellipsis:
1435 1442 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1436 1443
1437 1444 # revlog v0 doesn't have flag processors
1438 1445 for flag, processor in opts.get(b'flagprocessors', {}).items():
1439 1446 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1440 1447
1441 1448 chunk_cache_size = self.data_config.chunk_cache_size
1442 1449 if chunk_cache_size <= 0:
1443 1450 raise error.RevlogError(
1444 1451 _(b'revlog chunk cache size %r is not greater than 0')
1445 1452 % chunk_cache_size
1446 1453 )
1447 1454 elif chunk_cache_size & (chunk_cache_size - 1):
1448 1455 raise error.RevlogError(
1449 1456 _(b'revlog chunk cache size %r is not a power of 2')
1450 1457 % chunk_cache_size
1451 1458 )
1452 1459 force_nodemap = opts.get(b'devel-force-nodemap', False)
1453 1460 return new_header, mmapindexthreshold, force_nodemap
1454 1461
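# The chunk cache size validation above relies on the classic bit trick for
# detecting powers of two; a standalone restatement, for reference:
def is_power_of_two(n: int) -> bool:
    return n > 0 and (n & (n - 1)) == 0

assert is_power_of_two(65536)
assert not is_power_of_two(65535) and not is_power_of_two(0)
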
1455 1462 def _get_data(self, filepath, mmap_threshold, size=None):
1456 1463 """return the file content, read with or without mmap
1457 1464 
1458 1465 If the file is missing, return the empty string"""
1459 1466 try:
1460 1467 with self.opener(filepath) as fp:
1461 1468 if mmap_threshold is not None:
1462 1469 file_size = self.opener.fstat(fp).st_size
1463 1470 if (
1464 1471 file_size >= mmap_threshold
1465 1472 and self.opener.is_mmap_safe(filepath)
1466 1473 ):
1467 1474 if size is not None:
1468 1475 # avoid potential mmap crash
1469 1476 size = min(file_size, size)
1470 1477 # TODO: should .close() to release resources without
1471 1478 # relying on Python GC
1472 1479 if size is None:
1473 1480 return util.buffer(util.mmapread(fp))
1474 1481 else:
1475 1482 return util.buffer(util.mmapread(fp, size))
1476 1483 if size is None:
1477 1484 return fp.read()
1478 1485 else:
1479 1486 return fp.read(size)
1480 1487 except FileNotFoundError:
1481 1488 return b''
1482 1489
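# A rough, stdlib-only sketch of the "mmap above a size threshold, plain read
# otherwise" policy implemented by _get_data() above. It works on a regular
# filesystem path rather than a Mercurial opener, and ignores the size capping
# and mmap-safety checks the real method performs.
import mmap, os

def toy_get_data(path, mmap_threshold=None):
    try:
        with open(path, 'rb') as fp:
            size = os.fstat(fp.fileno()).st_size
            if mmap_threshold is not None and size >= mmap_threshold and size > 0:
                # the mapping duplicates the handle, so it stays valid after close
                return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
            return fp.read()
    except FileNotFoundError:
        return b''
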
1483 1490 def get_streams(self, max_linkrev, force_inline=False):
1484 1491 """return a list of streams that represent this revlog
1485 1492
1486 1493 This is used by stream-clone to do bytes to bytes copies of a repository.
1487 1494
1488 1495 This streams data for all revisions that refer to a changelog revision up
1489 1496 to `max_linkrev`.
1490 1497
1491 1498 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1492 1499
1493 1500 It returns a list of three-tuples:
1494 1501
1495 1502 [
1496 1503 (filename, bytes_stream, stream_size),
1497 1504 …
1498 1505 ]
1499 1506 """
1500 1507 n = len(self)
1501 1508 index = self.index
1502 1509 while n > 0:
1503 1510 linkrev = index[n - 1][4]
1504 1511 if linkrev < max_linkrev:
1505 1512 break
1506 1513 # note: this loop will rarely go through multiple iterations, since
1507 1514 # it only traverses commits created during the current streaming
1508 1515 # pull operation.
1509 1516 #
1510 1517 # If this becomes a problem, using a binary search should cap the
1511 1518 # runtime of this.
1512 1519 n = n - 1
1513 1520 if n == 0:
1514 1521 # no data to send
1515 1522 return []
1516 1523 index_size = n * index.entry_size
1517 1524 data_size = self.end(n - 1)
1518 1525
1519 1526 # XXX we might have been split (or stripped) since the object
1520 1527 # initialization. We need to close this race too, by having a way to
1521 1528 # pre-open the files we feed to the revlog and never closing them before
1522 1529 # we are done streaming.
1523 1530
1524 1531 if self._inline:
1525 1532
1526 1533 def get_stream():
1527 1534 with self.opener(self._indexfile, mode=b"r") as fp:
1528 1535 yield None
1529 1536 size = index_size + data_size
1530 1537 if size <= 65536:
1531 1538 yield fp.read(size)
1532 1539 else:
1533 1540 yield from util.filechunkiter(fp, limit=size)
1534 1541
1535 1542 inline_stream = get_stream()
1536 1543 next(inline_stream)
1537 1544 return [
1538 1545 (self._indexfile, inline_stream, index_size + data_size),
1539 1546 ]
1540 1547 elif force_inline:
1541 1548
1542 1549 def get_stream():
1543 1550 with self.reading():
1544 1551 yield None
1545 1552
1546 1553 for rev in range(n):
1547 1554 idx = self.index.entry_binary(rev)
1548 1555 if rev == 0 and self._docket is None:
1549 1556 # re-inject the inline flag
1550 1557 header = self._format_flags
1551 1558 header |= self._format_version
1552 1559 header |= FLAG_INLINE_DATA
1553 1560 header = self.index.pack_header(header)
1554 1561 idx = header + idx
1555 1562 yield idx
1556 1563 yield self._inner.get_segment_for_revs(rev, rev)[1]
1557 1564
1558 1565 inline_stream = get_stream()
1559 1566 next(inline_stream)
1560 1567 return [
1561 1568 (self._indexfile, inline_stream, index_size + data_size),
1562 1569 ]
1563 1570 else:
1564 1571
1565 1572 def get_index_stream():
1566 1573 with self.opener(self._indexfile, mode=b"r") as fp:
1567 1574 yield None
1568 1575 if index_size <= 65536:
1569 1576 yield fp.read(index_size)
1570 1577 else:
1571 1578 yield from util.filechunkiter(fp, limit=index_size)
1572 1579
1573 1580 def get_data_stream():
1574 1581 with self._datafp() as fp:
1575 1582 yield None
1576 1583 if data_size <= 65536:
1577 1584 yield fp.read(data_size)
1578 1585 else:
1579 1586 yield from util.filechunkiter(fp, limit=data_size)
1580 1587
1581 1588 index_stream = get_index_stream()
1582 1589 next(index_stream)
1583 1590 data_stream = get_data_stream()
1584 1591 next(data_stream)
1585 1592 return [
1586 1593 (self._datafile, data_stream, data_size),
1587 1594 (self._indexfile, index_stream, index_size),
1588 1595 ]
1589 1596
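# A small sketch of the chunked streaming pattern used by the get_stream()
# helpers above (util.filechunkiter plays this role in the real code): yield a
# bounded amount of a file as fixed-size pieces instead of one huge read.
def toy_filechunkiter(fp, limit, chunksize=65536):
    remaining = limit
    while remaining > 0:
        data = fp.read(min(chunksize, remaining))
        if not data:
            break
        remaining -= len(data)
        yield data

import io
assert b''.join(toy_filechunkiter(io.BytesIO(b'x' * 200000), limit=131072)) == b'x' * 131072
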
1590 1597 def _loadindex(self, docket=None):
1591 1598 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1592 1599
1593 1600 if self.postfix is not None:
1594 1601 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1595 1602 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1596 1603 entry_point = b'%s.i.a' % self.radix
1597 1604 elif self._try_split and self.opener.exists(self._split_index_file):
1598 1605 entry_point = self._split_index_file
1599 1606 else:
1600 1607 entry_point = b'%s.i' % self.radix
1601 1608
1602 1609 if docket is not None:
1603 1610 self._docket = docket
1604 1611 self._docket_file = entry_point
1605 1612 else:
1606 1613 self._initempty = True
1607 1614 entry_data = self._get_data(entry_point, mmapindexthreshold)
1608 1615 if len(entry_data) > 0:
1609 1616 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1610 1617 self._initempty = False
1611 1618 else:
1612 1619 header = new_header
1613 1620
1614 1621 self._format_flags = header & ~0xFFFF
1615 1622 self._format_version = header & 0xFFFF
1616 1623
1617 1624 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1618 1625 if supported_flags is None:
1619 1626 msg = _(b'unknown version (%d) in revlog %s')
1620 1627 msg %= (self._format_version, self.display_id)
1621 1628 raise error.RevlogError(msg)
1622 1629 elif self._format_flags & ~supported_flags:
1623 1630 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1624 1631 display_flag = self._format_flags >> 16
1625 1632 msg %= (display_flag, self._format_version, self.display_id)
1626 1633 raise error.RevlogError(msg)
1627 1634
1628 1635 features = FEATURES_BY_VERSION[self._format_version]
1629 1636 self._inline = features[b'inline'](self._format_flags)
1630 1637 self.delta_config.general_delta = features[b'generaldelta'](
1631 1638 self._format_flags
1632 1639 )
1633 1640 self.feature_config.has_side_data = features[b'sidedata']
1634 1641
1635 1642 if not features[b'docket']:
1636 1643 self._indexfile = entry_point
1637 1644 index_data = entry_data
1638 1645 else:
1639 1646 self._docket_file = entry_point
1640 1647 if self._initempty:
1641 1648 self._docket = docketutil.default_docket(self, header)
1642 1649 else:
1643 1650 self._docket = docketutil.parse_docket(
1644 1651 self, entry_data, use_pending=self._trypending
1645 1652 )
1646 1653
1647 1654 if self._docket is not None:
1648 1655 self._indexfile = self._docket.index_filepath()
1649 1656 index_data = b''
1650 1657 index_size = self._docket.index_end
1651 1658 if index_size > 0:
1652 1659 index_data = self._get_data(
1653 1660 self._indexfile, mmapindexthreshold, size=index_size
1654 1661 )
1655 1662 if len(index_data) < index_size:
1656 1663 msg = _(b'too few index data for %s: got %d, expected %d')
1657 1664 msg %= (self.display_id, len(index_data), index_size)
1658 1665 raise error.RevlogError(msg)
1659 1666
1660 1667 self._inline = False
1661 1668 # generaldelta implied by version 2 revlogs.
1662 1669 self.delta_config.general_delta = True
1663 1670 # the logic for persistent nodemap will be dealt with within the
1664 1671 # main docket, so disable it for now.
1665 1672 self._nodemap_file = None
1666 1673
1667 1674 if self._docket is not None:
1668 1675 self._datafile = self._docket.data_filepath()
1669 1676 self._sidedatafile = self._docket.sidedata_filepath()
1670 1677 elif self.postfix is None:
1671 1678 self._datafile = b'%s.d' % self.radix
1672 1679 else:
1673 1680 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1674 1681
1675 1682 self.nodeconstants = sha1nodeconstants
1676 1683 self.nullid = self.nodeconstants.nullid
1677 1684
1678 1685 # sparse-revlog can't be on without general-delta (issue6056)
1679 1686 if not self.delta_config.general_delta:
1680 1687 self.delta_config.sparse_revlog = False
1681 1688
1682 1689 self._storedeltachains = True
1683 1690
1684 1691 devel_nodemap = (
1685 1692 self._nodemap_file
1686 1693 and force_nodemap
1687 1694 and parse_index_v1_nodemap is not None
1688 1695 )
1689 1696
1690 1697 use_rust_index = False
1691 1698 if rustrevlog is not None and self._nodemap_file is not None:
1692 1699 # we would like to use the rust_index in all cases, especially
1693 1700 # because it is necessary for AncestorsIterator and LazyAncestors
1694 1701 # since the 6.7 cycle.
1695 1702 #
1696 1703 # However, the performance impact of unconditionally building the
1697 1704 # nodemap is currently a problem for repositories without a persistent
1698 1705 # nodemap.
1699 1706 use_rust_index = True
1700 1707
1701 1708 self._parse_index = parse_index_v1
1702 1709 if self._format_version == REVLOGV0:
1703 1710 self._parse_index = revlogv0.parse_index_v0
1704 1711 elif self._format_version == REVLOGV2:
1705 1712 self._parse_index = parse_index_v2
1706 1713 elif self._format_version == CHANGELOGV2:
1707 1714 self._parse_index = parse_index_cl_v2
1708 1715 elif devel_nodemap:
1709 1716 self._parse_index = parse_index_v1_nodemap
1710 1717 elif use_rust_index:
1711 1718 self._parse_index = functools.partial(
1712 1719 parse_index_v1_rust, default_header=new_header
1713 1720 )
1714 1721 try:
1715 1722 d = self._parse_index(index_data, self._inline)
1716 1723 index, chunkcache = d
1717 1724 use_nodemap = (
1718 1725 not self._inline
1719 1726 and self._nodemap_file is not None
1720 1727 and hasattr(index, 'update_nodemap_data')
1721 1728 )
1722 1729 if use_nodemap:
1723 1730 nodemap_data = nodemaputil.persisted_data(self)
1724 1731 if nodemap_data is not None:
1725 1732 docket = nodemap_data[0]
1726 1733 if (
1727 1734 len(d[0]) > docket.tip_rev
1728 1735 and d[0][docket.tip_rev][7] == docket.tip_node
1729 1736 ):
1730 1737 # no changelog tampering
1731 1738 self._nodemap_docket = docket
1732 1739 index.update_nodemap_data(*nodemap_data)
1733 1740 except (ValueError, IndexError):
1734 1741 raise error.RevlogError(
1735 1742 _(b"index %s is corrupted") % self.display_id
1736 1743 )
1737 1744 self.index = index
1738 1745 # revnum -> (chain-length, sum-delta-length)
1739 1746 self._chaininfocache = util.lrucachedict(500)
1740 1747
1741 1748 return chunkcache
1742 1749
1743 1750 def _load_inner(self, chunk_cache):
1744 1751 if self._docket is None:
1745 1752 default_compression_header = None
1746 1753 else:
1747 1754 default_compression_header = self._docket.default_compression_header
1748 1755
1749 1756 self._inner = _InnerRevlog(
1750 1757 opener=self.opener,
1751 1758 index=self.index,
1752 1759 index_file=self._indexfile,
1753 1760 data_file=self._datafile,
1754 1761 sidedata_file=self._sidedatafile,
1755 1762 inline=self._inline,
1756 1763 data_config=self.data_config,
1757 1764 delta_config=self.delta_config,
1758 1765 feature_config=self.feature_config,
1759 1766 chunk_cache=chunk_cache,
1760 1767 default_compression_header=default_compression_header,
1761 1768 )
1762 1769
1763 1770 def get_revlog(self):
1764 1771 """simple function to mirror API of other not-really-revlog API"""
1765 1772 return self
1766 1773
1767 1774 @util.propertycache
1768 1775 def revlog_kind(self):
1769 1776 return self.target[0]
1770 1777
1771 1778 @util.propertycache
1772 1779 def display_id(self):
1773 1780 """The public facing "ID" of the revlog that we use in message"""
1774 1781 if self.revlog_kind == KIND_FILELOG:
1775 1782 # Reference the file without the "data/" prefix, so it is familiar
1776 1783 # to the user.
1777 1784 return self.target[1]
1778 1785 else:
1779 1786 return self.radix
1780 1787
1781 1788 def _datafp(self, mode=b'r'):
1782 1789 """file object for the revlog's data file"""
1783 1790 return self.opener(self._datafile, mode=mode)
1784 1791
1785 1792 def tiprev(self):
1786 1793 return len(self.index) - 1
1787 1794
1788 1795 def tip(self):
1789 1796 return self.node(self.tiprev())
1790 1797
1791 1798 def __contains__(self, rev):
1792 1799 return 0 <= rev < len(self)
1793 1800
1794 1801 def __len__(self):
1795 1802 return len(self.index)
1796 1803
1797 1804 def __iter__(self):
1798 1805 return iter(range(len(self)))
1799 1806
1800 1807 def revs(self, start=0, stop=None):
1801 1808 """iterate over all rev in this revlog (from start to stop)"""
1802 1809 return storageutil.iterrevs(len(self), start=start, stop=stop)
1803 1810
1804 1811 def hasnode(self, node):
1805 1812 try:
1806 1813 self.rev(node)
1807 1814 return True
1808 1815 except KeyError:
1809 1816 return False
1810 1817
1811 1818 def _candelta(self, baserev, rev):
1812 1819 """whether two revisions (baserev, rev) can be delta-ed or not"""
1813 1820 # Disable delta if either rev requires a content-changing flag
1814 1821 # processor (ex. LFS). This is because such flag processor can alter
1815 1822 # the rawtext content that the delta will be based on, and two clients
1816 1823 # could have a same revlog node with different flags (i.e. different
1817 1824 # rawtext contents) and the delta could be incompatible.
1818 1825 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1819 1826 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1820 1827 ):
1821 1828 return False
1822 1829 return True
1823 1830
1824 1831 def update_caches(self, transaction):
1825 1832 """update on disk cache
1826 1833
1827 1834 If a transaction is passed, the update may be delayed to transaction
1828 1835 commit."""
1829 1836 if self._nodemap_file is not None:
1830 1837 if transaction is None:
1831 1838 nodemaputil.update_persistent_nodemap(self)
1832 1839 else:
1833 1840 nodemaputil.setup_persistent_nodemap(transaction, self)
1834 1841
1835 1842 def clearcaches(self):
1836 1843 """Clear in-memory caches"""
1837 1844 self._chainbasecache.clear()
1838 1845 self._inner.clear_cache()
1839 1846 self._pcache = {}
1840 1847 self._nodemap_docket = None
1841 1848 self.index.clearcaches()
1842 1849 # The python code is the one responsible for validating the docket, so
1843 1850 # we end up having to refresh it here.
1844 1851 use_nodemap = (
1845 1852 not self._inline
1846 1853 and self._nodemap_file is not None
1847 1854 and hasattr(self.index, 'update_nodemap_data')
1848 1855 )
1849 1856 if use_nodemap:
1850 1857 nodemap_data = nodemaputil.persisted_data(self)
1851 1858 if nodemap_data is not None:
1852 1859 self._nodemap_docket = nodemap_data[0]
1853 1860 self.index.update_nodemap_data(*nodemap_data)
1854 1861
1855 1862 def rev(self, node):
1856 1863 """return the revision number associated with a <nodeid>"""
1857 1864 try:
1858 1865 return self.index.rev(node)
1859 1866 except TypeError:
1860 1867 raise
1861 1868 except error.RevlogError:
1862 1869 # parsers.c radix tree lookup failed
1863 1870 if (
1864 1871 node == self.nodeconstants.wdirid
1865 1872 or node in self.nodeconstants.wdirfilenodeids
1866 1873 ):
1867 1874 raise error.WdirUnsupported
1868 1875 raise error.LookupError(node, self.display_id, _(b'no node'))
1869 1876
1870 1877 # Accessors for index entries.
1871 1878
1872 1879 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1873 1880 # are flags.
1874 1881 def start(self, rev):
1875 1882 return int(self.index[rev][0] >> 16)
1876 1883
1877 1884 def sidedata_cut_off(self, rev):
1878 1885 sd_cut_off = self.index[rev][8]
1879 1886 if sd_cut_off != 0:
1880 1887 return sd_cut_off
1881 1888 # This is some annoying dance, because entries without sidedata
1882 1889 # currently use 0 as their offset (instead of previous-offset +
1883 1890 # previous-size)
1884 1891 #
1885 1892 # We should reconsider this sidedata → 0 sidedata_offset policy.
1886 1893 # In the meantime, we need this.
1887 1894 while 0 <= rev:
1888 1895 e = self.index[rev]
1889 1896 if e[9] != 0:
1890 1897 return e[8] + e[9]
1891 1898 rev -= 1
1892 1899 return 0
1893 1900
1894 1901 def flags(self, rev):
1895 1902 return self.index[rev][0] & 0xFFFF
1896 1903
1897 1904 def length(self, rev):
1898 1905 return self.index[rev][1]
1899 1906
1900 1907 def sidedata_length(self, rev):
1901 1908 if not self.feature_config.has_side_data:
1902 1909 return 0
1903 1910 return self.index[rev][9]
1904 1911
1905 1912 def rawsize(self, rev):
1906 1913 """return the length of the uncompressed text for a given revision"""
1907 1914 l = self.index[rev][2]
1908 1915 if l >= 0:
1909 1916 return l
1910 1917
1911 1918 t = self.rawdata(rev)
1912 1919 return len(t)
1913 1920
1914 1921 def size(self, rev):
1915 1922 """length of non-raw text (processed by a "read" flag processor)"""
1916 1923 # fast path: if no "read" flag processor could change the content,
1917 1924 # size is rawsize. note: ELLIPSIS is known to not change the content.
1918 1925 flags = self.flags(rev)
1919 1926 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1920 1927 return self.rawsize(rev)
1921 1928
1922 1929 return len(self.revision(rev))
1923 1930
1924 1931 def fast_rank(self, rev):
1925 1932 """Return the rank of a revision if already known, or None otherwise.
1926 1933
1927 1934 The rank of a revision is the size of the sub-graph it defines as a
1928 1935 head. Equivalently, the rank of a revision `r` is the size of the set
1929 1936 `ancestors(r)`, `r` included.
1930 1937
1931 1938 This method returns the rank retrieved from the revlog in constant
1932 1939 time. It makes no attempt at computing unknown values for versions of
1933 1940 the revlog which do not persist the rank.
1934 1941 """
1935 1942 rank = self.index[rev][ENTRY_RANK]
1936 1943 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1937 1944 return None
1938 1945 if rev == nullrev:
1939 1946 return 0 # convention
1940 1947 return rank
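# Illustrative sketch, assuming ``rl`` is a revlog whose format persists
# ranks (e.g. changelog-v2) and ``rev`` is a valid revision: the stored rank
# should match the size of the inclusive ancestor set, nullrev excluded.
#
#   rank = rl.fast_rank(rev)
#   if rank is not None:
#       assert rank == sum(1 for _ in rl.ancestors([rev], inclusive=True))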
1941 1948
1942 1949 def chainbase(self, rev):
1943 1950 base = self._chainbasecache.get(rev)
1944 1951 if base is not None:
1945 1952 return base
1946 1953
1947 1954 index = self.index
1948 1955 iterrev = rev
1949 1956 base = index[iterrev][3]
1950 1957 while base != iterrev:
1951 1958 iterrev = base
1952 1959 base = index[iterrev][3]
1953 1960
1954 1961 self._chainbasecache[rev] = base
1955 1962 return base
1956 1963
1957 1964 def linkrev(self, rev):
1958 1965 return self.index[rev][4]
1959 1966
1960 1967 def parentrevs(self, rev):
1961 1968 try:
1962 1969 entry = self.index[rev]
1963 1970 except IndexError:
1964 1971 if rev == wdirrev:
1965 1972 raise error.WdirUnsupported
1966 1973 raise
1967 1974
1968 1975 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1969 1976 return entry[6], entry[5]
1970 1977 else:
1971 1978 return entry[5], entry[6]
1972 1979
1973 1980 # fast parentrevs(rev) where rev isn't filtered
1974 1981 _uncheckedparentrevs = parentrevs
1975 1982
1976 1983 def node(self, rev):
1977 1984 try:
1978 1985 return self.index[rev][7]
1979 1986 except IndexError:
1980 1987 if rev == wdirrev:
1981 1988 raise error.WdirUnsupported
1982 1989 raise
1983 1990
1984 1991 # Derived from index values.
1985 1992
1986 1993 def end(self, rev):
1987 1994 return self.start(rev) + self.length(rev)
1988 1995
1989 1996 def parents(self, node):
1990 1997 i = self.index
1991 1998 d = i[self.rev(node)]
1992 1999 # inline node() to avoid function call overhead
1993 2000 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1994 2001 return i[d[6]][7], i[d[5]][7]
1995 2002 else:
1996 2003 return i[d[5]][7], i[d[6]][7]
1997 2004
1998 2005 def chainlen(self, rev):
1999 2006 return self._chaininfo(rev)[0]
2000 2007
2001 2008 def _chaininfo(self, rev):
2002 2009 chaininfocache = self._chaininfocache
2003 2010 if rev in chaininfocache:
2004 2011 return chaininfocache[rev]
2005 2012 index = self.index
2006 2013 generaldelta = self.delta_config.general_delta
2007 2014 iterrev = rev
2008 2015 e = index[iterrev]
2009 2016 clen = 0
2010 2017 compresseddeltalen = 0
2011 2018 while iterrev != e[3]:
2012 2019 clen += 1
2013 2020 compresseddeltalen += e[1]
2014 2021 if generaldelta:
2015 2022 iterrev = e[3]
2016 2023 else:
2017 2024 iterrev -= 1
2018 2025 if iterrev in chaininfocache:
2019 2026 t = chaininfocache[iterrev]
2020 2027 clen += t[0]
2021 2028 compresseddeltalen += t[1]
2022 2029 break
2023 2030 e = index[iterrev]
2024 2031 else:
2025 2032 # Add text length of base since decompressing that also takes
2026 2033 # work. For cache hits the length is already included.
2027 2034 compresseddeltalen += e[1]
2028 2035 r = (clen, compresseddeltalen)
2029 2036 chaininfocache[rev] = r
2030 2037 return r
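# Sketch, assuming ``rl`` is a revlog instance: ``chainlen(rev)`` counts the
# deltas applied on top of the chain's base text, so a revision stored as a
# full snapshot has a chain length of 0, and the cached pair unpacks directly:
#
#   clen, compressed_size = rl._chaininfo(rev)
#   assert clen == rl.chainlen(rev)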
2031 2038
2032 2039 def _deltachain(self, rev, stoprev=None):
2033 2040 return self._inner._deltachain(rev, stoprev=stoprev)
2034 2041
2035 2042 def ancestors(self, revs, stoprev=0, inclusive=False):
2036 2043 """Generate the ancestors of 'revs' in reverse revision order.
2037 2044 Does not generate revs lower than stoprev.
2038 2045
2039 2046 See the documentation for ancestor.lazyancestors for more details."""
2040 2047
2041 2048 # first, make sure start revisions aren't filtered
2042 2049 revs = list(revs)
2043 2050 checkrev = self.node
2044 2051 for r in revs:
2045 2052 checkrev(r)
2046 2053 # and we're sure ancestors aren't filtered as well
2047 2054
2048 2055 if rustancestor is not None and self.index.rust_ext_compat:
2049 2056 lazyancestors = rustancestor.LazyAncestors
2050 2057 arg = self.index
2051 2058 else:
2052 2059 lazyancestors = ancestor.lazyancestors
2053 2060 arg = self._uncheckedparentrevs
2054 2061 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2055 2062
2056 2063 def descendants(self, revs):
2057 2064 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2058 2065
2059 2066 def findcommonmissing(self, common=None, heads=None):
2060 2067 """Return a tuple of the ancestors of common and the ancestors of heads
2061 2068 that are not ancestors of common. In revset terminology, we return the
2062 2069 tuple:
2063 2070
2064 2071 ::common, (::heads) - (::common)
2065 2072
2066 2073 The list is sorted by revision number, meaning it is
2067 2074 topologically sorted.
2068 2075
2069 2076 'heads' and 'common' are both lists of node IDs. If heads is
2070 2077 not supplied, uses all of the revlog's heads. If common is not
2071 2078 supplied, uses nullid."""
2072 2079 if common is None:
2073 2080 common = [self.nullid]
2074 2081 if heads is None:
2075 2082 heads = self.heads()
2076 2083
2077 2084 common = [self.rev(n) for n in common]
2078 2085 heads = [self.rev(n) for n in heads]
2079 2086
2080 2087 # we want the ancestors, but inclusive
2081 2088 class lazyset:
2082 2089 def __init__(self, lazyvalues):
2083 2090 self.addedvalues = set()
2084 2091 self.lazyvalues = lazyvalues
2085 2092
2086 2093 def __contains__(self, value):
2087 2094 return value in self.addedvalues or value in self.lazyvalues
2088 2095
2089 2096 def __iter__(self):
2090 2097 added = self.addedvalues
2091 2098 for r in added:
2092 2099 yield r
2093 2100 for r in self.lazyvalues:
2094 2101 if r not in added:
2095 2102 yield r
2096 2103
2097 2104 def add(self, value):
2098 2105 self.addedvalues.add(value)
2099 2106
2100 2107 def update(self, values):
2101 2108 self.addedvalues.update(values)
2102 2109
2103 2110 has = lazyset(self.ancestors(common))
2104 2111 has.add(nullrev)
2105 2112 has.update(common)
2106 2113
2107 2114 # take all ancestors from heads that aren't in has
2108 2115 missing = set()
2109 2116 visit = collections.deque(r for r in heads if r not in has)
2110 2117 while visit:
2111 2118 r = visit.popleft()
2112 2119 if r in missing:
2113 2120 continue
2114 2121 else:
2115 2122 missing.add(r)
2116 2123 for p in self.parentrevs(r):
2117 2124 if p not in has:
2118 2125 visit.append(p)
2119 2126 missing = list(missing)
2120 2127 missing.sort()
2121 2128 return has, [self.node(miss) for miss in missing]
2122 2129
2123 2130 def incrementalmissingrevs(self, common=None):
2124 2131 """Return an object that can be used to incrementally compute the
2125 2132 revision numbers of the ancestors of arbitrary sets that are not
2126 2133 ancestors of common. This is an ancestor.incrementalmissingancestors
2127 2134 object.
2128 2135
2129 2136 'common' is a list of revision numbers. If common is not supplied, uses
2130 2137 nullrev.
2131 2138 """
2132 2139 if common is None:
2133 2140 common = [nullrev]
2134 2141
2135 2142 if rustancestor is not None and self.index.rust_ext_compat:
2136 2143 return rustancestor.MissingAncestors(self.index, common)
2137 2144 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2138 2145
2139 2146 def findmissingrevs(self, common=None, heads=None):
2140 2147 """Return the revision numbers of the ancestors of heads that
2141 2148 are not ancestors of common.
2142 2149
2143 2150 More specifically, return a list of revision numbers corresponding to
2144 2151 nodes N such that every N satisfies the following constraints:
2145 2152
2146 2153 1. N is an ancestor of some node in 'heads'
2147 2154 2. N is not an ancestor of any node in 'common'
2148 2155
2149 2156 The list is sorted by revision number, meaning it is
2150 2157 topologically sorted.
2151 2158
2152 2159 'heads' and 'common' are both lists of revision numbers. If heads is
2153 2160 not supplied, uses all of the revlog's heads. If common is not
2154 2161 supplied, uses nullid."""
2155 2162 if common is None:
2156 2163 common = [nullrev]
2157 2164 if heads is None:
2158 2165 heads = self.headrevs()
2159 2166
2160 2167 inc = self.incrementalmissingrevs(common=common)
2161 2168 return inc.missingancestors(heads)
2162 2169
2163 2170 def findmissing(self, common=None, heads=None):
2164 2171 """Return the ancestors of heads that are not ancestors of common.
2165 2172
2166 2173 More specifically, return a list of nodes N such that every N
2167 2174 satisfies the following constraints:
2168 2175
2169 2176 1. N is an ancestor of some node in 'heads'
2170 2177 2. N is not an ancestor of any node in 'common'
2171 2178
2172 2179 The list is sorted by revision number, meaning it is
2173 2180 topologically sorted.
2174 2181
2175 2182 'heads' and 'common' are both lists of node IDs. If heads is
2176 2183 not supplied, uses all of the revlog's heads. If common is not
2177 2184 supplied, uses nullid."""
2178 2185 if common is None:
2179 2186 common = [self.nullid]
2180 2187 if heads is None:
2181 2188 heads = self.heads()
2182 2189
2183 2190 common = [self.rev(n) for n in common]
2184 2191 heads = [self.rev(n) for n in heads]
2185 2192
2186 2193 inc = self.incrementalmissingrevs(common=common)
2187 2194 return [self.node(r) for r in inc.missingancestors(heads)]
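# Sketch, assuming ``rl`` is a revlog and ``c``/``h`` are lists of common and
# head node ids; this mirrors the revset ``(::h) - (::c)`` and every returned
# node is an ancestor of some head:
#
#   missing = rl.findmissing(common=c, heads=h)
#   assert all(any(rl.isancestor(n, head) for head in h) for n in missing)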
2188 2195
2189 2196 def nodesbetween(self, roots=None, heads=None):
2190 2197 """Return a topological path from 'roots' to 'heads'.
2191 2198
2192 2199 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2193 2200 topologically sorted list of all nodes N that satisfy both of
2194 2201 these constraints:
2195 2202
2196 2203 1. N is a descendant of some node in 'roots'
2197 2204 2. N is an ancestor of some node in 'heads'
2198 2205
2199 2206 Every node is considered to be both a descendant and an ancestor
2200 2207 of itself, so every reachable node in 'roots' and 'heads' will be
2201 2208 included in 'nodes'.
2202 2209
2203 2210 'outroots' is the list of reachable nodes in 'roots', i.e., the
2204 2211 subset of 'roots' that is returned in 'nodes'. Likewise,
2205 2212 'outheads' is the subset of 'heads' that is also in 'nodes'.
2206 2213
2207 2214 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2208 2215 unspecified, uses nullid as the only root. If 'heads' is
2209 2216 unspecified, uses list of all of the revlog's heads."""
2210 2217 nonodes = ([], [], [])
2211 2218 if roots is not None:
2212 2219 roots = list(roots)
2213 2220 if not roots:
2214 2221 return nonodes
2215 2222 lowestrev = min([self.rev(n) for n in roots])
2216 2223 else:
2217 2224 roots = [self.nullid] # Everybody's a descendant of nullid
2218 2225 lowestrev = nullrev
2219 2226 if (lowestrev == nullrev) and (heads is None):
2220 2227 # We want _all_ the nodes!
2221 2228 return (
2222 2229 [self.node(r) for r in self],
2223 2230 [self.nullid],
2224 2231 list(self.heads()),
2225 2232 )
2226 2233 if heads is None:
2227 2234 # All nodes are ancestors, so the latest ancestor is the last
2228 2235 # node.
2229 2236 highestrev = len(self) - 1
2230 2237 # Set ancestors to None to signal that every node is an ancestor.
2231 2238 ancestors = None
2232 2239 # Set heads to an empty dictionary for later discovery of heads
2233 2240 heads = {}
2234 2241 else:
2235 2242 heads = list(heads)
2236 2243 if not heads:
2237 2244 return nonodes
2238 2245 ancestors = set()
2239 2246 # Turn heads into a dictionary so we can remove 'fake' heads.
2240 2247 # Also, later we will be using it to filter out the heads we can't
2241 2248 # find from roots.
2242 2249 heads = dict.fromkeys(heads, False)
2243 2250 # Start at the top and keep marking parents until we're done.
2244 2251 nodestotag = set(heads)
2245 2252 # Remember where the top was so we can use it as a limit later.
2246 2253 highestrev = max([self.rev(n) for n in nodestotag])
2247 2254 while nodestotag:
2248 2255 # grab a node to tag
2249 2256 n = nodestotag.pop()
2250 2257 # Never tag nullid
2251 2258 if n == self.nullid:
2252 2259 continue
2253 2260 # A node's revision number represents its place in a
2254 2261 # topologically sorted list of nodes.
2255 2262 r = self.rev(n)
2256 2263 if r >= lowestrev:
2257 2264 if n not in ancestors:
2258 2265 # If we are possibly a descendant of one of the roots
2259 2266 # and we haven't already been marked as an ancestor
2260 2267 ancestors.add(n) # Mark as ancestor
2261 2268 # Add non-nullid parents to list of nodes to tag.
2262 2269 nodestotag.update(
2263 2270 [p for p in self.parents(n) if p != self.nullid]
2264 2271 )
2265 2272 elif n in heads: # We've seen it before, is it a fake head?
2266 2273 # So it is, real heads should not be the ancestors of
2267 2274 # any other heads.
2268 2275 heads.pop(n)
2269 2276 if not ancestors:
2270 2277 return nonodes
2271 2278 # Now that we have our set of ancestors, we want to remove any
2272 2279 # roots that are not ancestors.
2273 2280
2274 2281 # If one of the roots was nullid, everything is included anyway.
2275 2282 if lowestrev > nullrev:
2276 2283 # But, since we weren't, let's recompute the lowest rev to not
2277 2284 # include roots that aren't ancestors.
2278 2285
2279 2286 # Filter out roots that aren't ancestors of heads
2280 2287 roots = [root for root in roots if root in ancestors]
2281 2288 # Recompute the lowest revision
2282 2289 if roots:
2283 2290 lowestrev = min([self.rev(root) for root in roots])
2284 2291 else:
2285 2292 # No more roots? Return empty list
2286 2293 return nonodes
2287 2294 else:
2288 2295 # We are descending from nullid, and don't need to care about
2289 2296 # any other roots.
2290 2297 lowestrev = nullrev
2291 2298 roots = [self.nullid]
2292 2299 # Transform our roots list into a set.
2293 2300 descendants = set(roots)
2294 2301 # Also, keep the original roots so we can filter out roots that aren't
2295 2302 # 'real' roots (i.e. are descended from other roots).
2296 2303 roots = descendants.copy()
2297 2304 # Our topologically sorted list of output nodes.
2298 2305 orderedout = []
2299 2306 # Don't start at nullid since we don't want nullid in our output list,
2300 2307 # and if nullid shows up in descendants, empty parents will look like
2301 2308 # they're descendants.
2302 2309 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2303 2310 n = self.node(r)
2304 2311 isdescendant = False
2305 2312 if lowestrev == nullrev: # Everybody is a descendant of nullid
2306 2313 isdescendant = True
2307 2314 elif n in descendants:
2308 2315 # n is already a descendant
2309 2316 isdescendant = True
2310 2317 # This check only needs to be done here because all the roots
2311 2318 # will start being marked as descendants before the loop.
2312 2319 if n in roots:
2313 2320 # If n was a root, check if it's a 'real' root.
2314 2321 p = tuple(self.parents(n))
2315 2322 # If any of its parents are descendants, it's not a root.
2316 2323 if (p[0] in descendants) or (p[1] in descendants):
2317 2324 roots.remove(n)
2318 2325 else:
2319 2326 p = tuple(self.parents(n))
2320 2327 # A node is a descendant if either of its parents are
2321 2328 # descendants. (We seeded the descendants set with the roots
2322 2329 # up there, remember?)
2323 2330 if (p[0] in descendants) or (p[1] in descendants):
2324 2331 descendants.add(n)
2325 2332 isdescendant = True
2326 2333 if isdescendant and ((ancestors is None) or (n in ancestors)):
2327 2334 # Only include nodes that are both descendants and ancestors.
2328 2335 orderedout.append(n)
2329 2336 if (ancestors is not None) and (n in heads):
2330 2337 # We're trying to figure out which heads are reachable
2331 2338 # from roots.
2332 2339 # Mark this head as having been reached
2333 2340 heads[n] = True
2334 2341 elif ancestors is None:
2335 2342 # Otherwise, we're trying to discover the heads.
2336 2343 # Assume this is a head because if it isn't, the next step
2337 2344 # will eventually remove it.
2338 2345 heads[n] = True
2339 2346 # But, obviously its parents aren't.
2340 2347 for p in self.parents(n):
2341 2348 heads.pop(p, None)
2342 2349 heads = [head for head, flag in heads.items() if flag]
2343 2350 roots = list(roots)
2344 2351 assert orderedout
2345 2352 assert roots
2346 2353 assert heads
2347 2354 return (orderedout, roots, heads)
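# Sketch, assuming ``rl`` is a revlog: calling ``nodesbetween`` with no
# arguments walks everything, with nullid as the only root and the revlog
# heads as the reachable heads:
#
#   nodes, outroots, outheads = rl.nodesbetween()
#   assert outroots == [rl.nullid]
#   assert set(outheads) == set(rl.heads())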
2348 2355
2349 2356 def headrevs(self, revs=None):
2350 2357 if revs is None:
2351 2358 try:
2352 2359 return self.index.headrevs()
2353 2360 except AttributeError:
2354 2361 return self._headrevs()
2355 2362 if rustdagop is not None and self.index.rust_ext_compat:
2356 2363 return rustdagop.headrevs(self.index, revs)
2357 2364 return dagop.headrevs(revs, self._uncheckedparentrevs)
2358 2365
2359 2366 def headrevsdiff(self, start, stop):
2360 2367 try:
2361 2368 return self.index.headrevsdiff(start, stop)
2362 2369 except AttributeError:
2363 2370 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2364 2371
2365 2372 def computephases(self, roots):
2366 2373 return self.index.computephasesmapsets(roots)
2367 2374
2368 2375 def _headrevs(self):
2369 2376 count = len(self)
2370 2377 if not count:
2371 2378 return [nullrev]
2372 2379 # we won't iterate over filtered revs, so nobody is a head at start
2373 2380 ishead = [0] * (count + 1)
2374 2381 index = self.index
2375 2382 for r in self:
2376 2383 ishead[r] = 1 # I may be a head
2377 2384 e = index[r]
2378 2385 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2379 2386 return [r for r, val in enumerate(ishead) if val]
2380 2387
2381 2388 def _head_node_ids(self):
2382 2389 try:
2383 2390 return self.index.head_node_ids()
2384 2391 except AttributeError:
2385 2392 return [self.node(r) for r in self.headrevs()]
2386 2393
2387 2394 def heads(self, start=None, stop=None):
2388 2395 """return the list of all nodes that have no children
2389 2396
2390 2397 if start is specified, only heads that are descendants of
2391 2398 start will be returned
2392 2399 if stop is specified, it will consider all the revs from stop
2393 2400 as if they had no children
2394 2401 """
2395 2402 if start is None and stop is None:
2396 2403 if not len(self):
2397 2404 return [self.nullid]
2398 2405 return self._head_node_ids()
2399 2406 if start is None:
2400 2407 start = nullrev
2401 2408 else:
2402 2409 start = self.rev(start)
2403 2410
2404 2411 stoprevs = {self.rev(n) for n in stop or []}
2405 2412
2406 2413 revs = dagop.headrevssubset(
2407 2414 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2408 2415 )
2409 2416
2410 2417 return [self.node(rev) for rev in revs]
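# Sketch, assuming ``rl`` is a non-empty revlog: restricting ``heads`` to the
# descendants of a start node can only narrow the unrestricted result:
#
#   all_heads = rl.heads()
#   some_heads = rl.heads(start=rl.node(0))
#   assert set(some_heads) <= set(all_heads)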
2411 2418
2412 2419 def diffheads(self, start, stop):
2413 2420 """return the nodes that make up the difference between
2414 2421 heads of revs before `start` and heads of revs before `stop`"""
2415 2422 removed, added = self.headrevsdiff(start, stop)
2416 2423 return [self.node(r) for r in removed], [self.node(r) for r in added]
2417 2424
2418 2425 def children(self, node):
2419 2426 """find the children of a given node"""
2420 2427 c = []
2421 2428 p = self.rev(node)
2422 2429 for r in self.revs(start=p + 1):
2423 2430 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2424 2431 if prevs:
2425 2432 for pr in prevs:
2426 2433 if pr == p:
2427 2434 c.append(self.node(r))
2428 2435 elif p == nullrev:
2429 2436 c.append(self.node(r))
2430 2437 return c
2431 2438
2432 2439 def commonancestorsheads(self, a, b):
2433 2440 """calculate all the heads of the common ancestors of nodes a and b"""
2434 2441 a, b = self.rev(a), self.rev(b)
2435 2442 ancs = self._commonancestorsheads(a, b)
2436 2443 return pycompat.maplist(self.node, ancs)
2437 2444
2438 2445 def _commonancestorsheads(self, *revs):
2439 2446 """calculate all the heads of the common ancestors of revs"""
2440 2447 try:
2441 2448 ancs = self.index.commonancestorsheads(*revs)
2442 2449 except (AttributeError, OverflowError): # C implementation failed
2443 2450 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2444 2451 return ancs
2445 2452
2446 2453 def isancestor(self, a, b):
2447 2454 """return True if node a is an ancestor of node b
2448 2455
2449 2456 A revision is considered an ancestor of itself."""
2450 2457 a, b = self.rev(a), self.rev(b)
2451 2458 return self.isancestorrev(a, b)
2452 2459
2453 2460 def isancestorrev(self, a, b):
2454 2461 """return True if revision a is an ancestor of revision b
2455 2462
2456 2463 A revision is considered an ancestor of itself.
2457 2464
2458 2465 The implementation of this is trivial but the use of
2459 2466 reachableroots is not."""
2460 2467 if a == nullrev:
2461 2468 return True
2462 2469 elif a == b:
2463 2470 return True
2464 2471 elif a > b:
2465 2472 return False
2466 2473 return bool(self.reachableroots(a, [b], [a], includepath=False))
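# Sketch: ancestry is reflexive and compatible with revision numbering, so
# for valid revisions ``a`` and ``b`` of a revlog ``rl``:
#
#   assert rl.isancestorrev(a, a)
#   if a > b:
#       assert not rl.isancestorrev(a, b)  # an ancestor never has a larger rev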
2467 2474
2468 2475 def reachableroots(self, minroot, heads, roots, includepath=False):
2469 2476 """return (heads(::(<roots> and <roots>::<heads>)))
2470 2477
2471 2478 If includepath is True, return (<roots>::<heads>)."""
2472 2479 try:
2473 2480 return self.index.reachableroots2(
2474 2481 minroot, heads, roots, includepath
2475 2482 )
2476 2483 except AttributeError:
2477 2484 return dagop._reachablerootspure(
2478 2485 self.parentrevs, minroot, roots, heads, includepath
2479 2486 )
2480 2487
2481 2488 def ancestor(self, a, b):
2482 2489 """calculate the "best" common ancestor of nodes a and b"""
2483 2490
2484 2491 a, b = self.rev(a), self.rev(b)
2485 2492 try:
2486 2493 ancs = self.index.ancestors(a, b)
2487 2494 except (AttributeError, OverflowError):
2488 2495 ancs = ancestor.ancestors(self.parentrevs, a, b)
2489 2496 if ancs:
2490 2497 # choose a consistent winner when there's a tie
2491 2498 return min(map(self.node, ancs))
2492 2499 return self.nullid
2493 2500
2494 2501 def _match(self, id):
2495 2502 if isinstance(id, int):
2496 2503 # rev
2497 2504 return self.node(id)
2498 2505 if len(id) == self.nodeconstants.nodelen:
2499 2506 # possibly a binary node
2500 2507 # odds of a binary node being all hex in ASCII are 1 in 10**25
2501 2508 try:
2502 2509 node = id
2503 2510 self.rev(node) # quick search the index
2504 2511 return node
2505 2512 except error.LookupError:
2506 2513 pass # may be partial hex id
2507 2514 try:
2508 2515 # str(rev)
2509 2516 rev = int(id)
2510 2517 if b"%d" % rev != id:
2511 2518 raise ValueError
2512 2519 if rev < 0:
2513 2520 rev = len(self) + rev
2514 2521 if rev < 0 or rev >= len(self):
2515 2522 raise ValueError
2516 2523 return self.node(rev)
2517 2524 except (ValueError, OverflowError):
2518 2525 pass
2519 2526 if len(id) == 2 * self.nodeconstants.nodelen:
2520 2527 try:
2521 2528 # a full hex nodeid?
2522 2529 node = bin(id)
2523 2530 self.rev(node)
2524 2531 return node
2525 2532 except (binascii.Error, error.LookupError):
2526 2533 pass
2527 2534
2528 2535 def _partialmatch(self, id):
2529 2536 # we don't care about wdirfilenodeids as they should always be full hashes
2530 2537 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2531 2538 ambiguous = False
2532 2539 try:
2533 2540 partial = self.index.partialmatch(id)
2534 2541 if partial and self.hasnode(partial):
2535 2542 if maybewdir:
2536 2543 # single 'ff...' match in radix tree, ambiguous with wdir
2537 2544 ambiguous = True
2538 2545 else:
2539 2546 return partial
2540 2547 elif maybewdir:
2541 2548 # no 'ff...' match in radix tree, wdir identified
2542 2549 raise error.WdirUnsupported
2543 2550 else:
2544 2551 return None
2545 2552 except error.RevlogError:
2546 2553 # parsers.c radix tree lookup gave multiple matches
2547 2554 # fast path: for unfiltered changelog, radix tree is accurate
2548 2555 if not getattr(self, 'filteredrevs', None):
2549 2556 ambiguous = True
2550 2557 # fall through to slow path that filters hidden revisions
2551 2558 except (AttributeError, ValueError):
2552 2559 # we are pure python, or key is not hex
2553 2560 pass
2554 2561 if ambiguous:
2555 2562 raise error.AmbiguousPrefixLookupError(
2556 2563 id, self.display_id, _(b'ambiguous identifier')
2557 2564 )
2558 2565
2559 2566 if id in self._pcache:
2560 2567 return self._pcache[id]
2561 2568
2562 2569 if len(id) <= 40:
2563 2570 # hex(node)[:...]
2564 2571 l = len(id) // 2 * 2 # grab an even number of digits
2565 2572 try:
2566 2573 # we're dropping the last digit, so let's check that it's hex,
2567 2574 # to avoid the expensive computation below if it's not
2568 2575 if len(id) % 2 > 0:
2569 2576 if id[-1] not in hexdigits:
2570 2577 return None
2571 2578 prefix = bin(id[:l])
2572 2579 except binascii.Error:
2573 2580 pass
2574 2581 else:
2575 2582 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2576 2583 nl = [
2577 2584 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2578 2585 ]
2579 2586 if self.nodeconstants.nullhex.startswith(id):
2580 2587 nl.append(self.nullid)
2581 2588 if len(nl) > 0:
2582 2589 if len(nl) == 1 and not maybewdir:
2583 2590 self._pcache[id] = nl[0]
2584 2591 return nl[0]
2585 2592 raise error.AmbiguousPrefixLookupError(
2586 2593 id, self.display_id, _(b'ambiguous identifier')
2587 2594 )
2588 2595 if maybewdir:
2589 2596 raise error.WdirUnsupported
2590 2597 return None
2591 2598
2592 2599 def lookup(self, id):
2593 2600 """locate a node based on:
2594 2601 - revision number or str(revision number)
2595 2602 - nodeid or subset of hex nodeid
2596 2603 """
2597 2604 n = self._match(id)
2598 2605 if n is not None:
2599 2606 return n
2600 2607 n = self._partialmatch(id)
2601 2608 if n:
2602 2609 return n
2603 2610
2604 2611 raise error.LookupError(id, self.display_id, _(b'no match found'))
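# Sketch, assuming ``rl`` has at least one revision: the same entry can be
# found by revision number, by the string form of that number, or by an
# unambiguous hex prefix of its node id:
#
#   node = rl.node(0)
#   assert rl.lookup(0) == node
#   assert rl.lookup(b'0') == node
#   assert rl.lookup(hex(node)[:12]) == node  # when the prefix is unambiguous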
2605 2612
2606 2613 def shortest(self, node, minlength=1):
2607 2614 """Find the shortest unambiguous prefix that matches node."""
2608 2615
2609 2616 def isvalid(prefix):
2610 2617 try:
2611 2618 matchednode = self._partialmatch(prefix)
2612 2619 except error.AmbiguousPrefixLookupError:
2613 2620 return False
2614 2621 except error.WdirUnsupported:
2615 2622 # single 'ff...' match
2616 2623 return True
2617 2624 if matchednode is None:
2618 2625 raise error.LookupError(node, self.display_id, _(b'no node'))
2619 2626 return True
2620 2627
2621 2628 def maybewdir(prefix):
2622 2629 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2623 2630
2624 2631 hexnode = hex(node)
2625 2632
2626 2633 def disambiguate(hexnode, minlength):
2627 2634 """Disambiguate against wdirid."""
2628 2635 for length in range(minlength, len(hexnode) + 1):
2629 2636 prefix = hexnode[:length]
2630 2637 if not maybewdir(prefix):
2631 2638 return prefix
2632 2639
2633 2640 if not getattr(self, 'filteredrevs', None):
2634 2641 try:
2635 2642 length = max(self.index.shortest(node), minlength)
2636 2643 return disambiguate(hexnode, length)
2637 2644 except error.RevlogError:
2638 2645 if node != self.nodeconstants.wdirid:
2639 2646 raise error.LookupError(
2640 2647 node, self.display_id, _(b'no node')
2641 2648 )
2642 2649 except AttributeError:
2643 2650 # Fall through to pure code
2644 2651 pass
2645 2652
2646 2653 if node == self.nodeconstants.wdirid:
2647 2654 for length in range(minlength, len(hexnode) + 1):
2648 2655 prefix = hexnode[:length]
2649 2656 if isvalid(prefix):
2650 2657 return prefix
2651 2658
2652 2659 for length in range(minlength, len(hexnode) + 1):
2653 2660 prefix = hexnode[:length]
2654 2661 if isvalid(prefix):
2655 2662 return disambiguate(hexnode, length)
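# Sketch, assuming ``node`` is stored in ``rl``: the returned prefix is a
# leading slice of the full hex node id and is unambiguous within this revlog:
#
#   prefix = rl.shortest(node)
#   assert hex(node).startswith(prefix)
#   assert rl._partialmatch(prefix) == node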
2656 2663
2657 2664 def cmp(self, node, text):
2658 2665 """compare text with a given file revision
2659 2666
2660 2667 returns True if text is different than what is stored.
2661 2668 """
2662 2669 p1, p2 = self.parents(node)
2663 2670 return storageutil.hashrevisionsha1(text, p1, p2) != node
2664 2671
2665 2672 def deltaparent(self, rev):
2666 2673 """return deltaparent of the given revision"""
2667 2674 base = self.index[rev][3]
2668 2675 if base == rev:
2669 2676 return nullrev
2670 2677 elif self.delta_config.general_delta:
2671 2678 return base
2672 2679 else:
2673 2680 return rev - 1
2674 2681
2675 2682 def issnapshot(self, rev):
2676 2683 """tells whether rev is a snapshot"""
2677 2684 ret = self._inner.issnapshot(rev)
2678 2685 self.issnapshot = self._inner.issnapshot
2679 2686 return ret
2680 2687
2681 2688 def snapshotdepth(self, rev):
2682 2689 """number of snapshots in the chain before this one"""
2683 2690 if not self.issnapshot(rev):
2684 2691 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2685 2692 return len(self._inner._deltachain(rev)[0]) - 1
2686 2693
2687 2694 def revdiff(self, rev1, rev2):
2688 2695 """return or calculate a delta between two revisions
2689 2696
2690 2697 The delta calculated is in binary form and is intended to be written to
2691 2698 revlog data directly. So this function needs raw revision data.
2692 2699 """
2693 2700 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2694 2701 return bytes(self._inner._chunk(rev2))
2695 2702
2696 2703 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2697 2704
2698 2705 def revision(self, nodeorrev):
2699 2706 """return an uncompressed revision of a given node or revision
2700 2707 number.
2701 2708 """
2702 2709 return self._revisiondata(nodeorrev)
2703 2710
2704 2711 def sidedata(self, nodeorrev):
2705 2712 """a map of extra data related to the changeset but not part of the hash
2706 2713
2707 2714 This function currently return a dictionary. However, more advanced
2708 2715 mapping object will likely be used in the future for a more
2709 2716 efficient/lazy code.
2710 2717 """
2711 2718 # deal with <nodeorrev> argument type
2712 2719 if isinstance(nodeorrev, int):
2713 2720 rev = nodeorrev
2714 2721 else:
2715 2722 rev = self.rev(nodeorrev)
2716 2723 return self._sidedata(rev)
2717 2724
2718 2725 def _rawtext(self, node, rev):
2719 2726 """return the possibly unvalidated rawtext for a revision
2720 2727
2721 2728 returns (rev, rawtext, validated)
2722 2729 """
2723 2730 # Check if we have the entry in cache
2724 2731 # The cache entry looks like (node, rev, rawtext)
2725 2732 if self._inner._revisioncache:
2726 2733 if self._inner._revisioncache[0] == node:
2727 2734 return (rev, self._inner._revisioncache[2], True)
2728 2735
2729 2736 if rev is None:
2730 2737 rev = self.rev(node)
2731 2738
2732 2739 return self._inner.raw_text(node, rev)
2733 2740
2734 2741 def _revisiondata(self, nodeorrev, raw=False):
2735 2742 # deal with <nodeorrev> argument type
2736 2743 if isinstance(nodeorrev, int):
2737 2744 rev = nodeorrev
2738 2745 node = self.node(rev)
2739 2746 else:
2740 2747 node = nodeorrev
2741 2748 rev = None
2742 2749
2743 2750 # fast path the special `nullid` rev
2744 2751 if node == self.nullid:
2745 2752 return b""
2746 2753
2747 2754 # ``rawtext`` is the text as stored inside the revlog. Might be the
2748 2755 # revision or might need to be processed to retrieve the revision.
2749 2756 rev, rawtext, validated = self._rawtext(node, rev)
2750 2757
2751 2758 if raw and validated:
2752 2759 # if we don't want to process the raw text and that raw
2753 2760 # text is cached, we can exit early.
2754 2761 return rawtext
2755 2762 if rev is None:
2756 2763 rev = self.rev(node)
2757 2764 # the revlog's flags for this revision
2758 2765 # (they usually alter its state or content)
2759 2766 flags = self.flags(rev)
2760 2767
2761 2768 if validated and flags == REVIDX_DEFAULT_FLAGS:
2762 2769 # no extra flags set, no flag processor runs, text = rawtext
2763 2770 return rawtext
2764 2771
2765 2772 if raw:
2766 2773 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2767 2774 text = rawtext
2768 2775 else:
2769 2776 r = flagutil.processflagsread(self, rawtext, flags)
2770 2777 text, validatehash = r
2771 2778 if validatehash:
2772 2779 self.checkhash(text, node, rev=rev)
2773 2780 if not validated:
2774 2781 self._inner._revisioncache = (node, rev, rawtext)
2775 2782
2776 2783 return text
2777 2784
2778 2785 def _sidedata(self, rev):
2779 2786 """Return the sidedata for a given revision number."""
2780 2787 sidedata_end = None
2781 2788 if self._docket is not None:
2782 2789 sidedata_end = self._docket.sidedata_end
2783 2790 return self._inner.sidedata(rev, sidedata_end)
2784 2791
2785 2792 def rawdata(self, nodeorrev):
2786 2793 """return an uncompressed raw data of a given node or revision number."""
2787 2794 return self._revisiondata(nodeorrev, raw=True)
2788 2795
2789 2796 def hash(self, text, p1, p2):
2790 2797 """Compute a node hash.
2791 2798
2792 2799 Available as a function so that subclasses can replace the hash
2793 2800 as needed.
2794 2801 """
2795 2802 return storageutil.hashrevisionsha1(text, p1, p2)
2796 2803
2797 2804 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2798 2805 """Check node hash integrity.
2799 2806
2800 2807 Available as a function so that subclasses can extend hash mismatch
2801 2808 behaviors as needed.
2802 2809 """
2803 2810 try:
2804 2811 if p1 is None and p2 is None:
2805 2812 p1, p2 = self.parents(node)
2806 2813 if node != self.hash(text, p1, p2):
2807 2814 # Clear the revision cache on hash failure. The revision cache
2808 2815 # only stores the raw revision and clearing the cache does have
2809 2816 # the side-effect that we won't have a cache hit when the raw
2810 2817 # revision data is accessed. But this case should be rare and
2811 2818 # it is extra work to teach the cache about the hash
2812 2819 # verification state.
2813 2820 if (
2814 2821 self._inner._revisioncache
2815 2822 and self._inner._revisioncache[0] == node
2816 2823 ):
2817 2824 self._inner._revisioncache = None
2818 2825
2819 2826 revornode = rev
2820 2827 if revornode is None:
2821 2828 revornode = templatefilters.short(hex(node))
2822 2829 raise error.RevlogError(
2823 2830 _(b"integrity check failed on %s:%s")
2824 2831 % (self.display_id, pycompat.bytestr(revornode))
2825 2832 )
2826 2833 except error.RevlogError:
2827 2834 if self.feature_config.censorable and storageutil.iscensoredtext(
2828 2835 text
2829 2836 ):
2830 2837 raise error.CensoredNodeError(self.display_id, node, text)
2831 2838 raise
2832 2839
2833 2840 @property
2834 2841 def _split_index_file(self):
2835 2842 """the path where the index of an ongoing splitting operation is expected
2836 2843
2837 2844 The file will only exist if a splitting operation is in progress, but
2838 2845 it is always expected at the same location."""
2839 2846 parts = self.radix.split(b'/')
2840 2847 if len(parts) > 1:
2841 2848 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2842 2849 head = parts[0] + b'-s'
2843 2850 mids = parts[1:-1]
2844 2851 tail = parts[-1] + b'.i'
2845 2852 pieces = [head] + mids + [tail]
2846 2853 return b'/'.join(pieces)
2847 2854 else:
2848 2855 # the revlog is stored at the root of the store (changelog or
2849 2856 # manifest), no risk of collision.
2850 2857 return self.radix + b'.i.s'
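# For example, a filelog with radix ``data/foo.txt`` splits through
# ``data-s/foo.txt.i``, while a store-root revlog such as the changelog uses
# ``<radix>.i.s``.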
2851 2858
2852 2859 def _enforceinlinesize(self, tr):
2853 2860 """Check if the revlog is too big for inline and convert if so.
2854 2861
2855 2862 This should be called after revisions are added to the revlog. If the
2856 2863 revlog has grown too large to be an inline revlog, it will convert it
2857 2864 to use multiple index and data files.
2858 2865 """
2859 2866 tiprev = len(self) - 1
2860 2867 total_size = self.start(tiprev) + self.length(tiprev)
2861 2868 if not self._inline or (self._may_inline and total_size < _maxinline):
2862 2869 return
2863 2870
2864 2871 if self._docket is not None:
2865 2872 msg = b"inline revlog should not have a docket"
2866 2873 raise error.ProgrammingError(msg)
2867 2874
2868 2875 # In the common case, we enforce inline size because the revlog has
2869 2876 # been appened too. And in such case, it must have an initial offset
2870 2877 # recorded in the transaction.
2871 2878 troffset = tr.findoffset(self._inner.canonical_index_file)
2872 2879 pre_touched = troffset is not None
2873 2880 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2874 2881 raise error.RevlogError(
2875 2882 _(b"%s not found in the transaction") % self._indexfile
2876 2883 )
2877 2884
2878 2885 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2879 2886 tr.add(self._datafile, 0)
2880 2887
2881 2888 new_index_file_path = None
2882 2889 old_index_file_path = self._indexfile
2883 2890 new_index_file_path = self._split_index_file
2884 2891 opener = self.opener
2885 2892 weak_self = weakref.ref(self)
2886 2893
2887 2894 # the "split" index replaces the real index when the transaction is
2888 2895 # finalized
2889 2896 def finalize_callback(tr):
2890 2897 opener.rename(
2891 2898 new_index_file_path,
2892 2899 old_index_file_path,
2893 2900 checkambig=True,
2894 2901 )
2895 2902 maybe_self = weak_self()
2896 2903 if maybe_self is not None:
2897 2904 maybe_self._indexfile = old_index_file_path
2898 2905 maybe_self._inner.index_file = maybe_self._indexfile
2899 2906
2900 2907 def abort_callback(tr):
2901 2908 maybe_self = weak_self()
2902 2909 if maybe_self is not None:
2903 2910 maybe_self._indexfile = old_index_file_path
2904 2911 maybe_self._inner.inline = True
2905 2912 maybe_self._inner.index_file = old_index_file_path
2906 2913
2907 2914 tr.registertmp(new_index_file_path)
2908 2915 # we use 001 here to make sure this happens after the finalisation of
2909 2916 # the pending changelog write (using 000). Otherwise the two finalizers
2910 2917 # would step over each other and delete the changelog.i file.
2911 2918 if self.target[1] is not None:
2912 2919 callback_id = b'001-revlog-split-%d-%s' % self.target
2913 2920 else:
2914 2921 callback_id = b'001-revlog-split-%d' % self.target[0]
2915 2922 tr.addfinalize(callback_id, finalize_callback)
2916 2923 tr.addabort(callback_id, abort_callback)
2917 2924
2918 2925 self._format_flags &= ~FLAG_INLINE_DATA
2919 2926 self._inner.split_inline(
2920 2927 tr,
2921 2928 self._format_flags | self._format_version,
2922 2929 new_index_file_path=new_index_file_path,
2923 2930 )
2924 2931
2925 2932 self._inline = False
2926 2933 if new_index_file_path is not None:
2927 2934 self._indexfile = new_index_file_path
2928 2935
2929 2936 nodemaputil.setup_persistent_nodemap(tr, self)
2930 2937
2931 2938 def _nodeduplicatecallback(self, transaction, node):
2932 2939 """called when trying to add a node already stored."""
2933 2940
2934 2941 @contextlib.contextmanager
2935 2942 def reading(self):
2936 2943 with self._inner.reading():
2937 2944 yield
2938 2945
2939 2946 @contextlib.contextmanager
2940 2947 def _writing(self, transaction):
2941 2948 if self._trypending:
2942 2949 msg = b'try to write in a `trypending` revlog: %s'
2943 2950 msg %= self.display_id
2944 2951 raise error.ProgrammingError(msg)
2945 2952 if self._inner.is_writing:
2946 2953 yield
2947 2954 else:
2948 2955 data_end = None
2949 2956 sidedata_end = None
2950 2957 if self._docket is not None:
2951 2958 data_end = self._docket.data_end
2952 2959 sidedata_end = self._docket.sidedata_end
2953 2960 with self._inner.writing(
2954 2961 transaction,
2955 2962 data_end=data_end,
2956 2963 sidedata_end=sidedata_end,
2957 2964 ):
2958 2965 yield
2959 2966 if self._docket is not None:
2960 2967 self._write_docket(transaction)
2961 2968
2962 2969 @property
2963 2970 def is_delaying(self):
2964 2971 return self._inner.is_delaying
2965 2972
2966 2973 def _write_docket(self, transaction):
2967 2974 """write the current docket on disk
2968 2975
2969 2976 Exists as a method to help the changelog implement transaction logic
2970 2977
2971 2978 We could also imagine using the same transaction logic for all revlogs
2972 2979 since dockets are cheap."""
2973 2980 self._docket.write(transaction)
2974 2981
2975 2982 def addrevision(
2976 2983 self,
2977 2984 text,
2978 2985 transaction,
2979 2986 link,
2980 2987 p1,
2981 2988 p2,
2982 2989 cachedelta=None,
2983 2990 node=None,
2984 2991 flags=REVIDX_DEFAULT_FLAGS,
2985 2992 deltacomputer=None,
2986 2993 sidedata=None,
2987 2994 ):
2988 2995 """add a revision to the log
2989 2996
2990 2997 text - the revision data to add
2991 2998 transaction - the transaction object used for rollback
2992 2999 link - the linkrev data to add
2993 3000 p1, p2 - the parent nodeids of the revision
2994 3001 cachedelta - an optional precomputed delta
2995 3002 node - nodeid of revision; typically node is not specified, and it is
2996 3003 computed by default as hash(text, p1, p2), however subclasses might
2997 3004 use a different hashing method (and override checkhash() in that case)
2998 3005 flags - the known flags to set on the revision
2999 3006 deltacomputer - an optional deltacomputer instance shared between
3000 3007 multiple calls
3001 3008 """
3002 3009 if link == nullrev:
3003 3010 raise error.RevlogError(
3004 3011 _(b"attempted to add linkrev -1 to %s") % self.display_id
3005 3012 )
3006 3013
3007 3014 if sidedata is None:
3008 3015 sidedata = {}
3009 3016 elif sidedata and not self.feature_config.has_side_data:
3010 3017 raise error.ProgrammingError(
3011 3018 _(b"trying to add sidedata to a revlog that doesn't support them")
3012 3019 )
3013 3020
3014 3021 if flags:
3015 3022 node = node or self.hash(text, p1, p2)
3016 3023
3017 3024 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3018 3025
3019 3026 # If the flag processor modifies the revision data, ignore any provided
3020 3027 # cachedelta.
3021 3028 if rawtext != text:
3022 3029 cachedelta = None
3023 3030
3024 3031 if len(rawtext) > _maxentrysize:
3025 3032 raise error.RevlogError(
3026 3033 _(
3027 3034 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3028 3035 )
3029 3036 % (self.display_id, len(rawtext))
3030 3037 )
3031 3038
3032 3039 node = node or self.hash(rawtext, p1, p2)
3033 3040 rev = self.index.get_rev(node)
3034 3041 if rev is not None:
3035 3042 return rev
3036 3043
3037 3044 if validatehash:
3038 3045 self.checkhash(rawtext, node, p1=p1, p2=p2)
3039 3046
3040 3047 return self.addrawrevision(
3041 3048 rawtext,
3042 3049 transaction,
3043 3050 link,
3044 3051 p1,
3045 3052 p2,
3046 3053 node,
3047 3054 flags,
3048 3055 cachedelta=cachedelta,
3049 3056 deltacomputer=deltacomputer,
3050 3057 sidedata=sidedata,
3051 3058 )
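# Sketch, assuming an open revlog ``rl``, a transaction ``tr``, a valid
# ``linkrev`` and parent nodes ``p1``/``p2`` (``rl.nullid`` for a root):
# adding the same content twice returns the existing revision number.
#
#   rev = rl.addrevision(b'file content', tr, linkrev, p1, p2)
#   assert rl.addrevision(b'file content', tr, linkrev, p1, p2) == rev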
3052 3059
3053 3060 def addrawrevision(
3054 3061 self,
3055 3062 rawtext,
3056 3063 transaction,
3057 3064 link,
3058 3065 p1,
3059 3066 p2,
3060 3067 node,
3061 3068 flags,
3062 3069 cachedelta=None,
3063 3070 deltacomputer=None,
3064 3071 sidedata=None,
3065 3072 ):
3066 3073 """add a raw revision with known flags, node and parents
3067 3074 useful when reusing a revision not stored in this revlog (e.g. received
3068 3075 over the wire, or read from an external bundle).
3069 3076 """
3070 3077 with self._writing(transaction):
3071 3078 return self._addrevision(
3072 3079 node,
3073 3080 rawtext,
3074 3081 transaction,
3075 3082 link,
3076 3083 p1,
3077 3084 p2,
3078 3085 flags,
3079 3086 cachedelta,
3080 3087 deltacomputer=deltacomputer,
3081 3088 sidedata=sidedata,
3082 3089 )
3083 3090
3084 def compress(self, data):
3091 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3085 3092 return self._inner.compress(data)
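# ``compress`` returns a two element tuple: a (possibly empty) header, for
# example ``b'u'`` when the data is kept uncompressed, followed by the bytes
# to store. The sidedata handling further down consumes it the same way:
#
#   header, data = rl.compress(b'some revision text')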
3086 3093
3087 3094 def decompress(self, data):
3088 3095 return self._inner.decompress(data)
3089 3096
3090 3097 def _addrevision(
3091 3098 self,
3092 3099 node,
3093 3100 rawtext,
3094 3101 transaction,
3095 3102 link,
3096 3103 p1,
3097 3104 p2,
3098 3105 flags,
3099 3106 cachedelta,
3100 3107 alwayscache=False,
3101 3108 deltacomputer=None,
3102 3109 sidedata=None,
3103 3110 ):
3104 3111 """internal function to add revisions to the log
3105 3112
3106 3113 see addrevision for argument descriptions.
3107 3114
3108 3115 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3109 3116
3110 3117 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3111 3118 be used.
3112 3119
3113 3120 invariants:
3114 3121 - rawtext is optional (can be None); if not set, cachedelta must be set.
3115 3122 if both are set, they must correspond to each other.
3116 3123 """
3117 3124 if node == self.nullid:
3118 3125 raise error.RevlogError(
3119 3126 _(b"%s: attempt to add null revision") % self.display_id
3120 3127 )
3121 3128 if (
3122 3129 node == self.nodeconstants.wdirid
3123 3130 or node in self.nodeconstants.wdirfilenodeids
3124 3131 ):
3125 3132 raise error.RevlogError(
3126 3133 _(b"%s: attempt to add wdir revision") % self.display_id
3127 3134 )
3128 3135 if self._inner._writinghandles is None:
3129 3136 msg = b'adding revision outside `revlog._writing` context'
3130 3137 raise error.ProgrammingError(msg)
3131 3138
3132 3139 btext = [rawtext]
3133 3140
3134 3141 curr = len(self)
3135 3142 prev = curr - 1
3136 3143
3137 3144 offset = self._get_data_offset(prev)
3138 3145
3139 3146 if self._concurrencychecker:
3140 3147 ifh, dfh, sdfh = self._inner._writinghandles
3141 3148 # XXX no checking for the sidedata file
3142 3149 if self._inline:
3143 3150 # offset is "as if" it were in the .d file, so we need to add on
3144 3151 # the size of the entry metadata.
3145 3152 self._concurrencychecker(
3146 3153 ifh, self._indexfile, offset + curr * self.index.entry_size
3147 3154 )
3148 3155 else:
3149 3156 # Entries in the .i are a consistent size.
3150 3157 self._concurrencychecker(
3151 3158 ifh, self._indexfile, curr * self.index.entry_size
3152 3159 )
3153 3160 self._concurrencychecker(dfh, self._datafile, offset)
3154 3161
3155 3162 p1r, p2r = self.rev(p1), self.rev(p2)
3156 3163
3157 3164 # full versions are inserted when the needed deltas
3158 3165 # become comparable to the uncompressed text
3159 3166 if rawtext is None:
3160 3167             # need the rawtext size before it is changed by flag processors,
3161 3168             # which is the non-raw size. use revlog explicitly to avoid filelog's
3162 3169             # extra logic that might remove metadata size.
3163 3170 textlen = mdiff.patchedsize(
3164 3171 revlog.size(self, cachedelta[0]), cachedelta[1]
3165 3172 )
3166 3173 else:
3167 3174 textlen = len(rawtext)
3168 3175
3169 3176 if deltacomputer is None:
3170 3177 write_debug = None
3171 3178 if self.delta_config.debug_delta:
3172 3179 write_debug = transaction._report
3173 3180 deltacomputer = deltautil.deltacomputer(
3174 3181 self, write_debug=write_debug
3175 3182 )
3176 3183
3177 3184 if cachedelta is not None and len(cachedelta) == 2:
3178 3185 # If the cached delta has no information about how it should be
3179 3186 # reused, add the default reuse instruction according to the
3180 3187 # revlog's configuration.
3181 3188 if (
3182 3189 self.delta_config.general_delta
3183 3190 and self.delta_config.lazy_delta_base
3184 3191 ):
3185 3192 delta_base_reuse = DELTA_BASE_REUSE_TRY
3186 3193 else:
3187 3194 delta_base_reuse = DELTA_BASE_REUSE_NO
3188 3195 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
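            # (illustrative note) the normalized cachedelta is now the 3-tuple
            #   (delta_base_rev, delta_bytes, delta_base_reuse_policy)
            # which is the shape consumed by the delta computer below.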
3189 3196
3190 3197 revinfo = revlogutils.revisioninfo(
3191 3198 node,
3192 3199 p1,
3193 3200 p2,
3194 3201 btext,
3195 3202 textlen,
3196 3203 cachedelta,
3197 3204 flags,
3198 3205 )
3199 3206
3200 3207 deltainfo = deltacomputer.finddeltainfo(revinfo)
3201 3208
3202 3209 compression_mode = COMP_MODE_INLINE
3203 3210 if self._docket is not None:
3204 3211 default_comp = self._docket.default_compression_header
3205 3212 r = deltautil.delta_compression(default_comp, deltainfo)
3206 3213 compression_mode, deltainfo = r
3207 3214
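        # (sketch of the branches below, my reading of the code) the
        # compressed sidedata is kept only if the engine actually compressed
        # it (empty header, first byte not NUL) and the result is smaller than
        # the plain serialization; COMP_MODE_DEFAULT is recorded when the
        # payload starts with the docket's default compression header,
        # COMP_MODE_INLINE otherwise, and COMP_MODE_PLAIN remains the fallback
        # when compression does not pay off.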
3208 3215 sidedata_compression_mode = COMP_MODE_INLINE
3209 3216 if sidedata and self.feature_config.has_side_data:
3210 3217 sidedata_compression_mode = COMP_MODE_PLAIN
3211 3218 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3212 3219 sidedata_offset = self._docket.sidedata_end
3213 3220 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3214 3221 if (
3215 3222 h != b'u'
3216 3223 and comp_sidedata[0:1] != b'\0'
3217 3224 and len(comp_sidedata) < len(serialized_sidedata)
3218 3225 ):
3219 3226 assert not h
3220 3227 if (
3221 3228 comp_sidedata[0:1]
3222 3229 == self._docket.default_compression_header
3223 3230 ):
3224 3231 sidedata_compression_mode = COMP_MODE_DEFAULT
3225 3232 serialized_sidedata = comp_sidedata
3226 3233 else:
3227 3234 sidedata_compression_mode = COMP_MODE_INLINE
3228 3235 serialized_sidedata = comp_sidedata
3229 3236 else:
3230 3237 serialized_sidedata = b""
3231 3238             # Don't store the offset if the sidedata is empty; that way
3232 3239             # we can easily detect empty sidedata, and it will be no different
3233 3240             # from sidedata we add manually.
3234 3241 sidedata_offset = 0
3235 3242
3236 3243 rank = RANK_UNKNOWN
3237 3244 if self.feature_config.compute_rank:
3238 3245 if (p1r, p2r) == (nullrev, nullrev):
3239 3246 rank = 1
3240 3247 elif p1r != nullrev and p2r == nullrev:
3241 3248 rank = 1 + self.fast_rank(p1r)
3242 3249 elif p1r == nullrev and p2r != nullrev:
3243 3250 rank = 1 + self.fast_rank(p2r)
3244 3251 else: # merge node
3245 3252 if rustdagop is not None and self.index.rust_ext_compat:
3246 3253 rank = rustdagop.rank(self.index, p1r, p2r)
3247 3254 else:
3248 3255 pmin, pmax = sorted((p1r, p2r))
3249 3256 rank = 1 + self.fast_rank(pmax)
3250 3257 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
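        # (worked explanation, added for readability) the rank of a revision
        # is the size of its ancestor set, itself included. For a merge we
        # start from 1 + fast_rank(pmax) (the new node plus all ancestors of
        # the "larger" parent) and then add the ancestors of pmin that are not
        # already ancestors of pmax, which is exactly what
        # findmissingrevs([pmax], [pmin]) enumerates.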
3251 3258
3252 3259 e = revlogutils.entry(
3253 3260 flags=flags,
3254 3261 data_offset=offset,
3255 3262 data_compressed_length=deltainfo.deltalen,
3256 3263 data_uncompressed_length=textlen,
3257 3264 data_compression_mode=compression_mode,
3258 3265 data_delta_base=deltainfo.base,
3259 3266 link_rev=link,
3260 3267 parent_rev_1=p1r,
3261 3268 parent_rev_2=p2r,
3262 3269 node_id=node,
3263 3270 sidedata_offset=sidedata_offset,
3264 3271 sidedata_compressed_length=len(serialized_sidedata),
3265 3272 sidedata_compression_mode=sidedata_compression_mode,
3266 3273 rank=rank,
3267 3274 )
3268 3275
3269 3276 self.index.append(e)
3270 3277 entry = self.index.entry_binary(curr)
3271 3278 if curr == 0 and self._docket is None:
3272 3279 header = self._format_flags | self._format_version
3273 3280 header = self.index.pack_header(header)
3274 3281 entry = header + entry
3275 3282 self._writeentry(
3276 3283 transaction,
3277 3284 entry,
3278 3285 deltainfo.data,
3279 3286 link,
3280 3287 offset,
3281 3288 serialized_sidedata,
3282 3289 sidedata_offset,
3283 3290 )
3284 3291
3285 3292 rawtext = btext[0]
3286 3293
3287 3294 if alwayscache and rawtext is None:
3288 3295 rawtext = deltacomputer.buildtext(revinfo)
3289 3296
3290 3297 if type(rawtext) == bytes: # only accept immutable objects
3291 3298 self._inner._revisioncache = (node, curr, rawtext)
3292 3299 self._chainbasecache[curr] = deltainfo.chainbase
3293 3300 return curr
3294 3301
3295 3302 def _get_data_offset(self, prev):
3296 3303 """Returns the current offset in the (in-transaction) data file.
3297 3304         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3298 3305 file to store that information: since sidedata can be rewritten to the
3299 3306 end of the data file within a transaction, you can have cases where, for
3300 3307 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3301 3308 to `n - 1`'s sidedata being written after `n`'s data.
3302 3309
3303 3310 TODO cache this in a docket file before getting out of experimental."""
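        # (sketch) for revlog versions without a docket the answer is simply
        # self.end(prev), i.e. self.start(prev) + self.length(prev); with a
        # docket we must trust docket.data_end instead, since a sidedata
        # rewrite inside the same transaction may already have appended data
        # past end(prev).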
3304 3311 if self._docket is None:
3305 3312 return self.end(prev)
3306 3313 else:
3307 3314 return self._docket.data_end
3308 3315
3309 3316 def _writeentry(
3310 3317 self,
3311 3318 transaction,
3312 3319 entry,
3313 3320 data,
3314 3321 link,
3315 3322 offset,
3316 3323 sidedata,
3317 3324 sidedata_offset,
3318 3325 ):
3319 3326 # Files opened in a+ mode have inconsistent behavior on various
3320 3327 # platforms. Windows requires that a file positioning call be made
3321 3328 # when the file handle transitions between reads and writes. See
3322 3329 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3323 3330 # platforms, Python or the platform itself can be buggy. Some versions
3324 3331 # of Solaris have been observed to not append at the end of the file
3325 3332 # if the file was seeked to before the end. See issue4943 for more.
3326 3333 #
3327 3334 # We work around this issue by inserting a seek() before writing.
3328 3335 # Note: This is likely not necessary on Python 3. However, because
3329 3336 # the file handle is reused for reads and may be seeked there, we need
3330 3337 # to be careful before changing this.
3331 3338 index_end = data_end = sidedata_end = None
3332 3339 if self._docket is not None:
3333 3340 index_end = self._docket.index_end
3334 3341 data_end = self._docket.data_end
3335 3342 sidedata_end = self._docket.sidedata_end
3336 3343
3337 3344 files_end = self._inner.write_entry(
3338 3345 transaction,
3339 3346 entry,
3340 3347 data,
3341 3348 link,
3342 3349 offset,
3343 3350 sidedata,
3344 3351 sidedata_offset,
3345 3352 index_end,
3346 3353 data_end,
3347 3354 sidedata_end,
3348 3355 )
3349 3356 self._enforceinlinesize(transaction)
3350 3357 if self._docket is not None:
3351 3358 self._docket.index_end = files_end[0]
3352 3359 self._docket.data_end = files_end[1]
3353 3360 self._docket.sidedata_end = files_end[2]
3354 3361
3355 3362 nodemaputil.setup_persistent_nodemap(transaction, self)
3356 3363
3357 3364 def addgroup(
3358 3365 self,
3359 3366 deltas,
3360 3367 linkmapper,
3361 3368 transaction,
3362 3369 alwayscache=False,
3363 3370 addrevisioncb=None,
3364 3371 duplicaterevisioncb=None,
3365 3372 debug_info=None,
3366 3373 delta_base_reuse_policy=None,
3367 3374 ):
3368 3375 """
3369 3376 add a delta group
3370 3377
3371 3378 given a set of deltas, add them to the revision log. the
3372 3379 first delta is against its parent, which should be in our
3373 3380 log, the rest are against the previous delta.
3374 3381
3375 3382 If ``addrevisioncb`` is defined, it will be called with arguments of
3376 3383 this revlog and the node that was added.
3377 3384 """
3378 3385
3379 3386 if self._adding_group:
3380 3387 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3381 3388
3382 3389 # read the default delta-base reuse policy from revlog config if the
3383 3390 # group did not specify one.
3384 3391 if delta_base_reuse_policy is None:
3385 3392 if (
3386 3393 self.delta_config.general_delta
3387 3394 and self.delta_config.lazy_delta_base
3388 3395 ):
3389 3396 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3390 3397 else:
3391 3398 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3392 3399
3393 3400 self._adding_group = True
3394 3401 empty = True
3395 3402 try:
3396 3403 with self._writing(transaction):
3397 3404 write_debug = None
3398 3405 if self.delta_config.debug_delta:
3399 3406 write_debug = transaction._report
3400 3407 deltacomputer = deltautil.deltacomputer(
3401 3408 self,
3402 3409 write_debug=write_debug,
3403 3410 debug_info=debug_info,
3404 3411 )
3405 3412 # loop through our set of deltas
3406 3413 for data in deltas:
3407 3414 (
3408 3415 node,
3409 3416 p1,
3410 3417 p2,
3411 3418 linknode,
3412 3419 deltabase,
3413 3420 delta,
3414 3421 flags,
3415 3422 sidedata,
3416 3423 ) = data
3417 3424 link = linkmapper(linknode)
3418 3425 flags = flags or REVIDX_DEFAULT_FLAGS
3419 3426
3420 3427 rev = self.index.get_rev(node)
3421 3428 if rev is not None:
3422 3429 # this can happen if two branches make the same change
3423 3430 self._nodeduplicatecallback(transaction, rev)
3424 3431 if duplicaterevisioncb:
3425 3432 duplicaterevisioncb(self, rev)
3426 3433 empty = False
3427 3434 continue
3428 3435
3429 3436 for p in (p1, p2):
3430 3437 if not self.index.has_node(p):
3431 3438 raise error.LookupError(
3432 3439 p, self.radix, _(b'unknown parent')
3433 3440 )
3434 3441
3435 3442 if not self.index.has_node(deltabase):
3436 3443 raise error.LookupError(
3437 3444 deltabase, self.display_id, _(b'unknown delta base')
3438 3445 )
3439 3446
3440 3447 baserev = self.rev(deltabase)
3441 3448
3442 3449 if baserev != nullrev and self.iscensored(baserev):
3443 3450 # if base is censored, delta must be full replacement in a
3444 3451 # single patch operation
3445 3452 hlen = struct.calcsize(b">lll")
3446 3453 oldlen = self.rawsize(baserev)
3447 3454 newlen = len(delta) - hlen
3448 3455 if delta[:hlen] != mdiff.replacediffheader(
3449 3456 oldlen, newlen
3450 3457 ):
3451 3458 raise error.CensoredBaseError(
3452 3459 self.display_id, self.node(baserev)
3453 3460 )
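                    # (sketch, my reading of the check above) a revlog delta
                    # is a sequence of hunks, each introduced by a big-endian
                    # (start, end, newlength) header packed as ">lll" and
                    # followed by `newlength` bytes of new data; a full
                    # replacement is therefore a single hunk covering
                    # 0..oldlen, whose header mdiff.replacediffheader(oldlen,
                    # newlen) is assumed to build.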
3454 3461
3455 3462 if not flags and self._peek_iscensored(baserev, delta):
3456 3463 flags |= REVIDX_ISCENSORED
3457 3464
3458 3465 # We assume consumers of addrevisioncb will want to retrieve
3459 3466 # the added revision, which will require a call to
3460 3467 # revision(). revision() will fast path if there is a cache
3461 3468 # hit. So, we tell _addrevision() to always cache in this case.
3462 3469 # We're only using addgroup() in the context of changegroup
3463 3470 # generation so the revision data can always be handled as raw
3464 3471 # by the flagprocessor.
3465 3472 rev = self._addrevision(
3466 3473 node,
3467 3474 None,
3468 3475 transaction,
3469 3476 link,
3470 3477 p1,
3471 3478 p2,
3472 3479 flags,
3473 3480 (baserev, delta, delta_base_reuse_policy),
3474 3481 alwayscache=alwayscache,
3475 3482 deltacomputer=deltacomputer,
3476 3483 sidedata=sidedata,
3477 3484 )
3478 3485
3479 3486 if addrevisioncb:
3480 3487 addrevisioncb(self, rev)
3481 3488 empty = False
3482 3489 finally:
3483 3490 self._adding_group = False
3484 3491 return not empty
3485 3492
3486 3493 def iscensored(self, rev):
3487 3494 """Check if a file revision is censored."""
3488 3495 if not self.feature_config.censorable:
3489 3496 return False
3490 3497
3491 3498 return self.flags(rev) & REVIDX_ISCENSORED
3492 3499
3493 3500 def _peek_iscensored(self, baserev, delta):
3494 3501 """Quickly check if a delta produces a censored revision."""
3495 3502 if not self.feature_config.censorable:
3496 3503 return False
3497 3504
3498 3505 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3499 3506
3500 3507 def getstrippoint(self, minlink):
3501 3508 """find the minimum rev that must be stripped to strip the linkrev
3502 3509
3503 3510 Returns a tuple containing the minimum rev and a set of all revs that
3504 3511 have linkrevs that will be broken by this strip.
3505 3512 """
3506 3513 return storageutil.resolvestripinfo(
3507 3514 minlink,
3508 3515 len(self) - 1,
3509 3516 self.headrevs(),
3510 3517 self.linkrev,
3511 3518 self.parentrevs,
3512 3519 )
3513 3520
3514 3521 def strip(self, minlink, transaction):
3515 3522 """truncate the revlog on the first revision with a linkrev >= minlink
3516 3523
3517 3524 This function is called when we're stripping revision minlink and
3518 3525 its descendants from the repository.
3519 3526
3520 3527 We have to remove all revisions with linkrev >= minlink, because
3521 3528 the equivalent changelog revisions will be renumbered after the
3522 3529 strip.
3523 3530
3524 3531 So we truncate the revlog on the first of these revisions, and
3525 3532 trust that the caller has saved the revisions that shouldn't be
3526 3533 removed and that it'll re-add them after this truncation.
3527 3534 """
3528 3535 if len(self) == 0:
3529 3536 return
3530 3537
3531 3538 rev, _ = self.getstrippoint(minlink)
3532 3539 if rev == len(self):
3533 3540 return
3534 3541
3535 3542 # first truncate the files on disk
3536 3543 data_end = self.start(rev)
3537 3544 if not self._inline:
3538 3545 transaction.add(self._datafile, data_end)
3539 3546 end = rev * self.index.entry_size
3540 3547 else:
3541 3548 end = data_end + (rev * self.index.entry_size)
3542 3549
3543 3550 if self._sidedatafile:
3544 3551 sidedata_end = self.sidedata_cut_off(rev)
3545 3552 transaction.add(self._sidedatafile, sidedata_end)
3546 3553
3547 3554 transaction.add(self._indexfile, end)
3548 3555 if self._docket is not None:
3549 3556             # XXX we could leverage the docket while stripping. However it is
3550 3557             # not powerful enough at the time of this comment.
3551 3558 self._docket.index_end = end
3552 3559 self._docket.data_end = data_end
3553 3560 self._docket.sidedata_end = sidedata_end
3554 3561 self._docket.write(transaction, stripping=True)
3555 3562
3556 3563 # then reset internal state in memory to forget those revisions
3557 3564 self._chaininfocache = util.lrucachedict(500)
3558 3565 self._inner.clear_cache()
3559 3566
3560 3567 del self.index[rev:-1]
3561 3568
3562 3569 def checksize(self):
3563 3570 """Check size of index and data files
3564 3571
3565 3572 return a (dd, di) tuple.
3566 3573 - dd: extra bytes for the "data" file
3567 3574 - di: extra bytes for the "index" file
3568 3575
3569 3576 A healthy revlog will return (0, 0).
3570 3577 """
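        # Hedged usage sketch (caller names assumed): a quick corruption probe
        # before deciding whether a repair pass is needed.
        #
        #   dd, di = rl.checksize()
        #   if dd or di:
        #       # trailing bytes in the data file and/or a damaged index
        #       report_possible_corruption(rl, dd, di)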
3571 3578 expected = 0
3572 3579 if len(self):
3573 3580 expected = max(0, self.end(len(self) - 1))
3574 3581
3575 3582 try:
3576 3583 with self._datafp() as f:
3577 3584 f.seek(0, io.SEEK_END)
3578 3585 actual = f.tell()
3579 3586 dd = actual - expected
3580 3587 except FileNotFoundError:
3581 3588 dd = 0
3582 3589
3583 3590 try:
3584 3591 f = self.opener(self._indexfile)
3585 3592 f.seek(0, io.SEEK_END)
3586 3593 actual = f.tell()
3587 3594 f.close()
3588 3595 s = self.index.entry_size
3589 3596 i = max(0, actual // s)
3590 3597 di = actual - (i * s)
3591 3598 if self._inline:
3592 3599 databytes = 0
3593 3600 for r in self:
3594 3601 databytes += max(0, self.length(r))
3595 3602 dd = 0
3596 3603 di = actual - len(self) * s - databytes
3597 3604 except FileNotFoundError:
3598 3605 di = 0
3599 3606
3600 3607 return (dd, di)
3601 3608
3602 3609 def files(self):
3603 3610 """return list of files that compose this revlog"""
3604 3611 res = [self._indexfile]
3605 3612 if self._docket_file is None:
3606 3613 if not self._inline:
3607 3614 res.append(self._datafile)
3608 3615 else:
3609 3616 res.append(self._docket_file)
3610 3617 res.extend(self._docket.old_index_filepaths(include_empty=False))
3611 3618 if self._docket.data_end:
3612 3619 res.append(self._datafile)
3613 3620 res.extend(self._docket.old_data_filepaths(include_empty=False))
3614 3621 if self._docket.sidedata_end:
3615 3622 res.append(self._sidedatafile)
3616 3623 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3617 3624 return res
3618 3625
3619 3626 def emitrevisions(
3620 3627 self,
3621 3628 nodes,
3622 3629 nodesorder=None,
3623 3630 revisiondata=False,
3624 3631 assumehaveparentrevisions=False,
3625 3632 deltamode=repository.CG_DELTAMODE_STD,
3626 3633 sidedata_helpers=None,
3627 3634 debug_info=None,
3628 3635 ):
3629 3636 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3630 3637 raise error.ProgrammingError(
3631 3638 b'unhandled value for nodesorder: %s' % nodesorder
3632 3639 )
3633 3640
3634 3641 if nodesorder is None and not self.delta_config.general_delta:
3635 3642 nodesorder = b'storage'
3636 3643
3637 3644 if (
3638 3645 not self._storedeltachains
3639 3646 and deltamode != repository.CG_DELTAMODE_PREV
3640 3647 ):
3641 3648 deltamode = repository.CG_DELTAMODE_FULL
3642 3649
3643 3650 return storageutil.emitrevisions(
3644 3651 self,
3645 3652 nodes,
3646 3653 nodesorder,
3647 3654 revlogrevisiondelta,
3648 3655 deltaparentfn=self.deltaparent,
3649 3656 candeltafn=self._candelta,
3650 3657 rawsizefn=self.rawsize,
3651 3658 revdifffn=self.revdiff,
3652 3659 flagsfn=self.flags,
3653 3660 deltamode=deltamode,
3654 3661 revisiondata=revisiondata,
3655 3662 assumehaveparentrevisions=assumehaveparentrevisions,
3656 3663 sidedata_helpers=sidedata_helpers,
3657 3664 debug_info=debug_info,
3658 3665 )
3659 3666
3660 3667 DELTAREUSEALWAYS = b'always'
3661 3668 DELTAREUSESAMEREVS = b'samerevs'
3662 3669 DELTAREUSENEVER = b'never'
3663 3670
3664 3671 DELTAREUSEFULLADD = b'fulladd'
3665 3672
3666 3673 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3667 3674
3668 3675 def clone(
3669 3676 self,
3670 3677 tr,
3671 3678 destrevlog,
3672 3679 addrevisioncb=None,
3673 3680 deltareuse=DELTAREUSESAMEREVS,
3674 3681 forcedeltabothparents=None,
3675 3682 sidedata_helpers=None,
3676 3683 ):
3677 3684 """Copy this revlog to another, possibly with format changes.
3678 3685
3679 3686 The destination revlog will contain the same revisions and nodes.
3680 3687 However, it may not be bit-for-bit identical due to e.g. delta encoding
3681 3688 differences.
3682 3689
3683 3690         The ``deltareuse`` argument controls how deltas from the existing revlog
3684 3691 are preserved in the destination revlog. The argument can have the
3685 3692 following values:
3686 3693
3687 3694 DELTAREUSEALWAYS
3688 3695 Deltas will always be reused (if possible), even if the destination
3689 3696 revlog would not select the same revisions for the delta. This is the
3690 3697 fastest mode of operation.
3691 3698 DELTAREUSESAMEREVS
3692 3699 Deltas will be reused if the destination revlog would pick the same
3693 3700 revisions for the delta. This mode strikes a balance between speed
3694 3701 and optimization.
3695 3702 DELTAREUSENEVER
3696 3703 Deltas will never be reused. This is the slowest mode of execution.
3697 3704 This mode can be used to recompute deltas (e.g. if the diff/delta
3698 3705 algorithm changes).
3699 3706 DELTAREUSEFULLADD
3700 3707           Revisions will be re-added as if they were new content. This is
3701 3708           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3702 3709           e.g. large file detection and handling.
3703 3710
3704 3711 Delta computation can be slow, so the choice of delta reuse policy can
3705 3712 significantly affect run time.
3706 3713
3707 3714 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3708 3715 two extremes. Deltas will be reused if they are appropriate. But if the
3709 3716 delta could choose a better revision, it will do so. This means if you
3710 3717 are converting a non-generaldelta revlog to a generaldelta revlog,
3711 3718 deltas will be recomputed if the delta's parent isn't a parent of the
3712 3719 revision.
3713 3720
3714 3721 In addition to the delta policy, the ``forcedeltabothparents``
3715 3722         argument controls whether to force computing deltas against both parents
3716 3723         for merges. By default, the destination revlog's current setting is used.
3717 3724
3718 3725 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3719 3726 `sidedata_helpers`.
3720 3727 """
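        # Hedged usage sketch (transaction handling assumed, not taken from
        # this file): recomputing every delta while copying a revlog, e.g. as
        # part of a format upgrade.
        #
        #   with repo.lock(), repo.transaction(b'clone-revlog') as tr:
        #       src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)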
3721 3728 if deltareuse not in self.DELTAREUSEALL:
3722 3729 raise ValueError(
3723 3730 _(b'value for deltareuse invalid: %s') % deltareuse
3724 3731 )
3725 3732
3726 3733 if len(destrevlog):
3727 3734 raise ValueError(_(b'destination revlog is not empty'))
3728 3735
3729 3736 if getattr(self, 'filteredrevs', None):
3730 3737 raise ValueError(_(b'source revlog has filtered revisions'))
3731 3738 if getattr(destrevlog, 'filteredrevs', None):
3732 3739 raise ValueError(_(b'destination revlog has filtered revisions'))
3733 3740
3734 3741 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3735 3742 # if possible.
3736 3743 old_delta_config = destrevlog.delta_config
3737 3744 destrevlog.delta_config = destrevlog.delta_config.copy()
3738 3745
3739 3746 try:
3740 3747 if deltareuse == self.DELTAREUSEALWAYS:
3741 3748 destrevlog.delta_config.lazy_delta_base = True
3742 3749 destrevlog.delta_config.lazy_delta = True
3743 3750 elif deltareuse == self.DELTAREUSESAMEREVS:
3744 3751 destrevlog.delta_config.lazy_delta_base = False
3745 3752 destrevlog.delta_config.lazy_delta = True
3746 3753 elif deltareuse == self.DELTAREUSENEVER:
3747 3754 destrevlog.delta_config.lazy_delta_base = False
3748 3755 destrevlog.delta_config.lazy_delta = False
3749 3756
3750 3757 delta_both_parents = (
3751 3758 forcedeltabothparents or old_delta_config.delta_both_parents
3752 3759 )
3753 3760 destrevlog.delta_config.delta_both_parents = delta_both_parents
3754 3761
3755 3762 with self.reading(), destrevlog._writing(tr):
3756 3763 self._clone(
3757 3764 tr,
3758 3765 destrevlog,
3759 3766 addrevisioncb,
3760 3767 deltareuse,
3761 3768 forcedeltabothparents,
3762 3769 sidedata_helpers,
3763 3770 )
3764 3771
3765 3772 finally:
3766 3773 destrevlog.delta_config = old_delta_config
3767 3774
3768 3775 def _clone(
3769 3776 self,
3770 3777 tr,
3771 3778 destrevlog,
3772 3779 addrevisioncb,
3773 3780 deltareuse,
3774 3781 forcedeltabothparents,
3775 3782 sidedata_helpers,
3776 3783 ):
3777 3784 """perform the core duty of `revlog.clone` after parameter processing"""
3778 3785 write_debug = None
3779 3786 if self.delta_config.debug_delta:
3780 3787 write_debug = tr._report
3781 3788 deltacomputer = deltautil.deltacomputer(
3782 3789 destrevlog,
3783 3790 write_debug=write_debug,
3784 3791 )
3785 3792 index = self.index
3786 3793 for rev in self:
3787 3794 entry = index[rev]
3788 3795
3789 3796 # Some classes override linkrev to take filtered revs into
3790 3797 # account. Use raw entry from index.
3791 3798 flags = entry[0] & 0xFFFF
3792 3799 linkrev = entry[4]
3793 3800 p1 = index[entry[5]][7]
3794 3801 p2 = index[entry[6]][7]
3795 3802 node = entry[7]
3796 3803
3797 3804 # (Possibly) reuse the delta from the revlog if allowed and
3798 3805 # the revlog chunk is a delta.
3799 3806 cachedelta = None
3800 3807 rawtext = None
3801 3808 if deltareuse == self.DELTAREUSEFULLADD:
3802 3809 text = self._revisiondata(rev)
3803 3810 sidedata = self.sidedata(rev)
3804 3811
3805 3812 if sidedata_helpers is not None:
3806 3813 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3807 3814 self, sidedata_helpers, sidedata, rev
3808 3815 )
3809 3816 flags = flags | new_flags[0] & ~new_flags[1]
3810 3817
3811 3818 destrevlog.addrevision(
3812 3819 text,
3813 3820 tr,
3814 3821 linkrev,
3815 3822 p1,
3816 3823 p2,
3817 3824 cachedelta=cachedelta,
3818 3825 node=node,
3819 3826 flags=flags,
3820 3827 deltacomputer=deltacomputer,
3821 3828 sidedata=sidedata,
3822 3829 )
3823 3830 else:
3824 3831 if destrevlog.delta_config.lazy_delta:
3825 3832 dp = self.deltaparent(rev)
3826 3833 if dp != nullrev:
3827 3834 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3828 3835
3829 3836 sidedata = None
3830 3837 if not cachedelta:
3831 3838 try:
3832 3839 rawtext = self._revisiondata(rev)
3833 3840 except error.CensoredNodeError as censored:
3834 3841 assert flags & REVIDX_ISCENSORED
3835 3842 rawtext = censored.tombstone
3836 3843 sidedata = self.sidedata(rev)
3837 3844 if sidedata is None:
3838 3845 sidedata = self.sidedata(rev)
3839 3846
3840 3847 if sidedata_helpers is not None:
3841 3848 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3842 3849 self, sidedata_helpers, sidedata, rev
3843 3850 )
3844 3851 flags = flags | new_flags[0] & ~new_flags[1]
3845 3852
3846 3853 destrevlog._addrevision(
3847 3854 node,
3848 3855 rawtext,
3849 3856 tr,
3850 3857 linkrev,
3851 3858 p1,
3852 3859 p2,
3853 3860 flags,
3854 3861 cachedelta,
3855 3862 deltacomputer=deltacomputer,
3856 3863 sidedata=sidedata,
3857 3864 )
3858 3865
3859 3866 if addrevisioncb:
3860 3867 addrevisioncb(self, rev, node)
3861 3868
3862 3869 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3863 3870 if self._format_version == REVLOGV0:
3864 3871 raise error.RevlogError(
3865 3872 _(b'cannot censor with version %d revlogs')
3866 3873 % self._format_version
3867 3874 )
3868 3875 elif self._format_version == REVLOGV1:
3869 3876 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3870 3877 else:
3871 3878 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3872 3879
3873 3880 def verifyintegrity(self, state):
3874 3881 """Verifies the integrity of the revlog.
3875 3882
3876 3883 Yields ``revlogproblem`` instances describing problems that are
3877 3884 found.
3878 3885 """
3879 3886 dd, di = self.checksize()
3880 3887 if dd:
3881 3888 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3882 3889 if di:
3883 3890 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3884 3891
3885 3892 version = self._format_version
3886 3893
3887 3894 # The verifier tells us what version revlog we should be.
3888 3895 if version != state[b'expectedversion']:
3889 3896 yield revlogproblem(
3890 3897 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3891 3898 % (self.display_id, version, state[b'expectedversion'])
3892 3899 )
3893 3900
3894 3901 state[b'skipread'] = set()
3895 3902 state[b'safe_renamed'] = set()
3896 3903
3897 3904 for rev in self:
3898 3905 node = self.node(rev)
3899 3906
3900 3907 # Verify contents. 4 cases to care about:
3901 3908 #
3902 3909 # common: the most common case
3903 3910 # rename: with a rename
3904 3911 # meta: file content starts with b'\1\n', the metadata
3905 3912 # header defined in filelog.py, but without a rename
3906 3913 # ext: content stored externally
3907 3914 #
3908 3915 # More formally, their differences are shown below:
3909 3916 #
3910 3917 # | common | rename | meta | ext
3911 3918 # -------------------------------------------------------
3912 3919 # flags() | 0 | 0 | 0 | not 0
3913 3920 # renamed() | False | True | False | ?
3914 3921 # rawtext[0:2]=='\1\n'| False | True | True | ?
3915 3922 #
3916 3923 # "rawtext" means the raw text stored in revlog data, which
3917 3924 # could be retrieved by "rawdata(rev)". "text"
3918 3925 # mentioned below is "revision(rev)".
3919 3926 #
3920 3927 # There are 3 different lengths stored physically:
3921 3928 # 1. L1: rawsize, stored in revlog index
3922 3929 # 2. L2: len(rawtext), stored in revlog data
3923 3930 # 3. L3: len(text), stored in revlog data if flags==0, or
3924 3931 # possibly somewhere else if flags!=0
3925 3932 #
3926 3933 # L1 should be equal to L2. L3 could be different from them.
3927 3934 # "text" may or may not affect commit hash depending on flag
3928 3935 # processors (see flagutil.addflagprocessor).
3929 3936 #
3930 3937 # | common | rename | meta | ext
3931 3938 # -------------------------------------------------
3932 3939 # rawsize() | L1 | L1 | L1 | L1
3933 3940 # size() | L1 | L2-LM | L1(*) | L1 (?)
3934 3941 # len(rawtext) | L2 | L2 | L2 | L2
3935 3942 # len(text) | L2 | L2 | L2 | L3
3936 3943 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3937 3944 #
3938 3945 # LM: length of metadata, depending on rawtext
3939 3946 # (*): not ideal, see comment in filelog.size
3940 3947 # (?): could be "- len(meta)" if the resolved content has
3941 3948 # rename metadata
3942 3949 #
3943 3950 # Checks needed to be done:
3944 3951 # 1. length check: L1 == L2, in all cases.
3945 3952 # 2. hash check: depending on flag processor, we may need to
3946 3953 # use either "text" (external), or "rawtext" (in revlog).
3947 3954
3948 3955 try:
3949 3956 skipflags = state.get(b'skipflags', 0)
3950 3957 if skipflags:
3951 3958 skipflags &= self.flags(rev)
3952 3959
3953 3960 _verify_revision(self, skipflags, state, node)
3954 3961
3955 3962 l1 = self.rawsize(rev)
3956 3963 l2 = len(self.rawdata(node))
3957 3964
3958 3965 if l1 != l2:
3959 3966 yield revlogproblem(
3960 3967 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3961 3968 node=node,
3962 3969 )
3963 3970
3964 3971 except error.CensoredNodeError:
3965 3972 if state[b'erroroncensored']:
3966 3973 yield revlogproblem(
3967 3974 error=_(b'censored file data'), node=node
3968 3975 )
3969 3976 state[b'skipread'].add(node)
3970 3977 except Exception as e:
3971 3978 yield revlogproblem(
3972 3979 error=_(b'unpacking %s: %s')
3973 3980 % (short(node), stringutil.forcebytestr(e)),
3974 3981 node=node,
3975 3982 )
3976 3983 state[b'skipread'].add(node)
3977 3984
3978 3985 def storageinfo(
3979 3986 self,
3980 3987 exclusivefiles=False,
3981 3988 sharedfiles=False,
3982 3989 revisionscount=False,
3983 3990 trackedsize=False,
3984 3991 storedsize=False,
3985 3992 ):
3986 3993 d = {}
3987 3994
3988 3995 if exclusivefiles:
3989 3996 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3990 3997 if not self._inline:
3991 3998 d[b'exclusivefiles'].append((self.opener, self._datafile))
3992 3999
3993 4000 if sharedfiles:
3994 4001 d[b'sharedfiles'] = []
3995 4002
3996 4003 if revisionscount:
3997 4004 d[b'revisionscount'] = len(self)
3998 4005
3999 4006 if trackedsize:
4000 4007 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4001 4008
4002 4009 if storedsize:
4003 4010 d[b'storedsize'] = sum(
4004 4011 self.opener.stat(path).st_size for path in self.files()
4005 4012 )
4006 4013
4007 4014 return d
4008 4015
4009 4016 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4010 4017 if not self.feature_config.has_side_data:
4011 4018 return
4012 4019         # revlog formats with sidedata support do not support inline
4013 4020 assert not self._inline
4014 4021 if not helpers[1] and not helpers[2]:
4015 4022 # Nothing to generate or remove
4016 4023 return
4017 4024
4018 4025 new_entries = []
4019 4026 # append the new sidedata
4020 4027 with self._writing(transaction):
4021 4028 ifh, dfh, sdfh = self._inner._writinghandles
4022 4029 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4023 4030
4024 4031 current_offset = sdfh.tell()
4025 4032 for rev in range(startrev, endrev + 1):
4026 4033 entry = self.index[rev]
4027 4034 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4028 4035 store=self,
4029 4036 sidedata_helpers=helpers,
4030 4037 sidedata={},
4031 4038 rev=rev,
4032 4039 )
4033 4040
4034 4041 serialized_sidedata = sidedatautil.serialize_sidedata(
4035 4042 new_sidedata
4036 4043 )
4037 4044
4038 4045 sidedata_compression_mode = COMP_MODE_INLINE
4039 4046 if serialized_sidedata and self.feature_config.has_side_data:
4040 4047 sidedata_compression_mode = COMP_MODE_PLAIN
4041 4048 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4042 4049 if (
4043 4050 h != b'u'
4044 4051 and comp_sidedata[0] != b'\0'
4045 4052 and len(comp_sidedata) < len(serialized_sidedata)
4046 4053 ):
4047 4054 assert not h
4048 4055 if (
4049 4056 comp_sidedata[0]
4050 4057 == self._docket.default_compression_header
4051 4058 ):
4052 4059 sidedata_compression_mode = COMP_MODE_DEFAULT
4053 4060 serialized_sidedata = comp_sidedata
4054 4061 else:
4055 4062 sidedata_compression_mode = COMP_MODE_INLINE
4056 4063 serialized_sidedata = comp_sidedata
4057 4064 if entry[8] != 0 or entry[9] != 0:
4058 4065 # rewriting entries that already have sidedata is not
4059 4066 # supported yet, because it introduces garbage data in the
4060 4067 # revlog.
4061 4068 msg = b"rewriting existing sidedata is not supported yet"
4062 4069 raise error.Abort(msg)
4063 4070
4064 4071 # Apply (potential) flags to add and to remove after running
4065 4072 # the sidedata helpers
4066 4073 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4067 4074 entry_update = (
4068 4075 current_offset,
4069 4076 len(serialized_sidedata),
4070 4077 new_offset_flags,
4071 4078 sidedata_compression_mode,
4072 4079 )
4073 4080
4074 4081                 # the sidedata computation might have moved the file cursors around
4075 4082 sdfh.seek(current_offset, os.SEEK_SET)
4076 4083 sdfh.write(serialized_sidedata)
4077 4084 new_entries.append(entry_update)
4078 4085 current_offset += len(serialized_sidedata)
4079 4086 self._docket.sidedata_end = sdfh.tell()
4080 4087
4081 4088 # rewrite the new index entries
4082 4089 ifh.seek(startrev * self.index.entry_size)
4083 4090 for i, e in enumerate(new_entries):
4084 4091 rev = startrev + i
4085 4092 self.index.replace_sidedata_info(rev, *e)
4086 4093 packed = self.index.entry_binary(rev)
4087 4094 if rev == 0 and self._docket is None:
4088 4095 header = self._format_flags | self._format_version
4089 4096 header = self.index.pack_header(header)
4090 4097 packed = header + packed
4091 4098 ifh.write(packed)