typing: make the revlog classes known to pytype...
Matt Harbison
r53102:bcaa5d40 default
@@ -1,4109 +1,4123
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import functools
20 20 import io
21 21 import os
22 22 import struct
23 23 import typing
24 24 import weakref
25 25 import zlib
26 26
27 27 from typing import (
28 28 Optional,
29 29 Tuple,
30 30 )
31 31
32 32 # import stuff from node for others to import from revlog
33 33 from .node import (
34 34 bin,
35 35 hex,
36 36 nullrev,
37 37 sha1nodeconstants,
38 38 short,
39 39 wdirrev,
40 40 )
41 41 from .i18n import _
42 42 from .revlogutils.constants import (
43 43 ALL_KINDS,
44 44 CHANGELOGV2,
45 45 COMP_MODE_DEFAULT,
46 46 COMP_MODE_INLINE,
47 47 COMP_MODE_PLAIN,
48 48 DELTA_BASE_REUSE_NO,
49 49 DELTA_BASE_REUSE_TRY,
50 50 ENTRY_RANK,
51 51 FEATURES_BY_VERSION,
52 52 FLAG_GENERALDELTA,
53 53 FLAG_INLINE_DATA,
54 54 INDEX_HEADER,
55 55 KIND_CHANGELOG,
56 56 KIND_FILELOG,
57 57 RANK_UNKNOWN,
58 58 REVLOGV0,
59 59 REVLOGV1,
60 60 REVLOGV1_FLAGS,
61 61 REVLOGV2,
62 62 REVLOGV2_FLAGS,
63 63 REVLOG_DEFAULT_FLAGS,
64 64 REVLOG_DEFAULT_FORMAT,
65 65 REVLOG_DEFAULT_VERSION,
66 66 SUPPORTED_FLAGS,
67 67 )
68 68 from .revlogutils.flagutil import (
69 69 REVIDX_DEFAULT_FLAGS,
70 70 REVIDX_ELLIPSIS,
71 71 REVIDX_EXTSTORED,
72 72 REVIDX_FLAGS_ORDER,
73 73 REVIDX_HASCOPIESINFO,
74 74 REVIDX_ISCENSORED,
75 75 REVIDX_RAWTEXT_CHANGING_FLAGS,
76 76 )
77 77 from .thirdparty import attr
78 78
79 79 # Force pytype to use the non-vendored package
80 80 if typing.TYPE_CHECKING:
81 81 # noinspection PyPackageRequirements
82 82 import attr
83 83
84 84 from . import (
85 85 ancestor,
86 86 dagop,
87 87 error,
88 88 mdiff,
89 89 policy,
90 90 pycompat,
91 91 revlogutils,
92 92 templatefilters,
93 93 util,
94 94 vfs as vfsmod,
95 95 )
96 96 from .interfaces import (
97 97 repository,
98 98 util as interfaceutil,
99 99 )
100 100 from .revlogutils import (
101 101 deltas as deltautil,
102 102 docket as docketutil,
103 103 flagutil,
104 104 nodemap as nodemaputil,
105 105 randomaccessfile,
106 106 revlogv0,
107 107 rewrite,
108 108 sidedata as sidedatautil,
109 109 )
110 110 from .utils import (
111 111 storageutil,
112 112 stringutil,
113 113 )
114 114
115 115 # blanket usage of all the names to prevent pyflakes complaints
116 116 # We need these names available in the module for extensions.
117 117
118 118 REVLOGV0
119 119 REVLOGV1
120 120 REVLOGV2
121 121 CHANGELOGV2
122 122 FLAG_INLINE_DATA
123 123 FLAG_GENERALDELTA
124 124 REVLOG_DEFAULT_FLAGS
125 125 REVLOG_DEFAULT_FORMAT
126 126 REVLOG_DEFAULT_VERSION
127 127 REVLOGV1_FLAGS
128 128 REVLOGV2_FLAGS
129 129 REVIDX_ISCENSORED
130 130 REVIDX_ELLIPSIS
131 131 REVIDX_HASCOPIESINFO
132 132 REVIDX_EXTSTORED
133 133 REVIDX_DEFAULT_FLAGS
134 134 REVIDX_FLAGS_ORDER
135 135 REVIDX_RAWTEXT_CHANGING_FLAGS
136 136
137 137 parsers = policy.importmod('parsers')
138 138 rustancestor = policy.importrust('ancestor')
139 139 rustdagop = policy.importrust('dagop')
140 140 rustrevlog = policy.importrust('revlog')
141 141
142 142 # Aliased for performance.
143 143 _zlibdecompress = zlib.decompress
144 144
145 145 # max size of inline data embedded into a revlog
146 146 _maxinline = 131072
147 147
148 148
149 149 # Flag processors for REVIDX_ELLIPSIS.
150 150 def ellipsisreadprocessor(rl, text):
151 151 return text, False
152 152
153 153
154 154 def ellipsiswriteprocessor(rl, text):
155 155 return text, False
156 156
157 157
158 158 def ellipsisrawprocessor(rl, text):
159 159 return False
160 160
161 161
162 162 ellipsisprocessor = (
163 163 ellipsisreadprocessor,
164 164 ellipsiswriteprocessor,
165 165 ellipsisrawprocessor,
166 166 )
167 167
168 168
169 169 def _verify_revision(rl, skipflags, state, node):
170 170 """Verify the integrity of the given revlog ``node`` while providing a hook
171 171 point for extensions to influence the operation."""
172 172 if skipflags:
173 173 state[b'skipread'].add(node)
174 174 else:
175 175 # Side-effect: read content and verify hash.
176 176 rl.revision(node)
177 177
178 178
179 179 # True if a fast implementation for persistent-nodemap is available
180 180 #
181 181 # We also consider that we have a "fast" implementation in "pure" python
182 182 # because people using pure don't really have performance considerations
183 183 # (and a wheelbarrow of other slowness sources)
184 184 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
185 185 parsers, 'BaseIndexObject'
186 186 )
187 187
188 188
189 @interfaceutil.implementer(repository.irevisiondelta)
190 189 @attr.s(slots=True)
191 class revlogrevisiondelta:
190 class RevLogRevisionDelta:
192 191 node = attr.ib()
193 192 p1node = attr.ib()
194 193 p2node = attr.ib()
195 194 basenode = attr.ib()
196 195 flags = attr.ib()
197 196 baserevisionsize = attr.ib()
198 197 revision = attr.ib()
199 198 delta = attr.ib()
200 199 sidedata = attr.ib()
201 200 protocol_flags = attr.ib()
202 201 linknode = attr.ib(default=None)
203 202
204 203
205 @interfaceutil.implementer(repository.iverifyproblem)
204 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
205 RevLogRevisionDelta
206 )
207
208 if typing.TYPE_CHECKING:
209 revlogrevisiondelta = RevLogRevisionDelta
210
211
206 212 @attr.s(frozen=True)
207 class revlogproblem:
213 class RevLogProblem:
208 214 warning = attr.ib(default=None)
209 215 error = attr.ib(default=None)
210 216 node = attr.ib(default=None)
211 217
212 218
219 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
220 RevLogProblem
221 )
222
223 if typing.TYPE_CHECKING:
224 revlogproblem = RevLogProblem
225
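# Illustrative sketch of the typing pattern introduced above: the
# interfaceutil.implementer(...) call returns a class decorator that
# pytype cannot see through, so the module applies it to a CamelCase
# class and rebinds the public name to the plain class under
# TYPE_CHECKING. In generic form (Iface, impl and _Thing are placeholder
# names, not part of this module):
#
#     @attr.s(slots=True)
#     class _Thing:
#         ...
#
#     thing = impl(Iface)(_Thing)   # runtime: interface-registered class
#
#     if typing.TYPE_CHECKING:
#         thing = _Thing            # type checkers see a real class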
226
213 227 def parse_index_v1(data, inline):
214 228 # call the C implementation to parse the index data
215 229 index, cache = parsers.parse_index2(data, inline)
216 230 return index, cache
217 231
218 232
219 233 def parse_index_v2(data, inline):
220 234 # call the C implementation to parse the index data
221 235 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
222 236 return index, cache
223 237
224 238
225 239 def parse_index_cl_v2(data, inline):
226 240 # call the C implementation to parse the index data
227 241 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
228 242 return index, cache
229 243
230 244
231 245 if hasattr(parsers, 'parse_index_devel_nodemap'):
232 246
233 247 def parse_index_v1_nodemap(data, inline):
234 248 index, cache = parsers.parse_index_devel_nodemap(data, inline)
235 249 return index, cache
236 250
237 251 else:
238 252 parse_index_v1_nodemap = None
239 253
240 254
241 255 def parse_index_v1_rust(data, inline, default_header):
242 256 cache = (0, data) if inline else None
243 257 return rustrevlog.Index(data, default_header), cache
244 258
245 259
246 260 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
247 261 # signed integer)
248 262 _maxentrysize = 0x7FFFFFFF
249 263
250 264 FILE_TOO_SHORT_MSG = _(
251 265 b'cannot read from revlog %s;'
252 266 b' expected %d bytes from offset %d, data size is %d'
253 267 )
254 268
255 269 hexdigits = b'0123456789abcdefABCDEF'
256 270
257 271
258 272 class _Config:
259 273 def copy(self):
260 274 return self.__class__(**self.__dict__)
261 275
262 276
263 277 @attr.s()
264 278 class FeatureConfig(_Config):
265 279 """Hold configuration values about the available revlog features"""
266 280
267 281 # the default compression engine
268 282 compression_engine = attr.ib(default=b'zlib')
269 283 # compression engines options
270 284 compression_engine_options = attr.ib(default=attr.Factory(dict))
271 285
272 286 # can we use censor on this revlog
273 287 censorable = attr.ib(default=False)
274 288 # does this revlog use the "side data" feature
275 289 has_side_data = attr.ib(default=False)
276 290 # might remove rank configuration once the computation has no impact
277 291 compute_rank = attr.ib(default=False)
278 292 # parent order is supposed to be semantically irrelevant, so we
279 293 # normally re-sort parents to ensure that the first parent is non-null,
280 294 # if there is a non-null parent at all.
281 295 # filelog abuses the parent order as a flag to mark some instances of
282 296 # meta-encoded files, so allow it to disable this behavior.
283 297 canonical_parent_order = attr.ib(default=False)
284 298 # can ellipsis commit be used
285 299 enable_ellipsis = attr.ib(default=False)
286 300
287 301 def copy(self):
288 302 new = super().copy()
289 303 new.compression_engine_options = self.compression_engine_options.copy()
290 304 return new
291 305
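# A note on the copy() override above: _Config.copy() rebuilds the object
# from a shallow copy of __dict__, so without this override the mutable
# compression_engine_options dict would be shared between the original and
# the copy. A small sketch of the guarantee (the option key is purely
# illustrative):
#
#     a = FeatureConfig()
#     b = a.copy()
#     b.compression_engine_options[b'level'] = 9
#     assert b'level' not in a.compression_engine_options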
292 306
293 307 @attr.s()
294 308 class DataConfig(_Config):
295 309 """Hold configuration value about how the revlog data are read"""
296 310
297 311 # should we try to open the "pending" version of the revlog
298 312 try_pending = attr.ib(default=False)
299 313 # should we try to open the "split" version of the revlog
300 314 try_split = attr.ib(default=False)
301 315 # When True, indexfile should be opened with checkambig=True at writing,
302 316 # to avoid file stat ambiguity.
303 317 check_ambig = attr.ib(default=False)
304 318
305 319 # If true, use mmap instead of reading to deal with large index
306 320 mmap_large_index = attr.ib(default=False)
307 321 # how much data is large
308 322 mmap_index_threshold = attr.ib(default=None)
309 323 # How much data to read and cache into the raw revlog data cache.
310 324 chunk_cache_size = attr.ib(default=65536)
311 325
312 326 # The size of the uncompressed cache compared to the largest revision seen.
313 327 uncompressed_cache_factor = attr.ib(default=None)
314 328
315 329 # The number of chunks cached
316 330 uncompressed_cache_count = attr.ib(default=None)
317 331
318 332 # Allow sparse reading of the revlog data
319 333 with_sparse_read = attr.ib(default=False)
320 334 # minimal density of a sparse read chunk
321 335 sr_density_threshold = attr.ib(default=0.50)
322 336 # minimal size of data we skip when performing sparse read
323 337 sr_min_gap_size = attr.ib(default=262144)
324 338
325 339 # are deltas encoded against arbitrary bases.
326 340 generaldelta = attr.ib(default=False)
327 341
328 342
329 343 @attr.s()
330 344 class DeltaConfig(_Config):
331 345 """Hold configuration value about how new delta are computed
332 346
333 347 Some attributes are duplicated from DataConfig to help havign each object
334 348 self contained.
335 349 """
336 350
337 351 # can deltas be encoded against arbitrary bases.
338 352 general_delta = attr.ib(default=False)
339 353 # Allow sparse writing of the revlog data
340 354 sparse_revlog = attr.ib(default=False)
341 355 # maximum length of a delta chain
342 356 max_chain_len = attr.ib(default=None)
343 357 # Maximum distance between delta chain base start and end
344 358 max_deltachain_span = attr.ib(default=-1)
345 359 # If `upper_bound_comp` is not None, this is the expected maximal gain from
346 360 # compression for the data content.
347 361 upper_bound_comp = attr.ib(default=None)
348 362 # Should we try a delta against both parents
349 363 delta_both_parents = attr.ib(default=True)
350 364 # Test delta base candidate group by chunk of this maximal size.
351 365 candidate_group_chunk_size = attr.ib(default=0)
352 366 # Should we display debug information about delta computation
353 367 debug_delta = attr.ib(default=False)
354 368 # trust incoming delta by default
355 369 lazy_delta = attr.ib(default=True)
356 370 # trust the base of incoming delta by default
357 371 lazy_delta_base = attr.ib(default=False)
358 372
359 373
360 374 class _InnerRevlog:
361 375 """An inner layer of the revlog object
362 376
363 377 That layer exists to be able to delegate some operations to Rust; its
364 378 boundaries are arbitrary and based on what we can delegate to Rust.
365 379 """
366 380
367 381 opener: vfsmod.vfs
368 382
369 383 def __init__(
370 384 self,
371 385 opener: vfsmod.vfs,
372 386 index,
373 387 index_file,
374 388 data_file,
375 389 sidedata_file,
376 390 inline,
377 391 data_config,
378 392 delta_config,
379 393 feature_config,
380 394 chunk_cache,
381 395 default_compression_header,
382 396 ):
383 397 self.opener = opener
384 398 self.index = index
385 399
386 400 self.index_file = index_file
387 401 self.data_file = data_file
388 402 self.sidedata_file = sidedata_file
389 403 self.inline = inline
390 404 self.data_config = data_config
391 405 self.delta_config = delta_config
392 406 self.feature_config = feature_config
393 407
394 408 # used during diverted write.
395 409 self._orig_index_file = None
396 410
397 411 self._default_compression_header = default_compression_header
398 412
399 413 # index
400 414
401 415 # 3-tuple of file handles being used for active writing.
402 416 self._writinghandles = None
403 417
404 418 self._segmentfile = randomaccessfile.randomaccessfile(
405 419 self.opener,
406 420 (self.index_file if self.inline else self.data_file),
407 421 self.data_config.chunk_cache_size,
408 422 chunk_cache,
409 423 )
410 424 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
411 425 self.opener,
412 426 self.sidedata_file,
413 427 self.data_config.chunk_cache_size,
414 428 )
415 429
416 430 # revlog header -> revlog compressor
417 431 self._decompressors = {}
418 432 # 3-tuple of (node, rev, text) for a raw revision.
419 433 self._revisioncache = None
420 434
421 435 # cache some uncompressed chunks
423 437 # rev → uncompressed_chunk
423 437 #
424 438 # the max cost is dynamically updated to be proportional to the
425 439 # size of the revisions we actually encounter.
426 440 self._uncompressed_chunk_cache = None
427 441 if self.data_config.uncompressed_cache_factor is not None:
428 442 self._uncompressed_chunk_cache = util.lrucachedict(
429 443 self.data_config.uncompressed_cache_count,
430 444 maxcost=65536, # some arbitrary initial value
431 445 )
432 446
433 447 self._delay_buffer = None
434 448
435 449 def __len__(self):
436 450 return len(self.index)
437 451
438 452 def clear_cache(self):
439 453 assert not self.is_delaying
440 454 self._revisioncache = None
441 455 if self._uncompressed_chunk_cache is not None:
442 456 self._uncompressed_chunk_cache.clear()
443 457 self._segmentfile.clear_cache()
444 458 self._segmentfile_sidedata.clear_cache()
445 459
446 460 @property
447 461 def canonical_index_file(self):
448 462 if self._orig_index_file is not None:
449 463 return self._orig_index_file
450 464 return self.index_file
451 465
452 466 @property
453 467 def is_delaying(self):
454 468 """is the revlog is currently delaying the visibility of written data?
455 469
456 470 The delaying mechanism can be either in-memory or written on disk in a
457 471 side-file."""
458 472 return (self._delay_buffer is not None) or (
459 473 self._orig_index_file is not None
460 474 )
461 475
462 476 # Derived from index values.
463 477
464 478 def start(self, rev):
465 479 """the offset of the data chunk for this revision"""
466 480 return int(self.index[rev][0] >> 16)
467 481
468 482 def length(self, rev):
469 483 """the length of the data chunk for this revision"""
470 484 return self.index[rev][1]
471 485
472 486 def end(self, rev):
473 487 """the end of the data chunk for this revision"""
474 488 return self.start(rev) + self.length(rev)
475 489
476 490 def deltaparent(self, rev):
477 491 """return deltaparent of the given revision"""
478 492 base = self.index[rev][3]
479 493 if base == rev:
480 494 return nullrev
481 495 elif self.delta_config.general_delta:
482 496 return base
483 497 else:
484 498 return rev - 1
485 499
486 500 def issnapshot(self, rev):
487 501 """tells whether rev is a snapshot"""
488 502 if not self.delta_config.sparse_revlog:
489 503 return self.deltaparent(rev) == nullrev
490 504 elif hasattr(self.index, 'issnapshot'):
491 505 # directly assign the method to cache the testing and access
492 506 self.issnapshot = self.index.issnapshot
493 507 return self.issnapshot(rev)
494 508 if rev == nullrev:
495 509 return True
496 510 entry = self.index[rev]
497 511 base = entry[3]
498 512 if base == rev:
499 513 return True
500 514 if base == nullrev:
501 515 return True
502 516 p1 = entry[5]
503 517 while self.length(p1) == 0:
504 518 b = self.deltaparent(p1)
505 519 if b == p1:
506 520 break
507 521 p1 = b
508 522 p2 = entry[6]
509 523 while self.length(p2) == 0:
510 524 b = self.deltaparent(p2)
511 525 if b == p2:
512 526 break
513 527 p2 = b
514 528 if base == p1 or base == p2:
515 529 return False
516 530 return self.issnapshot(base)
517 531
518 532 def _deltachain(self, rev, stoprev=None):
519 533 """Obtain the delta chain for a revision.
520 534
521 535 ``stoprev`` specifies a revision to stop at. If not specified, we
522 536 stop at the base of the chain.
523 537
524 538 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
525 539 revs in ascending order and ``stopped`` is a bool indicating whether
526 540 ``stoprev`` was hit.
527 541 """
528 542 generaldelta = self.delta_config.general_delta
529 543 # Try C implementation.
530 544 try:
531 545 return self.index.deltachain(rev, stoprev, generaldelta)
532 546 except AttributeError:
533 547 pass
534 548
535 549 chain = []
536 550
537 551 # Alias to prevent attribute lookup in tight loop.
538 552 index = self.index
539 553
540 554 iterrev = rev
541 555 e = index[iterrev]
542 556 while iterrev != e[3] and iterrev != stoprev:
543 557 chain.append(iterrev)
544 558 if generaldelta:
545 559 iterrev = e[3]
546 560 else:
547 561 iterrev -= 1
548 562 e = index[iterrev]
549 563
550 564 if iterrev == stoprev:
551 565 stopped = True
552 566 else:
553 567 chain.append(iterrev)
554 568 stopped = False
555 569
556 570 chain.reverse()
557 571 return chain, stopped
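# Toy illustration of the pure-python walk above (made-up index data):
# with general delta, entry[3] is the delta base, and a chain ends at a
# revision that is its own base (a full snapshot).
#
#     bases = {0: 0, 1: 0, 2: 1, 3: 1}   # rev -> entry[3]
#     rev, chain = 3, []
#     while bases[rev] != rev:
#         chain.append(rev)
#         rev = bases[rev]
#     chain.append(rev)
#     chain.reverse()
#     assert chain == [0, 1, 3]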
558 572
559 573 @util.propertycache
560 574 def _compressor(self):
561 575 engine = util.compengines[self.feature_config.compression_engine]
562 576 return engine.revlogcompressor(
563 577 self.feature_config.compression_engine_options
564 578 )
565 579
566 580 @util.propertycache
567 581 def _decompressor(self):
568 582 """the default decompressor"""
569 583 if self._default_compression_header is None:
570 584 return None
571 585 t = self._default_compression_header
572 586 c = self._get_decompressor(t)
573 587 return c.decompress
574 588
575 589 def _get_decompressor(self, t: bytes):
576 590 try:
577 591 compressor = self._decompressors[t]
578 592 except KeyError:
579 593 try:
580 594 engine = util.compengines.forrevlogheader(t)
581 595 compressor = engine.revlogcompressor(
582 596 self.feature_config.compression_engine_options
583 597 )
584 598 self._decompressors[t] = compressor
585 599 except KeyError:
586 600 raise error.RevlogError(
587 601 _(b'unknown compression type %s') % binascii.hexlify(t)
588 602 )
589 603 return compressor
590 604
591 605 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
592 606 """Generate a possibly-compressed representation of data."""
593 607 if not data:
594 608 return b'', data
595 609
596 610 compressed = self._compressor.compress(data)
597 611
598 612 if compressed:
599 613 # The revlog compressor added the header in the returned data.
600 614 return b'', compressed
601 615
602 616 if data[0:1] == b'\0':
603 617 return b'', data
604 618 return b'u', data
605 619
606 620 def decompress(self, data: bytes):
607 621 """Decompress a revlog chunk.
608 622
609 623 The chunk is expected to begin with a header identifying the
610 624 format type so it can be routed to an appropriate decompressor.
611 625 """
612 626 if not data:
613 627 return data
614 628
615 629 # Revlogs are read much more frequently than they are written and many
616 630 # chunks only take microseconds to decompress, so performance is
617 631 # important here.
618 632 #
619 633 # We can make a few assumptions about revlogs:
620 634 #
621 635 # 1) the majority of chunks will be compressed (as opposed to inline
622 636 # raw data).
623 637 # 2) decompressing *any* data will likely be at least 10x slower than
624 638 # returning raw inline data.
625 639 # 3) we want to prioritize common and officially supported compression
626 640 # engines
627 641 #
628 642 # It follows that we want to optimize for "decompress compressed data
629 643 # when encoded with common and officially supported compression engines"
630 644 # case over "raw data" and "data encoded by less common or non-official
631 645 # compression engines." That is why we have the inline lookup first
632 646 # followed by the compengines lookup.
633 647 #
634 648 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
635 649 # compressed chunks. And this matters for changelog and manifest reads.
636 650 t = data[0:1]
637 651
638 652 if t == b'x':
639 653 try:
640 654 return _zlibdecompress(data)
641 655 except zlib.error as e:
642 656 raise error.RevlogError(
643 657 _(b'revlog decompress error: %s')
644 658 % stringutil.forcebytestr(e)
645 659 )
646 660 # '\0' is more common than 'u' so it goes first.
647 661 elif t == b'\0':
648 662 return data
649 663 elif t == b'u':
650 664 return util.buffer(data, 1)
651 665
652 666 compressor = self._get_decompressor(t)
653 667
654 668 return compressor.decompress(data)
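# Summary sketch of the one-byte chunk headers dispatched above:
#
#     b'x'   zlib stream (zlib output happens to start with 0x78, 'x')
#     b'\0'  raw data whose own first byte is NUL, stored as-is
#     b'u'   raw data stored behind an explicit marker byte that is
#            stripped on read via util.buffer(data, 1)
#     other  routed to util.compengines.forrevlogheader(), e.g. zstd
#
# Informally, decompress(b''.join(compress(text))) round-trips any text.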
655 669
656 670 @contextlib.contextmanager
657 671 def reading(self):
658 672 """Context manager that keeps data and sidedata files open for reading"""
659 673 if len(self.index) == 0:
660 674 yield # nothing to be read
661 675 elif self._delay_buffer is not None and self.inline:
662 676 msg = "revlog with delayed write should not be inline"
663 677 raise error.ProgrammingError(msg)
664 678 else:
665 679 with self._segmentfile.reading():
666 680 with self._segmentfile_sidedata.reading():
667 681 yield
668 682
669 683 @property
670 684 def is_writing(self):
671 685 """True is a writing context is open"""
672 686 return self._writinghandles is not None
673 687
674 688 @property
675 689 def is_open(self):
676 690 """True if any file handle is being held
677 691
678 692 Used for asserts and debugging in the python code"""
679 693 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
680 694
681 695 @contextlib.contextmanager
682 696 def writing(self, transaction, data_end=None, sidedata_end=None):
683 697 """Open the revlog files for writing
684 698
685 699 Adding content to a revlog should be done within such a context.
686 700 """
687 701 if self.is_writing:
688 702 yield
689 703 else:
690 704 ifh = dfh = sdfh = None
691 705 try:
692 706 r = len(self.index)
693 707 # opening the data file.
694 708 dsize = 0
695 709 if r:
696 710 dsize = self.end(r - 1)
697 711 dfh = None
698 712 if not self.inline:
699 713 try:
700 714 dfh = self.opener(self.data_file, mode=b"r+")
701 715 if data_end is None:
702 716 dfh.seek(0, os.SEEK_END)
703 717 else:
704 718 dfh.seek(data_end, os.SEEK_SET)
705 719 except FileNotFoundError:
706 720 dfh = self.opener(self.data_file, mode=b"w+")
707 721 transaction.add(self.data_file, dsize)
708 722 if self.sidedata_file is not None:
709 723 assert sidedata_end is not None
710 724 # revlog-v2 does not inline, help Pytype
711 725 assert dfh is not None
712 726 try:
713 727 sdfh = self.opener(self.sidedata_file, mode=b"r+")
714 728 dfh.seek(sidedata_end, os.SEEK_SET)
715 729 except FileNotFoundError:
716 730 sdfh = self.opener(self.sidedata_file, mode=b"w+")
717 731 transaction.add(self.sidedata_file, sidedata_end)
718 732
719 733 # opening the index file.
720 734 isize = r * self.index.entry_size
721 735 ifh = self.__index_write_fp()
722 736 if self.inline:
723 737 transaction.add(self.index_file, dsize + isize)
724 738 else:
725 739 transaction.add(self.index_file, isize)
726 740 # exposing all file handle for writing.
727 741 self._writinghandles = (ifh, dfh, sdfh)
728 742 self._segmentfile.writing_handle = ifh if self.inline else dfh
729 743 self._segmentfile_sidedata.writing_handle = sdfh
730 744 yield
731 745 finally:
732 746 self._writinghandles = None
733 747 self._segmentfile.writing_handle = None
734 748 self._segmentfile_sidedata.writing_handle = None
735 749 if dfh is not None:
736 750 dfh.close()
737 751 if sdfh is not None:
738 752 sdfh.close()
739 753 # closing the index file last to avoid exposing references to
740 754 # potentially unflushed data content.
741 755 if ifh is not None:
742 756 ifh.close()
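# Hedged usage sketch for the context manager above (tr is a transaction
# and inner an _InnerRevlog instance; both names are illustrative):
#
#     with inner.writing(tr):
#         ...call write paths that need open file handles...
#
# Nesting is cheap: when a writing context is already open, the manager
# simply yields without opening any file again.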
743 757
744 758 def __index_write_fp(self, index_end=None):
745 759 """internal method to open the index file for writing
746 760
747 761 You should not use this directly; use `_writing` instead
748 762 """
749 763 try:
750 764 if self._delay_buffer is None:
751 765 f = self.opener(
752 766 self.index_file,
753 767 mode=b"r+",
754 768 checkambig=self.data_config.check_ambig,
755 769 )
756 770 else:
757 771 # check_ambig affects the way we open the file for writing; however,
758 772 # here we do not actually open a file for writing, as writes
759 773 # will append to a delay_buffer. So check_ambig is not
760 774 # meaningful and is unneeded here.
761 775 f = randomaccessfile.appender(
762 776 self.opener, self.index_file, b"r+", self._delay_buffer
763 777 )
764 778 if index_end is None:
765 779 f.seek(0, os.SEEK_END)
766 780 else:
767 781 f.seek(index_end, os.SEEK_SET)
768 782 return f
769 783 except FileNotFoundError:
770 784 if self._delay_buffer is None:
771 785 return self.opener(
772 786 self.index_file,
773 787 mode=b"w+",
774 788 checkambig=self.data_config.check_ambig,
775 789 )
776 790 else:
777 791 return randomaccessfile.appender(
778 792 self.opener, self.index_file, b"w+", self._delay_buffer
779 793 )
780 794
781 795 def __index_new_fp(self):
782 796 """internal method to create a new index file for writing
783 797
784 798 You should not use this unless you are upgrading from an inline revlog
785 799 """
786 800 return self.opener(
787 801 self.index_file,
788 802 mode=b"w",
789 803 checkambig=self.data_config.check_ambig,
790 804 )
791 805
792 806 def split_inline(self, tr, header, new_index_file_path=None):
793 807 """split the data of an inline revlog into an index and a data file"""
794 808 assert self._delay_buffer is None
795 809 existing_handles = False
796 810 if self._writinghandles is not None:
797 811 existing_handles = True
798 812 fp = self._writinghandles[0]
799 813 fp.flush()
800 814 fp.close()
801 815 # We can't use the cached file handle after close(). So prevent
802 816 # its usage.
803 817 self._writinghandles = None
804 818 self._segmentfile.writing_handle = None
805 819 # No need to deal with sidedata writing handle as it is only
806 820 # relevant with revlog-v2 which is never inline, not reaching
807 821 # this code
808 822
809 823 new_dfh = self.opener(self.data_file, mode=b"w+")
810 824 new_dfh.truncate(0) # drop any potentially existing data
811 825 try:
812 826 with self.reading():
813 827 for r in range(len(self.index)):
814 828 new_dfh.write(self.get_segment_for_revs(r, r)[1])
815 829 new_dfh.flush()
816 830
817 831 if new_index_file_path is not None:
818 832 self.index_file = new_index_file_path
819 833 with self.__index_new_fp() as fp:
820 834 self.inline = False
821 835 for i in range(len(self.index)):
822 836 e = self.index.entry_binary(i)
823 837 if i == 0:
824 838 packed_header = self.index.pack_header(header)
825 839 e = packed_header + e
826 840 fp.write(e)
827 841
828 842 # If we don't use side-write, the temp file replaces the real
829 843 # index when we exit the context manager
830 844
831 845 self._segmentfile = randomaccessfile.randomaccessfile(
832 846 self.opener,
833 847 self.data_file,
834 848 self.data_config.chunk_cache_size,
835 849 )
836 850
837 851 if existing_handles:
838 852 # switched from inline to conventional; reopen the index
839 853 ifh = self.__index_write_fp()
840 854 self._writinghandles = (ifh, new_dfh, None)
841 855 self._segmentfile.writing_handle = new_dfh
842 856 new_dfh = None
843 857 # No need to deal with sidedata writing handle as it is only
844 858 # relevant with revlog-v2 which is never inline, not reaching
845 859 # this code
846 860 finally:
847 861 if new_dfh is not None:
848 862 new_dfh.close()
849 863 return self.index_file
850 864
851 865 def get_segment_for_revs(self, startrev, endrev):
852 866 """Obtain a segment of raw data corresponding to a range of revisions.
853 867
854 868 Accepts the start and end revisions.
857 871
858 872 Requests for data may be satisfied by a cache.
859 873
860 874 Returns a 2-tuple of (offset, data) for the requested range of
861 875 revisions. Offset is the integer offset from the beginning of the
862 876 revlog and data is a str or buffer of the raw byte data.
863 877
864 878 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
865 879 to determine where each revision's data begins and ends.
866 880
867 881 API: we should consider making this a private part of the InnerRevlog
868 882 at some point.
869 883 """
870 884 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
871 885 # (functions are expensive).
872 886 index = self.index
873 887 istart = index[startrev]
874 888 start = int(istart[0] >> 16)
875 889 if startrev == endrev:
876 890 end = start + istart[1]
877 891 else:
878 892 iend = index[endrev]
879 893 end = int(iend[0] >> 16) + iend[1]
880 894
881 895 if self.inline:
882 896 start += (startrev + 1) * self.index.entry_size
883 897 end += (endrev + 1) * self.index.entry_size
884 898 length = end - start
885 899
886 900 return start, self._segmentfile.read_chunk(start, length)
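# Worked example of the inline adjustment above (hypothetical sizes): in
# an inline revlog, index entries and data chunks are interleaved in one
# file, so the data of revision r sits after the r + 1 index entries for
# revisions 0..r. With entry_size == 64 and an index offset field of 100
# for rev 2, the chunk really starts at 100 + (2 + 1) * 64 == 292.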
887 901
888 902 def _chunk(self, rev):
889 903 """Obtain a single decompressed chunk for a revision.
890 904
891 905 Accepts an integer revision.
894 908
895 909 Returns a str holding uncompressed data for the requested revision.
896 910 """
897 911 if self._uncompressed_chunk_cache is not None:
898 912 uncomp = self._uncompressed_chunk_cache.get(rev)
899 913 if uncomp is not None:
900 914 return uncomp
901 915
902 916 compression_mode = self.index[rev][10]
903 917 data = self.get_segment_for_revs(rev, rev)[1]
904 918 if compression_mode == COMP_MODE_PLAIN:
905 919 uncomp = data
906 920 elif compression_mode == COMP_MODE_DEFAULT:
907 921 uncomp = self._decompressor(data)
908 922 elif compression_mode == COMP_MODE_INLINE:
909 923 uncomp = self.decompress(data)
910 924 else:
911 925 msg = b'unknown compression mode %d'
912 926 msg %= compression_mode
913 927 raise error.RevlogError(msg)
914 928 if self._uncompressed_chunk_cache is not None:
915 929 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
916 930 return uncomp
917 931
918 932 def _chunks(self, revs, targetsize=None):
919 933 """Obtain decompressed chunks for the specified revisions.
920 934
921 935 Accepts an iterable of numeric revisions that are assumed to be in
922 936 ascending order.
925 939
926 940 This function is similar to calling ``self._chunk()`` multiple times,
927 941 but is faster.
928 942
929 943 Returns a list with decompressed data for each requested revision.
930 944 """
931 945 if not revs:
932 946 return []
933 947 start = self.start
934 948 length = self.length
935 949 inline = self.inline
936 950 iosize = self.index.entry_size
937 951 buffer = util.buffer
938 952
939 953 fetched_revs = []
940 954 fadd = fetched_revs.append
941 955
942 956 chunks = []
943 957 ladd = chunks.append
944 958
945 959 if self._uncompressed_chunk_cache is None:
946 960 fetched_revs = revs
947 961 else:
948 962 for rev in revs:
949 963 cached_value = self._uncompressed_chunk_cache.get(rev)
950 964 if cached_value is None:
951 965 fadd(rev)
952 966 else:
953 967 ladd((rev, cached_value))
954 968
955 969 if not fetched_revs:
956 970 slicedchunks = ()
957 971 elif not self.data_config.with_sparse_read:
958 972 slicedchunks = (fetched_revs,)
959 973 else:
960 974 slicedchunks = deltautil.slicechunk(
961 975 self,
962 976 fetched_revs,
963 977 targetsize=targetsize,
964 978 )
965 979
966 980 for revschunk in slicedchunks:
967 981 firstrev = revschunk[0]
968 982 # Skip trailing revisions with empty diff
969 983 for lastrev in revschunk[::-1]:
970 984 if length(lastrev) != 0:
971 985 break
972 986
973 987 try:
974 988 offset, data = self.get_segment_for_revs(firstrev, lastrev)
975 989 except OverflowError:
976 990 # issue4215 - we can't cache a run of chunks greater than
977 991 # 2G on Windows
978 992 for rev in revschunk:
979 993 ladd((rev, self._chunk(rev)))
continue  # offset/data were never set; move on to the next slice
980 994
981 995 decomp = self.decompress
982 996 # self._decompressor might be None, but will not be used in that case
983 997 def_decomp = self._decompressor
984 998 for rev in revschunk:
985 999 chunkstart = start(rev)
986 1000 if inline:
987 1001 chunkstart += (rev + 1) * iosize
988 1002 chunklength = length(rev)
989 1003 comp_mode = self.index[rev][10]
990 1004 c = buffer(data, chunkstart - offset, chunklength)
991 1005 if comp_mode == COMP_MODE_PLAIN:
992 1006 c = c
993 1007 elif comp_mode == COMP_MODE_INLINE:
994 1008 c = decomp(c)
995 1009 elif comp_mode == COMP_MODE_DEFAULT:
996 1010 c = def_decomp(c)
997 1011 else:
998 1012 msg = b'unknown compression mode %d'
999 1013 msg %= comp_mode
1000 1014 raise error.RevlogError(msg)
1001 1015 ladd((rev, c))
1002 1016 if self._uncompressed_chunk_cache is not None:
1003 1017 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1004 1018
1005 1019 chunks.sort()
1006 1020 return [x[1] for x in chunks]
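# Note on the sort above: `chunks` mixes cache hits (appended during the
# initial lookup loop) with freshly fetched revisions, so it holds
# (rev, data) pairs in arrival order; the sort restores ascending
# revision order before the bare data is returned.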
1007 1021
1008 1022 def raw_text(self, node, rev):
1009 1023 """return the possibly unvalidated rawtext for a revision
1010 1024
1011 1025 returns (rev, rawtext, validated)
1012 1026 """
1013 1027
1014 1028 # revision in the cache (could be useful to apply delta)
1015 1029 cachedrev = None
1016 1030 # An intermediate text to apply deltas to
1017 1031 basetext = None
1018 1032
1019 1033 # Check if we have the entry in cache
1020 1034 # The cache entry looks like (node, rev, rawtext)
1021 1035 if self._revisioncache:
1022 1036 cachedrev = self._revisioncache[1]
1023 1037
1024 1038 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1025 1039 if stopped:
1026 1040 basetext = self._revisioncache[2]
1027 1041
1028 1042 # drop cache to save memory, the caller is expected to
1029 1043 # update self._inner._revisioncache after validating the text
1030 1044 self._revisioncache = None
1031 1045
1032 1046 targetsize = None
1033 1047 rawsize = self.index[rev][2]
1034 1048 if 0 <= rawsize:
1035 1049 targetsize = 4 * rawsize
1036 1050
1037 1051 if self._uncompressed_chunk_cache is not None:
1038 1052 # dynamically update the uncompressed_chunk_cache size to the
1039 1053 # largest revision we saw in this revlog.
1040 1054 factor = self.data_config.uncompressed_cache_factor
1041 1055 candidate_size = rawsize * factor
1042 1056 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1043 1057 self._uncompressed_chunk_cache.maxcost = candidate_size
1044 1058
1045 1059 bins = self._chunks(chain, targetsize=targetsize)
1046 1060 if basetext is None:
1047 1061 basetext = bytes(bins[0])
1048 1062 bins = bins[1:]
1049 1063
1050 1064 rawtext = mdiff.patches(basetext, bins)
1051 1065 del basetext # let us have a chance to free memory early
1052 1066 return (rev, rawtext, False)
1053 1067
1054 1068 def sidedata(self, rev, sidedata_end):
1055 1069 """Return the sidedata for a given revision number."""
1056 1070 index_entry = self.index[rev]
1057 1071 sidedata_offset = index_entry[8]
1058 1072 sidedata_size = index_entry[9]
1059 1073
1060 1074 if self.inline:
1061 1075 sidedata_offset += self.index.entry_size * (1 + rev)
1062 1076 if sidedata_size == 0:
1063 1077 return {}
1064 1078
1065 1079 if sidedata_end < sidedata_offset + sidedata_size:
1066 1080 filename = self.sidedata_file
1067 1081 end = sidedata_end
1068 1082 offset = sidedata_offset
1069 1083 length = sidedata_size
1070 1084 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1071 1085 raise error.RevlogError(m)
1072 1086
1073 1087 comp_segment = self._segmentfile_sidedata.read_chunk(
1074 1088 sidedata_offset, sidedata_size
1075 1089 )
1076 1090
1077 1091 comp = self.index[rev][11]
1078 1092 if comp == COMP_MODE_PLAIN:
1079 1093 segment = comp_segment
1080 1094 elif comp == COMP_MODE_DEFAULT:
1081 1095 segment = self._decompressor(comp_segment)
1082 1096 elif comp == COMP_MODE_INLINE:
1083 1097 segment = self.decompress(comp_segment)
1084 1098 else:
1085 1099 msg = b'unknown compression mode %d'
1086 1100 msg %= comp
1087 1101 raise error.RevlogError(msg)
1088 1102
1089 1103 sidedata = sidedatautil.deserialize_sidedata(segment)
1090 1104 return sidedata
1091 1105
1092 1106 def write_entry(
1093 1107 self,
1094 1108 transaction,
1095 1109 entry,
1096 1110 data,
1097 1111 link,
1098 1112 offset,
1099 1113 sidedata,
1100 1114 sidedata_offset,
1101 1115 index_end,
1102 1116 data_end,
1103 1117 sidedata_end,
1104 1118 ):
1105 1119 # Files opened in a+ mode have inconsistent behavior on various
1106 1120 # platforms. Windows requires that a file positioning call be made
1107 1121 # when the file handle transitions between reads and writes. See
1108 1122 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1109 1123 # platforms, Python or the platform itself can be buggy. Some versions
1110 1124 # of Solaris have been observed to not append at the end of the file
1111 1125 # if the file was seeked to before the end. See issue4943 for more.
1112 1126 #
1113 1127 # We work around this issue by inserting a seek() before writing.
1114 1128 # Note: This is likely not necessary on Python 3. However, because
1115 1129 # the file handle is reused for reads and may be seeked there, we need
1116 1130 # to be careful before changing this.
1117 1131 if self._writinghandles is None:
1118 1132 msg = b'adding revision outside `revlog._writing` context'
1119 1133 raise error.ProgrammingError(msg)
1120 1134 ifh, dfh, sdfh = self._writinghandles
1121 1135 if index_end is None:
1122 1136 ifh.seek(0, os.SEEK_END)
1123 1137 else:
1124 1138 ifh.seek(index_end, os.SEEK_SET)
1125 1139 if dfh:
1126 1140 if data_end is None:
1127 1141 dfh.seek(0, os.SEEK_END)
1128 1142 else:
1129 1143 dfh.seek(data_end, os.SEEK_SET)
1130 1144 if sdfh:
1131 1145 sdfh.seek(sidedata_end, os.SEEK_SET)
1132 1146
1133 1147 curr = len(self.index) - 1
1134 1148 if not self.inline:
1135 1149 transaction.add(self.data_file, offset)
1136 1150 if self.sidedata_file:
1137 1151 transaction.add(self.sidedata_file, sidedata_offset)
1138 1152 transaction.add(self.canonical_index_file, curr * len(entry))
1139 1153 if data[0]:
1140 1154 dfh.write(data[0])
1141 1155 dfh.write(data[1])
1142 1156 if sidedata:
1143 1157 sdfh.write(sidedata)
1144 1158 if self._delay_buffer is None:
1145 1159 ifh.write(entry)
1146 1160 else:
1147 1161 self._delay_buffer.append(entry)
1148 1162 elif self._delay_buffer is not None:
1149 1163 msg = b'invalid delayed write on inline revlog'
1150 1164 raise error.ProgrammingError(msg)
1151 1165 else:
1152 1166 offset += curr * self.index.entry_size
1153 1167 transaction.add(self.canonical_index_file, offset)
1154 1168 assert not sidedata
1155 1169 ifh.write(entry)
1156 1170 ifh.write(data[0])
1157 1171 ifh.write(data[1])
1158 1172 return (
1159 1173 ifh.tell(),
1160 1174 dfh.tell() if dfh else None,
1161 1175 sdfh.tell() if sdfh else None,
1162 1176 )
1163 1177
1164 1178 def _divert_index(self):
1165 1179 index_file = self.index_file
1166 1180 # when we encounter a legacy inline-changelog, we split it. However it
1167 1181 # is important to use the expected filename for pending content
1168 1182 # (<radix>.a); otherwise hooks won't see the content of the
1169 1183 # pending transaction.
1170 1184 if index_file.endswith(b'.s'):
1171 1185 index_file = self.index_file[:-2]
1172 1186 return index_file + b'.a'
1173 1187
1174 1188 def delay(self):
1175 1189 assert not self.is_open
1176 1190 if self.inline:
1177 1191 msg = "revlog with delayed write should not be inline"
1178 1192 raise error.ProgrammingError(msg)
1179 1193 if self._delay_buffer is not None or self._orig_index_file is not None:
1180 1194 # delay or divert already in place
1181 1195 return None
1182 1196 elif len(self.index) == 0:
1183 1197 self._orig_index_file = self.index_file
1184 1198 self.index_file = self._divert_index()
1185 1199 assert self._orig_index_file is not None
1186 1200 assert self.index_file is not None
1187 1201 if self.opener.exists(self.index_file):
1188 1202 self.opener.unlink(self.index_file)
1189 1203 return self.index_file
1190 1204 else:
1191 1205 self._delay_buffer = []
1192 1206 return None
1193 1207
1194 1208 def write_pending(self):
1195 1209 assert not self.is_open
1196 1210 if self.inline:
1197 1211 msg = "revlog with delayed write should not be inline"
1198 1212 raise error.ProgrammingError(msg)
1199 1213 if self._orig_index_file is not None:
1200 1214 return None, True
1201 1215 any_pending = False
1202 1216 pending_index_file = self._divert_index()
1203 1217 if self.opener.exists(pending_index_file):
1204 1218 self.opener.unlink(pending_index_file)
1205 1219 util.copyfile(
1206 1220 self.opener.join(self.index_file),
1207 1221 self.opener.join(pending_index_file),
1208 1222 )
1209 1223 if self._delay_buffer:
1210 1224 with self.opener(pending_index_file, b'r+') as ifh:
1211 1225 ifh.seek(0, os.SEEK_END)
1212 1226 ifh.write(b"".join(self._delay_buffer))
1213 1227 any_pending = True
1214 1228 self._delay_buffer = None
1215 1229 self._orig_index_file = self.index_file
1216 1230 self.index_file = pending_index_file
1217 1231 return self.index_file, any_pending
1218 1232
1219 1233 def finalize_pending(self):
1220 1234 assert not self.is_open
1221 1235 if self.inline:
1222 1236 msg = "revlog with delayed write should not be inline"
1223 1237 raise error.ProgrammingError(msg)
1224 1238
1225 1239 delay = self._delay_buffer is not None
1226 1240 divert = self._orig_index_file is not None
1227 1241
1228 1242 if delay and divert:
1229 1243 assert False, "unreachable"
1230 1244 elif delay:
1231 1245 if self._delay_buffer:
1232 1246 with self.opener(self.index_file, b'r+') as ifh:
1233 1247 ifh.seek(0, os.SEEK_END)
1234 1248 ifh.write(b"".join(self._delay_buffer))
1235 1249 self._delay_buffer = None
1236 1250 elif divert:
1237 1251 if self.opener.exists(self.index_file):
1238 1252 self.opener.rename(
1239 1253 self.index_file,
1240 1254 self._orig_index_file,
1241 1255 checkambig=True,
1242 1256 )
1243 1257 self.index_file = self._orig_index_file
1244 1258 self._orig_index_file = None
1245 1259 else:
1246 1260 msg = b"not delay or divert found on this revlog"
1247 1261 raise error.ProgrammingError(msg)
1248 1262 return self.canonical_index_file
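# Lifecycle sketch of the delay/divert mechanism implemented above:
#
#     delay()            -> buffer index writes in memory, or divert an
#                           empty revlog straight to the '<radix>.i.a'
#                           pending file
#     write_pending()    -> materialize buffered writes into the pending
#                           file so hooks can inspect pending data
#     finalize_pending() -> fold the buffered or diverted data back into
#                           the real index file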
1249 1263
1250 1264
1251 1265 class revlog:
1252 1266 """
1253 1267 the underlying revision storage object
1254 1268
1255 1269 A revlog consists of two parts, an index and the revision data.
1256 1270
1257 1271 The index is a file with a fixed record size containing
1258 1272 information on each revision, including its nodeid (hash), the
1259 1273 nodeids of its parents, the position and offset of its data within
1260 1274 the data file, and the revision it's based on. Finally, each entry
1261 1275 contains a linkrev entry that can serve as a pointer to external
1262 1276 data.
1263 1277
1264 1278 The revision data itself is a linear collection of data chunks.
1265 1279 Each chunk represents a revision and is usually represented as a
1266 1280 delta against the previous chunk. To bound lookup time, runs of
1267 1281 deltas are limited to about 2 times the length of the original
1268 1282 version data. This makes retrieval of a version proportional to
1269 1283 its size, or O(1) relative to the number of revisions.
1270 1284
1271 1285 Both pieces of the revlog are written to in an append-only
1272 1286 fashion, which means we never need to rewrite a file to insert or
1273 1287 remove data, and can use some simple techniques to avoid the need
1274 1288 for locking while reading.
1275 1289
1276 1290 If checkambig, indexfile is opened with checkambig=True at
1277 1291 writing, to avoid file stat ambiguity.
1278 1292
1279 1293 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1280 1294 index will be mmapped rather than read if it is larger than the
1281 1295 configured threshold.
1282 1296
1283 1297 If censorable is True, the revlog can have censored revisions.
1284 1298
1285 1299 If `upperboundcomp` is not None, this is the expected maximal gain from
1286 1300 compression for the data content.
1287 1301
1288 1302 `concurrencychecker` is an optional function that receives 3 arguments: a
1289 1303 file handle, a filename, and an expected position. It should check whether
1290 1304 the current position in the file handle is valid, and log/warn/fail (by
1291 1305 raising).
1292 1306
1293 1307 See mercurial/revlogutils/constants.py for details about the content of an
1294 1308 index entry.
1295 1309 """
1296 1310
1297 1311 _flagserrorclass = error.RevlogError
1298 1312
1299 1313 opener: vfsmod.vfs
1300 1314
1301 1315 @staticmethod
1302 1316 def is_inline_index(header_bytes):
1303 1317 """Determine if a revlog is inline from the initial bytes of the index"""
1304 1318 if len(header_bytes) == 0:
1305 1319 return True
1306 1320
1307 1321 header = INDEX_HEADER.unpack(header_bytes)[0]
1308 1322
1309 1323 _format_flags = header & ~0xFFFF
1310 1324 _format_version = header & 0xFFFF
1311 1325
1312 1326 features = FEATURES_BY_VERSION[_format_version]
1313 1327 return features[b'inline'](_format_flags)
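# Worked example for the bit layout above, using the flag values from
# revlogutils.constants (REVLOGV1 == 1, FLAG_INLINE_DATA == 1 << 16,
# FLAG_GENERALDELTA == 1 << 17): a typical v1 index starts with the bytes
# 00 03 00 01, i.e. header == 0x00030001, so
#
#     header & 0xFFFF  == 0x0001  -> REVLOGV1
#     header & ~0xFFFF == 0x30000 -> inline + generaldelta flags
#
# and features[b'inline'](0x30000) reports the revlog as inline.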
1314 1328
1315 1329 _docket_file: Optional[bytes]
1316 1330
1317 1331 def __init__(
1318 1332 self,
1319 1333 opener: vfsmod.vfs,
1320 1334 target,
1321 1335 radix,
1322 1336 postfix=None, # only exist for `tmpcensored` now
1323 1337 checkambig=False,
1324 1338 mmaplargeindex=False,
1325 1339 censorable=False,
1326 1340 upperboundcomp=None,
1327 1341 persistentnodemap=False,
1328 1342 concurrencychecker=None,
1329 1343 trypending=False,
1330 1344 try_split=False,
1331 1345 canonical_parent_order=True,
1332 1346 data_config=None,
1333 1347 delta_config=None,
1334 1348 feature_config=None,
1335 1349 may_inline=True, # may inline new revlog
1336 1350 ):
1337 1351 """
1338 1352 create a revlog object
1339 1353
1340 1354 opener is a function that abstracts the file opening operation
1341 1355 and can be used to implement COW semantics or the like.
1342 1356
1343 1357 `target`: a (KIND, ID) tuple that identifies the content stored in
1344 1358 this revlog. It helps the rest of the code understand what the revlog
1345 1359 is about without having to resort to heuristics and index filename
1346 1360 analysis. Note that this must be reliably set by normal code, but
1347 1361 test, debug, or performance measurement code might not set this to an
1348 1362 accurate value.
1349 1363 """
1350 1364
1351 1365 self.radix = radix
1352 1366
1353 1367 self._docket_file = None
1354 1368 self._indexfile = None
1355 1369 self._datafile = None
1356 1370 self._sidedatafile = None
1357 1371 self._nodemap_file = None
1358 1372 self.postfix = postfix
1359 1373 self._trypending = trypending
1360 1374 self._try_split = try_split
1361 1375 self._may_inline = may_inline
1362 1376 self.opener = opener
1363 1377 if persistentnodemap:
1364 1378 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1365 1379
1366 1380 assert target[0] in ALL_KINDS
1367 1381 assert len(target) == 2
1368 1382 self.target = target
1369 1383 if feature_config is not None:
1370 1384 self.feature_config = feature_config.copy()
1371 1385 elif b'feature-config' in self.opener.options:
1372 1386 self.feature_config = self.opener.options[b'feature-config'].copy()
1373 1387 else:
1374 1388 self.feature_config = FeatureConfig()
1375 1389 self.feature_config.censorable = censorable
1376 1390 self.feature_config.canonical_parent_order = canonical_parent_order
1377 1391 if data_config is not None:
1378 1392 self.data_config = data_config.copy()
1379 1393 elif b'data-config' in self.opener.options:
1380 1394 self.data_config = self.opener.options[b'data-config'].copy()
1381 1395 else:
1382 1396 self.data_config = DataConfig()
1383 1397 self.data_config.check_ambig = checkambig
1384 1398 self.data_config.mmap_large_index = mmaplargeindex
1385 1399 if delta_config is not None:
1386 1400 self.delta_config = delta_config.copy()
1387 1401 elif b'delta-config' in self.opener.options:
1388 1402 self.delta_config = self.opener.options[b'delta-config'].copy()
1389 1403 else:
1390 1404 self.delta_config = DeltaConfig()
1391 1405 self.delta_config.upper_bound_comp = upperboundcomp
1392 1406
1393 1407 # Maps rev to chain base rev.
1394 1408 self._chainbasecache = util.lrucachedict(100)
1395 1409
1396 1410 self.index = None
1397 1411 self._docket = None
1398 1412 self._nodemap_docket = None
1399 1413 # Mapping of partial identifiers to full nodes.
1400 1414 self._pcache = {}
1401 1415
1402 1416 # other optional features
1403 1417
1404 1418 # Make copy of flag processors so each revlog instance can support
1405 1419 # custom flags.
1406 1420 self._flagprocessors = dict(flagutil.flagprocessors)
1407 1421 # prevent nesting of addgroup
1408 1422 self._adding_group = None
1409 1423
1410 1424 chunk_cache = self._loadindex()
1411 1425 self._load_inner(chunk_cache)
1412 1426 self._concurrencychecker = concurrencychecker
1413 1427
1414 1428 def _init_opts(self):
1415 1429 """process options (from above/config) to setup associated default revlog mode
1416 1430
1417 1431 These values might be affected when actually reading on disk information.
1418 1432
1419 1433 The relevant values are returned for use in _loadindex().
1420 1434
1421 1435 * newversionflags:
1422 1436 version header to use if we need to create a new revlog
1423 1437
1424 1438 * mmapindexthreshold:
1425 1439 minimal index size at which to start using mmap
1426 1440
1427 1441 * force_nodemap:
1428 1442 force the usage of a "development" version of the nodemap code
1429 1443 """
1430 1444 opts = self.opener.options
1431 1445
1432 1446 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1433 1447 new_header = CHANGELOGV2
1434 1448 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1435 1449 self.feature_config.compute_rank = compute_rank
1436 1450 elif b'revlogv2' in opts:
1437 1451 new_header = REVLOGV2
1438 1452 elif b'revlogv1' in opts:
1439 1453 new_header = REVLOGV1
1440 1454 if self._may_inline:
1441 1455 new_header |= FLAG_INLINE_DATA
1442 1456 if b'generaldelta' in opts:
1443 1457 new_header |= FLAG_GENERALDELTA
1444 1458 elif b'revlogv0' in self.opener.options:
1445 1459 new_header = REVLOGV0
1446 1460 else:
1447 1461 new_header = REVLOG_DEFAULT_VERSION
1448 1462
1449 1463 mmapindexthreshold = None
1450 1464 if self.data_config.mmap_large_index:
1451 1465 mmapindexthreshold = self.data_config.mmap_index_threshold
1452 1466 if self.feature_config.enable_ellipsis:
1453 1467 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1454 1468
1455 1469 # revlog v0 doesn't have flag processors
1456 1470 for flag, processor in opts.get(b'flagprocessors', {}).items():
1457 1471 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1458 1472
1459 1473 chunk_cache_size = self.data_config.chunk_cache_size
1460 1474 if chunk_cache_size <= 0:
1461 1475 raise error.RevlogError(
1462 1476 _(b'revlog chunk cache size %r is not greater than 0')
1463 1477 % chunk_cache_size
1464 1478 )
1465 1479 elif chunk_cache_size & (chunk_cache_size - 1):
1466 1480 raise error.RevlogError(
1467 1481 _(b'revlog chunk cache size %r is not a power of 2')
1468 1482 % chunk_cache_size
1469 1483 )
1470 1484 force_nodemap = opts.get(b'devel-force-nodemap', False)
1471 1485 return new_header, mmapindexthreshold, force_nodemap
1472 1486
1473 1487 def _get_data(self, filepath, mmap_threshold, size=None):
1474 1488 """return a file content with or without mmap
1475 1489
1476 1490 If the file is missing return the empty string"""
1477 1491 try:
1478 1492 with self.opener(filepath) as fp:
1479 1493 if mmap_threshold is not None:
1480 1494 file_size = self.opener.fstat(fp).st_size
1481 1495 if (
1482 1496 file_size >= mmap_threshold
1483 1497 and self.opener.is_mmap_safe(filepath)
1484 1498 ):
1485 1499 if size is not None:
1486 1500 # avoid potential mmap crash
1487 1501 size = min(file_size, size)
1488 1502 # TODO: should .close() to release resources without
1489 1503 # relying on Python GC
1490 1504 if size is None:
1491 1505 return util.buffer(util.mmapread(fp))
1492 1506 else:
1493 1507 return util.buffer(util.mmapread(fp, size))
1494 1508 if size is None:
1495 1509 return fp.read()
1496 1510 else:
1497 1511 return fp.read(size)
1498 1512 except FileNotFoundError:
1499 1513 return b''
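# Decision sketch for the read above: mmap is used only when every guard
# passes; otherwise the method falls back to a plain read.
#
#     mmap_threshold is None            -> fp.read()
#     file_size < mmap_threshold        -> fp.read()
#     not opener.is_mmap_safe(filepath) -> fp.read()
#     otherwise                         -> util.buffer(util.mmapread(fp))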
1500 1514
1501 1515 def get_streams(self, max_linkrev, force_inline=False):
1502 1516 """return a list of streams that represent this revlog
1503 1517
1504 1518 This is used by stream-clone to do bytes to bytes copies of a repository.
1505 1519
1506 1520 This streams data for all revisions that refer to a changelog revision up
1507 1521 to `max_linkrev`.
1508 1522
1509 1523 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1510 1524
1511 1525 It returns a list of three-tuples:
1512 1526
1513 1527 [
1514 1528 (filename, bytes_stream, stream_size),
1515 1529 …
1516 1530 ]
1517 1531 """
1518 1532 n = len(self)
1519 1533 index = self.index
1520 1534 while n > 0:
1521 1535 linkrev = index[n - 1][4]
1522 1536 if linkrev < max_linkrev:
1523 1537 break
1524 1538 # note: this loop will rarely go through multiple iterations, since
1525 1539 # it only traverses commits created during the current streaming
1526 1540 # pull operation.
1527 1541 #
1528 1542 # If this becomes a problem, using a binary search should cap the
1529 1543 # runtime of this.
1530 1544 n = n - 1
1531 1545 if n == 0:
1532 1546 # no data to send
1533 1547 return []
1534 1548 index_size = n * index.entry_size
1535 1549 data_size = self.end(n - 1)
1536 1550
1537 1551 # XXX we might have been split (or stripped) since the object
1538 1552 # initialization. We need to close this race too, perhaps by having a
1539 1553 # way to pre-open the files we feed to the revlog and never closing
1540 1554 # them before we are done streaming.
1541 1555
1542 1556 if self._inline:
1543 1557
1544 1558 def get_stream():
1545 1559 with self.opener(self._indexfile, mode=b"r") as fp:
1546 1560 yield None
1547 1561 size = index_size + data_size
1548 1562 if size <= 65536:
1549 1563 yield fp.read(size)
1550 1564 else:
1551 1565 yield from util.filechunkiter(fp, limit=size)
1552 1566
1553 1567 inline_stream = get_stream()
1554 1568 next(inline_stream)
1555 1569 return [
1556 1570 (self._indexfile, inline_stream, index_size + data_size),
1557 1571 ]
1558 1572 elif force_inline:
1559 1573
1560 1574 def get_stream():
1561 1575 with self.reading():
1562 1576 yield None
1563 1577
1564 1578 for rev in range(n):
1565 1579 idx = self.index.entry_binary(rev)
1566 1580 if rev == 0 and self._docket is None:
1567 1581 # re-inject the inline flag
1568 1582 header = self._format_flags
1569 1583 header |= self._format_version
1570 1584 header |= FLAG_INLINE_DATA
1571 1585 header = self.index.pack_header(header)
1572 1586 idx = header + idx
1573 1587 yield idx
1574 1588 yield self._inner.get_segment_for_revs(rev, rev)[1]
1575 1589
1576 1590 inline_stream = get_stream()
1577 1591 next(inline_stream)
1578 1592 return [
1579 1593 (self._indexfile, inline_stream, index_size + data_size),
1580 1594 ]
1581 1595 else:
1582 1596
1583 1597 def get_index_stream():
1584 1598 with self.opener(self._indexfile, mode=b"r") as fp:
1585 1599 yield None
1586 1600 if index_size <= 65536:
1587 1601 yield fp.read(index_size)
1588 1602 else:
1589 1603 yield from util.filechunkiter(fp, limit=index_size)
1590 1604
1591 1605 def get_data_stream():
1592 1606 with self._datafp() as fp:
1593 1607 yield None
1594 1608 if data_size <= 65536:
1595 1609 yield fp.read(data_size)
1596 1610 else:
1597 1611 yield from util.filechunkiter(fp, limit=data_size)
1598 1612
1599 1613 index_stream = get_index_stream()
1600 1614 next(index_stream)
1601 1615 data_stream = get_data_stream()
1602 1616 next(data_stream)
1603 1617 return [
1604 1618 (self._datafile, data_stream, data_size),
1605 1619 (self._indexfile, index_stream, index_size),
1606 1620 ]
1607 1621
1608 1622 def _loadindex(self, docket=None):
1609 1623 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1610 1624
1611 1625 if self.postfix is not None:
1612 1626 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1613 1627 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1614 1628 entry_point = b'%s.i.a' % self.radix
1615 1629 elif self._try_split and self.opener.exists(self._split_index_file):
1616 1630 entry_point = self._split_index_file
1617 1631 else:
1618 1632 entry_point = b'%s.i' % self.radix
1619 1633
1620 1634 if docket is not None:
1621 1635 self._docket = docket
1622 1636 self._docket_file = entry_point
1623 1637 else:
1624 1638 self._initempty = True
1625 1639 entry_data = self._get_data(entry_point, mmapindexthreshold)
1626 1640 if len(entry_data) > 0:
1627 1641 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1628 1642 self._initempty = False
1629 1643 else:
1630 1644 header = new_header
1631 1645
1632 1646 self._format_flags = header & ~0xFFFF
1633 1647 self._format_version = header & 0xFFFF
1634 1648
1635 1649 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1636 1650 if supported_flags is None:
1637 1651 msg = _(b'unknown version (%d) in revlog %s')
1638 1652 msg %= (self._format_version, self.display_id)
1639 1653 raise error.RevlogError(msg)
1640 1654 elif self._format_flags & ~supported_flags:
1641 1655 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1642 1656 display_flag = self._format_flags >> 16
1643 1657 msg %= (display_flag, self._format_version, self.display_id)
1644 1658 raise error.RevlogError(msg)
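# For reference, the 4-byte header packs the feature flags in its high
# 16 bits and the format version in its low 16 bits. An illustrative
# (not executed) decomposition for an inline revlogv1 header:
#
#     header = INDEX_HEADER.unpack(b'\x00\x01\x00\x01')[0]
#     assert header & 0xFFFF == REVLOGV1
#     assert header & ~0xFFFF == FLAG_INLINE_DATA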
1645 1659
1646 1660 features = FEATURES_BY_VERSION[self._format_version]
1647 1661 self._inline = features[b'inline'](self._format_flags)
1648 1662 self.delta_config.general_delta = features[b'generaldelta'](
1649 1663 self._format_flags
1650 1664 )
1651 1665 self.feature_config.has_side_data = features[b'sidedata']
1652 1666
1653 1667 if not features[b'docket']:
1654 1668 self._indexfile = entry_point
1655 1669 index_data = entry_data
1656 1670 else:
1657 1671 self._docket_file = entry_point
1658 1672 if self._initempty:
1659 1673 self._docket = docketutil.default_docket(self, header)
1660 1674 else:
1661 1675 self._docket = docketutil.parse_docket(
1662 1676 self, entry_data, use_pending=self._trypending
1663 1677 )
1664 1678
1665 1679 if self._docket is not None:
1666 1680 self._indexfile = self._docket.index_filepath()
1667 1681 index_data = b''
1668 1682 index_size = self._docket.index_end
1669 1683 if index_size > 0:
1670 1684 index_data = self._get_data(
1671 1685 self._indexfile, mmapindexthreshold, size=index_size
1672 1686 )
1673 1687 if len(index_data) < index_size:
1674 1688 msg = _(b'too few index data for %s: got %d, expected %d')
1675 1689 msg %= (self.display_id, len(index_data), index_size)
1676 1690 raise error.RevlogError(msg)
1677 1691
1678 1692 self._inline = False
1679 1693 # generaldelta implied by version 2 revlogs.
1680 1694 self.delta_config.general_delta = True
1681 1695 # the logic for persistent nodemap will be dealt with within the
1682 1696 # main docket, so disable it for now.
1683 1697 self._nodemap_file = None
1684 1698
1685 1699 if self._docket is not None:
1686 1700 self._datafile = self._docket.data_filepath()
1687 1701 self._sidedatafile = self._docket.sidedata_filepath()
1688 1702 elif self.postfix is None:
1689 1703 self._datafile = b'%s.d' % self.radix
1690 1704 else:
1691 1705 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1692 1706
1693 1707 self.nodeconstants = sha1nodeconstants
1694 1708 self.nullid = self.nodeconstants.nullid
1695 1709
1696 1710 # sparse-revlog can't be on without general-delta (issue6056)
1697 1711 if not self.delta_config.general_delta:
1698 1712 self.delta_config.sparse_revlog = False
1699 1713
1700 1714 self._storedeltachains = True
1701 1715
1702 1716 devel_nodemap = (
1703 1717 self._nodemap_file
1704 1718 and force_nodemap
1705 1719 and parse_index_v1_nodemap is not None
1706 1720 )
1707 1721
1708 1722 use_rust_index = False
1709 1723 if rustrevlog is not None and self._nodemap_file is not None:
1710 1724 # we would like to use the rust index in all cases, especially
1711 1725 # because it has been necessary for AncestorsIterator and
1712 1726 # LazyAncestors since the 6.7 cycle.
1713 1727 #
1714 1728 # However, the performance impact of unconditionally building the
1715 1729 # nodemap is currently a problem for repositories without a
1716 1730 # persistent nodemap.
1717 1731 use_rust_index = True
1718 1732
1719 1733 self._parse_index = parse_index_v1
1720 1734 if self._format_version == REVLOGV0:
1721 1735 self._parse_index = revlogv0.parse_index_v0
1722 1736 elif self._format_version == REVLOGV2:
1723 1737 self._parse_index = parse_index_v2
1724 1738 elif self._format_version == CHANGELOGV2:
1725 1739 self._parse_index = parse_index_cl_v2
1726 1740 elif devel_nodemap:
1727 1741 self._parse_index = parse_index_v1_nodemap
1728 1742 elif use_rust_index:
1729 1743 self._parse_index = functools.partial(
1730 1744 parse_index_v1_rust, default_header=new_header
1731 1745 )
1732 1746 try:
1733 1747 d = self._parse_index(index_data, self._inline)
1734 1748 index, chunkcache = d
1735 1749 use_nodemap = (
1736 1750 not self._inline
1737 1751 and self._nodemap_file is not None
1738 1752 and hasattr(index, 'update_nodemap_data')
1739 1753 )
1740 1754 if use_nodemap:
1741 1755 nodemap_data = nodemaputil.persisted_data(self)
1742 1756 if nodemap_data is not None:
1743 1757 docket = nodemap_data[0]
1744 1758 if (
1745 1759 len(d[0]) > docket.tip_rev
1746 1760 and d[0][docket.tip_rev][7] == docket.tip_node
1747 1761 ):
1748 1762 # no changelog tampering
1749 1763 self._nodemap_docket = docket
1750 1764 index.update_nodemap_data(*nodemap_data)
1751 1765 except (ValueError, IndexError):
1752 1766 raise error.RevlogError(
1753 1767 _(b"index %s is corrupted") % self.display_id
1754 1768 )
1755 1769 self.index = index
1756 1770 # revnum -> (chain-length, sum-delta-length)
1757 1771 self._chaininfocache = util.lrucachedict(500)
1758 1772
1759 1773 return chunkcache
1760 1774
1761 1775 def _load_inner(self, chunk_cache):
1762 1776 if self._docket is None:
1763 1777 default_compression_header = None
1764 1778 else:
1765 1779 default_compression_header = self._docket.default_compression_header
1766 1780
1767 1781 self._inner = _InnerRevlog(
1768 1782 opener=self.opener,
1769 1783 index=self.index,
1770 1784 index_file=self._indexfile,
1771 1785 data_file=self._datafile,
1772 1786 sidedata_file=self._sidedatafile,
1773 1787 inline=self._inline,
1774 1788 data_config=self.data_config,
1775 1789 delta_config=self.delta_config,
1776 1790 feature_config=self.feature_config,
1777 1791 chunk_cache=chunk_cache,
1778 1792 default_compression_header=default_compression_header,
1779 1793 )
1780 1794
1781 1795 def get_revlog(self):
1782 1796 """simple function to mirror API of other not-really-revlog API"""
1783 1797 return self
1784 1798
1785 1799 @util.propertycache
1786 1800 def revlog_kind(self):
1787 1801 return self.target[0]
1788 1802
1789 1803 @util.propertycache
1790 1804 def display_id(self):
1791 1805 """The public facing "ID" of the revlog that we use in message"""
1792 1806 if self.revlog_kind == KIND_FILELOG:
1793 1807 # Reference the file without the "data/" prefix, so it is familiar
1794 1808 # to the user.
1795 1809 return self.target[1]
1796 1810 else:
1797 1811 return self.radix
1798 1812
1799 1813 def _datafp(self, mode=b'r'):
1800 1814 """file object for the revlog's data file"""
1801 1815 return self.opener(self._datafile, mode=mode)
1802 1816
1803 1817 def tiprev(self):
1804 1818 return len(self.index) - 1
1805 1819
1806 1820 def tip(self):
1807 1821 return self.node(self.tiprev())
1808 1822
1809 1823 def __contains__(self, rev):
1810 1824 return 0 <= rev < len(self)
1811 1825
1812 1826 def __len__(self):
1813 1827 return len(self.index)
1814 1828
1815 1829 def __iter__(self):
1816 1830 return iter(range(len(self)))
1817 1831
1818 1832 def revs(self, start=0, stop=None):
1819 1833 """iterate over all rev in this revlog (from start to stop)"""
1820 1834 return storageutil.iterrevs(len(self), start=start, stop=stop)
1821 1835
1822 1836 def hasnode(self, node):
1823 1837 try:
1824 1838 self.rev(node)
1825 1839 return True
1826 1840 except KeyError:
1827 1841 return False
1828 1842
1829 1843 def _candelta(self, baserev, rev):
1830 1844 """whether two revisions (baserev, rev) can be delta-ed or not"""
1831 1845 # Disable delta if either rev requires a content-changing flag
1832 1846 # processor (ex. LFS). This is because such flag processor can alter
1833 1847 # the rawtext content that the delta will be based on, and two clients
1834 1848 # could have a same revlog node with different flags (i.e. different
1835 1849 # rawtext contents) and the delta could be incompatible.
1836 1850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1837 1851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1838 1852 ):
1839 1853 return False
1840 1854 return True
1841 1855
1842 1856 def update_caches(self, transaction):
1843 1857 """update on disk cache
1844 1858
1845 1859 If a transaction is passed, the update may be delayed to transaction
1846 1860 commit."""
1847 1861 if self._nodemap_file is not None:
1848 1862 if transaction is None:
1849 1863 nodemaputil.update_persistent_nodemap(self)
1850 1864 else:
1851 1865 nodemaputil.setup_persistent_nodemap(transaction, self)
1852 1866
1853 1867 def clearcaches(self):
1854 1868 """Clear in-memory caches"""
1855 1869 self._chainbasecache.clear()
1856 1870 self._inner.clear_cache()
1857 1871 self._pcache = {}
1858 1872 self._nodemap_docket = None
1859 1873 self.index.clearcaches()
1860 1874 # The python code is the one responsible for validating the docket, so
1861 1875 # we end up having to refresh it here.
1862 1876 use_nodemap = (
1863 1877 not self._inline
1864 1878 and self._nodemap_file is not None
1865 1879 and hasattr(self.index, 'update_nodemap_data')
1866 1880 )
1867 1881 if use_nodemap:
1868 1882 nodemap_data = nodemaputil.persisted_data(self)
1869 1883 if nodemap_data is not None:
1870 1884 self._nodemap_docket = nodemap_data[0]
1871 1885 self.index.update_nodemap_data(*nodemap_data)
1872 1886
1873 1887 def rev(self, node):
1874 1888 """return the revision number associated with a <nodeid>"""
1875 1889 try:
1876 1890 return self.index.rev(node)
1877 1891 except TypeError:
1878 1892 raise
1879 1893 except error.RevlogError:
1880 1894 # parsers.c radix tree lookup failed
1881 1895 if (
1882 1896 node == self.nodeconstants.wdirid
1883 1897 or node in self.nodeconstants.wdirfilenodeids
1884 1898 ):
1885 1899 raise error.WdirUnsupported
1886 1900 raise error.LookupError(node, self.display_id, _(b'no node'))
1887 1901
1888 1902 # Accessors for index entries.
1889 1903
1890 1904 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1891 1905 # are flags.
1892 1906 def start(self, rev):
1893 1907 return int(self.index[rev][0] >> 16)
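# Illustration (not executed): for a revision starting at byte offset
# `o` and carrying flags `f`, the packed first entry is
#
#     offset_flags = (o << 16) | f
#
# so `start` recovers `o` with a 16-bit right shift and `flags` masks
# with 0xFFFF.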
1894 1908
1895 1909 def sidedata_cut_off(self, rev):
1896 1910 sd_cut_off = self.index[rev][8]
1897 1911 if sd_cut_off != 0:
1898 1912 return sd_cut_off
1899 1913 # This is some annoying dance, because entries without sidedata
1900 1914 # currently use 0 as their offset (instead of previous-offset +
1901 1915 # previous-size).
1902 1916 #
1903 1917 # We should reconsider this sidedata → 0 sidedata_offset policy.
1904 1918 # In the meantime, we need this.
1905 1919 while 0 <= rev:
1906 1920 e = self.index[rev]
1907 1921 if e[9] != 0:
1908 1922 return e[8] + e[9]
1909 1923 rev -= 1
1910 1924 return 0
1911 1925
1912 1926 def flags(self, rev):
1913 1927 return self.index[rev][0] & 0xFFFF
1914 1928
1915 1929 def length(self, rev):
1916 1930 return self.index[rev][1]
1917 1931
1918 1932 def sidedata_length(self, rev):
1919 1933 if not self.feature_config.has_side_data:
1920 1934 return 0
1921 1935 return self.index[rev][9]
1922 1936
1923 1937 def rawsize(self, rev):
1924 1938 """return the length of the uncompressed text for a given revision"""
1925 1939 l = self.index[rev][2]
1926 1940 if l >= 0:
1927 1941 return l
1928 1942
1929 1943 t = self.rawdata(rev)
1930 1944 return len(t)
1931 1945
1932 1946 def size(self, rev):
1933 1947 """length of non-raw text (processed by a "read" flag processor)"""
1934 1948 # fast path: if no "read" flag processor could change the content,
1935 1949 # size is rawsize. note: ELLIPSIS is known to not change the content.
1936 1950 flags = self.flags(rev)
1937 1951 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1938 1952 return self.rawsize(rev)
1939 1953
1940 1954 return len(self.revision(rev))
1941 1955
1942 1956 def fast_rank(self, rev):
1943 1957 """Return the rank of a revision if already known, or None otherwise.
1944 1958
1945 1959 The rank of a revision is the size of the sub-graph it defines as a
1946 1960 head. Equivalently, the rank of a revision `r` is the size of the set
1947 1961 `ancestors(r)`, `r` included.
1948 1962
1949 1963 This method returns the rank retrieved from the revlog in constant
1950 1964 time. It makes no attempt at computing unknown values for versions of
1951 1965 the revlog which do not persist the rank.
1952 1966 """
1953 1967 rank = self.index[rev][ENTRY_RANK]
1954 1968 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1955 1969 return None
1956 1970 if rev == nullrev:
1957 1971 return 0 # convention
1958 1972 return rank
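# For example, in a linear history 0 <- 1 <- 2, the rank of rev 2 is 3,
# since ancestors(2) == {0, 1, 2} (the revision itself included).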
1959 1973
1960 1974 def chainbase(self, rev):
1961 1975 base = self._chainbasecache.get(rev)
1962 1976 if base is not None:
1963 1977 return base
1964 1978
1965 1979 index = self.index
1966 1980 iterrev = rev
1967 1981 base = index[iterrev][3]
1968 1982 while base != iterrev:
1969 1983 iterrev = base
1970 1984 base = index[iterrev][3]
1971 1985
1972 1986 self._chainbasecache[rev] = base
1973 1987 return base
1974 1988
1975 1989 def linkrev(self, rev):
1976 1990 return self.index[rev][4]
1977 1991
1978 1992 def parentrevs(self, rev):
1979 1993 try:
1980 1994 entry = self.index[rev]
1981 1995 except IndexError:
1982 1996 if rev == wdirrev:
1983 1997 raise error.WdirUnsupported
1984 1998 raise
1985 1999
1986 2000 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1987 2001 return entry[6], entry[5]
1988 2002 else:
1989 2003 return entry[5], entry[6]
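# Illustration of the canonical order: an entry stored with parents
# (nullrev, p2) is reported as (p2, nullrev), so a null p1 never hides
# a real parent.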
1990 2004
1991 2005 # fast parentrevs(rev) where rev isn't filtered
1992 2006 _uncheckedparentrevs = parentrevs
1993 2007
1994 2008 def node(self, rev):
1995 2009 try:
1996 2010 return self.index[rev][7]
1997 2011 except IndexError:
1998 2012 if rev == wdirrev:
1999 2013 raise error.WdirUnsupported
2000 2014 raise
2001 2015
2002 2016 # Derived from index values.
2003 2017
2004 2018 def end(self, rev):
2005 2019 return self.start(rev) + self.length(rev)
2006 2020
2007 2021 def parents(self, node):
2008 2022 i = self.index
2009 2023 d = i[self.rev(node)]
2010 2024 # inline node() to avoid function call overhead
2011 2025 if self.feature_config.canonical_parent_order and d[5] == nullrev:
2012 2026 return i[d[6]][7], i[d[5]][7]
2013 2027 else:
2014 2028 return i[d[5]][7], i[d[6]][7]
2015 2029
2016 2030 def chainlen(self, rev):
2017 2031 return self._chaininfo(rev)[0]
2018 2032
2019 2033 def _chaininfo(self, rev):
2020 2034 chaininfocache = self._chaininfocache
2021 2035 if rev in chaininfocache:
2022 2036 return chaininfocache[rev]
2023 2037 index = self.index
2024 2038 generaldelta = self.delta_config.general_delta
2025 2039 iterrev = rev
2026 2040 e = index[iterrev]
2027 2041 clen = 0
2028 2042 compresseddeltalen = 0
2029 2043 while iterrev != e[3]:
2030 2044 clen += 1
2031 2045 compresseddeltalen += e[1]
2032 2046 if generaldelta:
2033 2047 iterrev = e[3]
2034 2048 else:
2035 2049 iterrev -= 1
2036 2050 if iterrev in chaininfocache:
2037 2051 t = chaininfocache[iterrev]
2038 2052 clen += t[0]
2039 2053 compresseddeltalen += t[1]
2040 2054 break
2041 2055 e = index[iterrev]
2042 2056 else:
2043 2057 # Add text length of base since decompressing that also takes
2044 2058 # work. For cache hits the length is already included.
2045 2059 compresseddeltalen += e[1]
2046 2060 r = (clen, compresseddeltalen)
2047 2061 chaininfocache[rev] = r
2048 2062 return r
2049 2063
2050 2064 def _deltachain(self, rev, stoprev=None):
2051 2065 return self._inner._deltachain(rev, stoprev=stoprev)
2052 2066
2053 2067 def ancestors(self, revs, stoprev=0, inclusive=False):
2054 2068 """Generate the ancestors of 'revs' in reverse revision order.
2055 2069 Does not generate revs lower than stoprev.
2056 2070
2057 2071 See the documentation for ancestor.lazyancestors for more details."""
2058 2072
2059 2073 # first, make sure start revisions aren't filtered
2060 2074 revs = list(revs)
2061 2075 checkrev = self.node
2062 2076 for r in revs:
2063 2077 checkrev(r)
2064 2078 # and we're sure ancestors aren't filtered as well
2065 2079
2066 2080 if rustancestor is not None and self.index.rust_ext_compat:
2067 2081 lazyancestors = rustancestor.LazyAncestors
2068 2082 arg = self.index
2069 2083 else:
2070 2084 lazyancestors = ancestor.lazyancestors
2071 2085 arg = self._uncheckedparentrevs
2072 2086 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2073 2087
2074 2088 def descendants(self, revs):
2075 2089 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2076 2090
2077 2091 def findcommonmissing(self, common=None, heads=None):
2078 2092 """Return a tuple of the ancestors of common and the ancestors of heads
2079 2093 that are not ancestors of common. In revset terminology, we return the
2080 2094 tuple:
2081 2095
2082 2096 ::common, (::heads) - (::common)
2083 2097
2084 2098 The list is sorted by revision number, meaning it is
2085 2099 topologically sorted.
2086 2100
2087 2101 'heads' and 'common' are both lists of node IDs. If heads is
2088 2102 not supplied, uses all of the revlog's heads. If common is not
2089 2103 supplied, uses nullid."""
2090 2104 if common is None:
2091 2105 common = [self.nullid]
2092 2106 if heads is None:
2093 2107 heads = self.heads()
2094 2108
2095 2109 common = [self.rev(n) for n in common]
2096 2110 heads = [self.rev(n) for n in heads]
2097 2111
2098 2112 # we want the ancestors, but inclusive
2099 2113 class lazyset:
2100 2114 def __init__(self, lazyvalues):
2101 2115 self.addedvalues = set()
2102 2116 self.lazyvalues = lazyvalues
2103 2117
2104 2118 def __contains__(self, value):
2105 2119 return value in self.addedvalues or value in self.lazyvalues
2106 2120
2107 2121 def __iter__(self):
2108 2122 added = self.addedvalues
2109 2123 for r in added:
2110 2124 yield r
2111 2125 for r in self.lazyvalues:
2112 2126 if r not in added:
2113 2127 yield r
2114 2128
2115 2129 def add(self, value):
2116 2130 self.addedvalues.add(value)
2117 2131
2118 2132 def update(self, values):
2119 2133 self.addedvalues.update(values)
2120 2134
2121 2135 has = lazyset(self.ancestors(common))
2122 2136 has.add(nullrev)
2123 2137 has.update(common)
2124 2138
2125 2139 # take all ancestors from heads that aren't in has
2126 2140 missing = set()
2127 2141 visit = collections.deque(r for r in heads if r not in has)
2128 2142 while visit:
2129 2143 r = visit.popleft()
2130 2144 if r in missing:
2131 2145 continue
2132 2146 else:
2133 2147 missing.add(r)
2134 2148 for p in self.parentrevs(r):
2135 2149 if p not in has:
2136 2150 visit.append(p)
2137 2151 missing = list(missing)
2138 2152 missing.sort()
2139 2153 return has, [self.node(miss) for miss in missing]
2140 2154
2141 2155 def incrementalmissingrevs(self, common=None):
2142 2156 """Return an object that can be used to incrementally compute the
2143 2157 revision numbers of the ancestors of arbitrary sets that are not
2144 2158 ancestors of common. This is an ancestor.incrementalmissingancestors
2145 2159 object.
2146 2160
2147 2161 'common' is a list of revision numbers. If common is not supplied, uses
2148 2162 nullrev.
2149 2163 """
2150 2164 if common is None:
2151 2165 common = [nullrev]
2152 2166
2153 2167 if rustancestor is not None and self.index.rust_ext_compat:
2154 2168 return rustancestor.MissingAncestors(self.index, common)
2155 2169 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2156 2170
2157 2171 def findmissingrevs(self, common=None, heads=None):
2158 2172 """Return the revision numbers of the ancestors of heads that
2159 2173 are not ancestors of common.
2160 2174
2161 2175 More specifically, return a list of revision numbers corresponding to
2162 2176 nodes N such that every N satisfies the following constraints:
2163 2177
2164 2178 1. N is an ancestor of some node in 'heads'
2165 2179 2. N is not an ancestor of any node in 'common'
2166 2180
2167 2181 The list is sorted by revision number, meaning it is
2168 2182 topologically sorted.
2169 2183
2170 2184 'heads' and 'common' are both lists of revision numbers. If heads is
2171 2185 not supplied, uses all of the revlog's heads. If common is not
2172 2186 supplied, uses nullrev."""
2173 2187 if common is None:
2174 2188 common = [nullrev]
2175 2189 if heads is None:
2176 2190 heads = self.headrevs()
2177 2191
2178 2192 inc = self.incrementalmissingrevs(common=common)
2179 2193 return inc.missingancestors(heads)
2180 2194
2181 2195 def findmissing(self, common=None, heads=None):
2182 2196 """Return the ancestors of heads that are not ancestors of common.
2183 2197
2184 2198 More specifically, return a list of nodes N such that every N
2185 2199 satisfies the following constraints:
2186 2200
2187 2201 1. N is an ancestor of some node in 'heads'
2188 2202 2. N is not an ancestor of any node in 'common'
2189 2203
2190 2204 The list is sorted by revision number, meaning it is
2191 2205 topologically sorted.
2192 2206
2193 2207 'heads' and 'common' are both lists of node IDs. If heads is
2194 2208 not supplied, uses all of the revlog's heads. If common is not
2195 2209 supplied, uses nullid."""
2196 2210 if common is None:
2197 2211 common = [self.nullid]
2198 2212 if heads is None:
2199 2213 heads = self.heads()
2200 2214
2201 2215 common = [self.rev(n) for n in common]
2202 2216 heads = [self.rev(n) for n in heads]
2203 2217
2204 2218 inc = self.incrementalmissingrevs(common=common)
2205 2219 return [self.node(r) for r in inc.missingancestors(heads)]
2206 2220
2207 2221 def nodesbetween(self, roots=None, heads=None):
2208 2222 """Return a topological path from 'roots' to 'heads'.
2209 2223
2210 2224 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2211 2225 topologically sorted list of all nodes N that satisfy both of
2212 2226 these constraints:
2213 2227
2214 2228 1. N is a descendant of some node in 'roots'
2215 2229 2. N is an ancestor of some node in 'heads'
2216 2230
2217 2231 Every node is considered to be both a descendant and an ancestor
2218 2232 of itself, so every reachable node in 'roots' and 'heads' will be
2219 2233 included in 'nodes'.
2220 2234
2221 2235 'outroots' is the list of reachable nodes in 'roots', i.e., the
2222 2236 subset of 'roots' that is returned in 'nodes'. Likewise,
2223 2237 'outheads' is the subset of 'heads' that is also in 'nodes'.
2224 2238
2225 2239 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2226 2240 unspecified, uses nullid as the only root. If 'heads' is
2227 2241 unspecified, uses list of all of the revlog's heads."""
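# Illustration: in a linear history A -> B -> C -> D, calling
# nodesbetween(roots=[B], heads=[D]) returns ([B, C, D], [B], [D]).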
2228 2242 nonodes = ([], [], [])
2229 2243 if roots is not None:
2230 2244 roots = list(roots)
2231 2245 if not roots:
2232 2246 return nonodes
2233 2247 lowestrev = min([self.rev(n) for n in roots])
2234 2248 else:
2235 2249 roots = [self.nullid] # Everybody's a descendant of nullid
2236 2250 lowestrev = nullrev
2237 2251 if (lowestrev == nullrev) and (heads is None):
2238 2252 # We want _all_ the nodes!
2239 2253 return (
2240 2254 [self.node(r) for r in self],
2241 2255 [self.nullid],
2242 2256 list(self.heads()),
2243 2257 )
2244 2258 if heads is None:
2245 2259 # All nodes are ancestors, so the latest ancestor is the last
2246 2260 # node.
2247 2261 highestrev = len(self) - 1
2248 2262 # Set ancestors to None to signal that every node is an ancestor.
2249 2263 ancestors = None
2250 2264 # Set heads to an empty dictionary for later discovery of heads
2251 2265 heads = {}
2252 2266 else:
2253 2267 heads = list(heads)
2254 2268 if not heads:
2255 2269 return nonodes
2256 2270 ancestors = set()
2257 2271 # Turn heads into a dictionary so we can remove 'fake' heads.
2258 2272 # Also, later we will be using it to filter out the heads we can't
2259 2273 # find from roots.
2260 2274 heads = dict.fromkeys(heads, False)
2261 2275 # Start at the top and keep marking parents until we're done.
2262 2276 nodestotag = set(heads)
2263 2277 # Remember where the top was so we can use it as a limit later.
2264 2278 highestrev = max([self.rev(n) for n in nodestotag])
2265 2279 while nodestotag:
2266 2280 # grab a node to tag
2267 2281 n = nodestotag.pop()
2268 2282 # Never tag nullid
2269 2283 if n == self.nullid:
2270 2284 continue
2271 2285 # A node's revision number represents its place in a
2272 2286 # topologically sorted list of nodes.
2273 2287 r = self.rev(n)
2274 2288 if r >= lowestrev:
2275 2289 if n not in ancestors:
2276 2290 # If we are possibly a descendant of one of the roots
2277 2291 # and we haven't already been marked as an ancestor
2278 2292 ancestors.add(n) # Mark as ancestor
2279 2293 # Add non-nullid parents to list of nodes to tag.
2280 2294 nodestotag.update(
2281 2295 [p for p in self.parents(n) if p != self.nullid]
2282 2296 )
2283 2297 elif n in heads: # We've seen it before, is it a fake head?
2284 2298 # So it is, real heads should not be the ancestors of
2285 2299 # any other heads.
2286 2300 heads.pop(n)
2287 2301 if not ancestors:
2288 2302 return nonodes
2289 2303 # Now that we have our set of ancestors, we want to remove any
2290 2304 # roots that are not ancestors.
2291 2305
2292 2306 # If one of the roots was nullid, everything is included anyway.
2293 2307 if lowestrev > nullrev:
2294 2308 # But, since we weren't, let's recompute the lowest rev to not
2295 2309 # include roots that aren't ancestors.
2296 2310
2297 2311 # Filter out roots that aren't ancestors of heads
2298 2312 roots = [root for root in roots if root in ancestors]
2299 2313 # Recompute the lowest revision
2300 2314 if roots:
2301 2315 lowestrev = min([self.rev(root) for root in roots])
2302 2316 else:
2303 2317 # No more roots? Return empty list
2304 2318 return nonodes
2305 2319 else:
2306 2320 # We are descending from nullid, and don't need to care about
2307 2321 # any other roots.
2308 2322 lowestrev = nullrev
2309 2323 roots = [self.nullid]
2310 2324 # Transform our roots list into a set.
2311 2325 descendants = set(roots)
2312 2326 # Also, keep the original roots so we can filter out roots that aren't
2313 2327 # 'real' roots (i.e. are descended from other roots).
2314 2328 roots = descendants.copy()
2315 2329 # Our topologically sorted list of output nodes.
2316 2330 orderedout = []
2317 2331 # Don't start at nullid since we don't want nullid in our output list,
2318 2332 # and if nullid shows up in descendants, empty parents will look like
2319 2333 # they're descendants.
2320 2334 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2321 2335 n = self.node(r)
2322 2336 isdescendant = False
2323 2337 if lowestrev == nullrev: # Everybody is a descendant of nullid
2324 2338 isdescendant = True
2325 2339 elif n in descendants:
2326 2340 # n is already a descendant
2327 2341 isdescendant = True
2328 2342 # This check only needs to be done here because all the roots
2329 2343 # will start being marked as descendants before the loop.
2330 2344 if n in roots:
2331 2345 # If n was a root, check if it's a 'real' root.
2332 2346 p = tuple(self.parents(n))
2333 2347 # If any of its parents are descendants, it's not a root.
2334 2348 if (p[0] in descendants) or (p[1] in descendants):
2335 2349 roots.remove(n)
2336 2350 else:
2337 2351 p = tuple(self.parents(n))
2338 2352 # A node is a descendant if either of its parents are
2339 2353 # descendants. (We seeded the descendants set with the roots
2340 2354 # up there, remember?)
2341 2355 if (p[0] in descendants) or (p[1] in descendants):
2342 2356 descendants.add(n)
2343 2357 isdescendant = True
2344 2358 if isdescendant and ((ancestors is None) or (n in ancestors)):
2345 2359 # Only include nodes that are both descendants and ancestors.
2346 2360 orderedout.append(n)
2347 2361 if (ancestors is not None) and (n in heads):
2348 2362 # We're trying to figure out which heads are reachable
2349 2363 # from roots.
2350 2364 # Mark this head as having been reached
2351 2365 heads[n] = True
2352 2366 elif ancestors is None:
2353 2367 # Otherwise, we're trying to discover the heads.
2354 2368 # Assume this is a head because if it isn't, the next step
2355 2369 # will eventually remove it.
2356 2370 heads[n] = True
2357 2371 # But, obviously its parents aren't.
2358 2372 for p in self.parents(n):
2359 2373 heads.pop(p, None)
2360 2374 heads = [head for head, flag in heads.items() if flag]
2361 2375 roots = list(roots)
2362 2376 assert orderedout
2363 2377 assert roots
2364 2378 assert heads
2365 2379 return (orderedout, roots, heads)
2366 2380
2367 2381 def headrevs(self, revs=None):
2368 2382 if revs is None:
2369 2383 try:
2370 2384 return self.index.headrevs()
2371 2385 except AttributeError:
2372 2386 return self._headrevs()
2373 2387 if rustdagop is not None and self.index.rust_ext_compat:
2374 2388 return rustdagop.headrevs(self.index, revs)
2375 2389 return dagop.headrevs(revs, self._uncheckedparentrevs)
2376 2390
2377 2391 def headrevsdiff(self, start, stop):
2378 2392 try:
2379 2393 return self.index.headrevsdiff(start, stop)
2380 2394 except AttributeError:
2381 2395 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2382 2396
2383 2397 def computephases(self, roots):
2384 2398 return self.index.computephasesmapsets(roots)
2385 2399
2386 2400 def _headrevs(self):
2387 2401 count = len(self)
2388 2402 if not count:
2389 2403 return [nullrev]
2390 2404 # we won't iterate over filtered revs so nobody is a head at start
2391 2405 ishead = [0] * (count + 1)
2392 2406 index = self.index
2393 2407 for r in self:
2394 2408 ishead[r] = 1 # I may be a head
2395 2409 e = index[r]
2396 2410 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2397 2411 return [r for r, val in enumerate(ishead) if val]
2398 2412
2399 2413 def _head_node_ids(self):
2400 2414 try:
2401 2415 return self.index.head_node_ids()
2402 2416 except AttributeError:
2403 2417 return [self.node(r) for r in self.headrevs()]
2404 2418
2405 2419 def heads(self, start=None, stop=None):
2406 2420 """return the list of all nodes that have no children
2407 2421
2408 2422 if start is specified, only heads that are descendants of
2409 2423 start will be returned
2410 2424 if stop is specified, it will consider all the revs from stop
2411 2425 as if they had no children
2412 2426 """
2413 2427 if start is None and stop is None:
2414 2428 if not len(self):
2415 2429 return [self.nullid]
2416 2430 return self._head_node_ids()
2417 2431 if start is None:
2418 2432 start = nullrev
2419 2433 else:
2420 2434 start = self.rev(start)
2421 2435
2422 2436 stoprevs = {self.rev(n) for n in stop or []}
2423 2437
2424 2438 revs = dagop.headrevssubset(
2425 2439 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2426 2440 )
2427 2441
2428 2442 return [self.node(rev) for rev in revs]
2429 2443
2430 2444 def diffheads(self, start, stop):
2431 2445 """return the nodes that make up the difference between
2432 2446 heads of revs before `start` and heads of revs before `stop`"""
2433 2447 removed, added = self.headrevsdiff(start, stop)
2434 2448 return [self.node(r) for r in removed], [self.node(r) for r in added]
2435 2449
2436 2450 def children(self, node):
2437 2451 """find the children of a given node"""
2438 2452 c = []
2439 2453 p = self.rev(node)
2440 2454 for r in self.revs(start=p + 1):
2441 2455 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2442 2456 if prevs:
2443 2457 for pr in prevs:
2444 2458 if pr == p:
2445 2459 c.append(self.node(r))
2446 2460 elif p == nullrev:
2447 2461 c.append(self.node(r))
2448 2462 return c
2449 2463
2450 2464 def commonancestorsheads(self, a, b):
2451 2465 """calculate all the heads of the common ancestors of nodes a and b"""
2452 2466 a, b = self.rev(a), self.rev(b)
2453 2467 ancs = self._commonancestorsheads(a, b)
2454 2468 return pycompat.maplist(self.node, ancs)
2455 2469
2456 2470 def _commonancestorsheads(self, *revs):
2457 2471 """calculate all the heads of the common ancestors of revs"""
2458 2472 try:
2459 2473 ancs = self.index.commonancestorsheads(*revs)
2460 2474 except (AttributeError, OverflowError): # C implementation failed
2461 2475 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2462 2476 return ancs
2463 2477
2464 2478 def isancestor(self, a, b):
2465 2479 """return True if node a is an ancestor of node b
2466 2480
2467 2481 A revision is considered an ancestor of itself."""
2468 2482 a, b = self.rev(a), self.rev(b)
2469 2483 return self.isancestorrev(a, b)
2470 2484
2471 2485 def isancestorrev(self, a, b):
2472 2486 """return True if revision a is an ancestor of revision b
2473 2487
2474 2488 A revision is considered an ancestor of itself.
2475 2489
2476 2490 The implementation of this is trivial but the use of
2477 2491 reachableroots is not."""
2478 2492 if a == nullrev:
2479 2493 return True
2480 2494 elif a == b:
2481 2495 return True
2482 2496 elif a > b:
2483 2497 return False
2484 2498 return bool(self.reachableroots(a, [b], [a], includepath=False))
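# Sketch of why this works: reachableroots(a, [b], [a]) is non-empty
# exactly when `a` can be reached from `b` by following parent links,
# i.e. when `a` is an ancestor of `b`.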
2485 2499
2486 2500 def reachableroots(self, minroot, heads, roots, includepath=False):
2487 2501 """return (heads(::(<roots> and <roots>::<heads>)))
2488 2502
2489 2503 If includepath is True, return (<roots>::<heads>)."""
2490 2504 try:
2491 2505 return self.index.reachableroots2(
2492 2506 minroot, heads, roots, includepath
2493 2507 )
2494 2508 except AttributeError:
2495 2509 return dagop._reachablerootspure(
2496 2510 self.parentrevs, minroot, roots, heads, includepath
2497 2511 )
2498 2512
2499 2513 def ancestor(self, a, b):
2500 2514 """calculate the "best" common ancestor of nodes a and b"""
2501 2515
2502 2516 a, b = self.rev(a), self.rev(b)
2503 2517 try:
2504 2518 ancs = self.index.ancestors(a, b)
2505 2519 except (AttributeError, OverflowError):
2506 2520 ancs = ancestor.ancestors(self.parentrevs, a, b)
2507 2521 if ancs:
2508 2522 # choose a consistent winner when there's a tie
2509 2523 return min(map(self.node, ancs))
2510 2524 return self.nullid
2511 2525
2512 2526 def _match(self, id):
2513 2527 if isinstance(id, int):
2514 2528 # rev
2515 2529 return self.node(id)
2516 2530 if len(id) == self.nodeconstants.nodelen:
2517 2531 # possibly a binary node
2518 2532 # odds of a binary node being all hex in ASCII are 1 in 10**25
2519 2533 try:
2520 2534 node = id
2521 2535 self.rev(node) # quick search the index
2522 2536 return node
2523 2537 except error.LookupError:
2524 2538 pass # may be partial hex id
2525 2539 try:
2526 2540 # str(rev)
2527 2541 rev = int(id)
2528 2542 if b"%d" % rev != id:
2529 2543 raise ValueError
2530 2544 if rev < 0:
2531 2545 rev = len(self) + rev
2532 2546 if rev < 0 or rev >= len(self):
2533 2547 raise ValueError
2534 2548 return self.node(rev)
2535 2549 except (ValueError, OverflowError):
2536 2550 pass
2537 2551 if len(id) == 2 * self.nodeconstants.nodelen:
2538 2552 try:
2539 2553 # a full hex nodeid?
2540 2554 node = bin(id)
2541 2555 self.rev(node)
2542 2556 return node
2543 2557 except (binascii.Error, error.LookupError):
2544 2558 pass
2545 2559
2546 2560 def _partialmatch(self, id):
2547 2561 # we don't care about wdirfilenodeids as they should always be full hashes
2548 2562 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2549 2563 ambiguous = False
2550 2564 try:
2551 2565 partial = self.index.partialmatch(id)
2552 2566 if partial and self.hasnode(partial):
2553 2567 if maybewdir:
2554 2568 # single 'ff...' match in radix tree, ambiguous with wdir
2555 2569 ambiguous = True
2556 2570 else:
2557 2571 return partial
2558 2572 elif maybewdir:
2559 2573 # no 'ff...' match in radix tree, wdir identified
2560 2574 raise error.WdirUnsupported
2561 2575 else:
2562 2576 return None
2563 2577 except error.RevlogError:
2564 2578 # parsers.c radix tree lookup gave multiple matches
2565 2579 # fast path: for unfiltered changelog, radix tree is accurate
2566 2580 if not getattr(self, 'filteredrevs', None):
2567 2581 ambiguous = True
2568 2582 # fall through to slow path that filters hidden revisions
2569 2583 except (AttributeError, ValueError):
2570 2584 # we are pure python, or key is not hex
2571 2585 pass
2572 2586 if ambiguous:
2573 2587 raise error.AmbiguousPrefixLookupError(
2574 2588 id, self.display_id, _(b'ambiguous identifier')
2575 2589 )
2576 2590
2577 2591 if id in self._pcache:
2578 2592 return self._pcache[id]
2579 2593
2580 2594 if len(id) <= 40:
2581 2595 # hex(node)[:...]
2582 2596 l = len(id) // 2 * 2 # grab an even number of digits
2583 2597 try:
2584 2598 # we're dropping the last digit, so let's check that it's hex,
2585 2599 # to avoid the expensive computation below if it's not
2586 2600 if len(id) % 2 > 0:
2587 2601 if id[-1] not in hexdigits:
2588 2602 return None
2589 2603 prefix = bin(id[:l])
2590 2604 except binascii.Error:
2591 2605 pass
2592 2606 else:
2593 2607 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2594 2608 nl = [
2595 2609 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2596 2610 ]
2597 2611 if self.nodeconstants.nullhex.startswith(id):
2598 2612 nl.append(self.nullid)
2599 2613 if len(nl) > 0:
2600 2614 if len(nl) == 1 and not maybewdir:
2601 2615 self._pcache[id] = nl[0]
2602 2616 return nl[0]
2603 2617 raise error.AmbiguousPrefixLookupError(
2604 2618 id, self.display_id, _(b'ambiguous identifier')
2605 2619 )
2606 2620 if maybewdir:
2607 2621 raise error.WdirUnsupported
2608 2622 return None
2609 2623
2610 2624 def lookup(self, id):
2611 2625 """locate a node based on:
2612 2626 - revision number or str(revision number)
2613 2627 - nodeid or subset of hex nodeid
2614 2628 """
2615 2629 n = self._match(id)
2616 2630 if n is not None:
2617 2631 return n
2618 2632 n = self._partialmatch(id)
2619 2633 if n:
2620 2634 return n
2621 2635
2622 2636 raise error.LookupError(id, self.display_id, _(b'no match found'))
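# Illustrative usage (not a doctest): for a revlog `rlog` with at least
# six revisions, all of the following resolve to the same node:
#
#     rlog.lookup(5)
#     rlog.lookup(b'5')
#     rlog.lookup(rlog.node(5))
#     rlog.lookup(hex(rlog.node(5))[:12])  # if the prefix is unambiguous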
2623 2637
2624 2638 def shortest(self, node, minlength=1):
2625 2639 """Find the shortest unambiguous prefix that matches node."""
2626 2640
2627 2641 def isvalid(prefix):
2628 2642 try:
2629 2643 matchednode = self._partialmatch(prefix)
2630 2644 except error.AmbiguousPrefixLookupError:
2631 2645 return False
2632 2646 except error.WdirUnsupported:
2633 2647 # single 'ff...' match
2634 2648 return True
2635 2649 if matchednode is None:
2636 2650 raise error.LookupError(node, self.display_id, _(b'no node'))
2637 2651 return True
2638 2652
2639 2653 def maybewdir(prefix):
2640 2654 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2641 2655
2642 2656 hexnode = hex(node)
2643 2657
2644 2658 def disambiguate(hexnode, minlength):
2645 2659 """Disambiguate against wdirid."""
2646 2660 for length in range(minlength, len(hexnode) + 1):
2647 2661 prefix = hexnode[:length]
2648 2662 if not maybewdir(prefix):
2649 2663 return prefix
2650 2664
2651 2665 if not getattr(self, 'filteredrevs', None):
2652 2666 try:
2653 2667 length = max(self.index.shortest(node), minlength)
2654 2668 return disambiguate(hexnode, length)
2655 2669 except error.RevlogError:
2656 2670 if node != self.nodeconstants.wdirid:
2657 2671 raise error.LookupError(
2658 2672 node, self.display_id, _(b'no node')
2659 2673 )
2660 2674 except AttributeError:
2661 2675 # Fall through to pure code
2662 2676 pass
2663 2677
2664 2678 if node == self.nodeconstants.wdirid:
2665 2679 for length in range(minlength, len(hexnode) + 1):
2666 2680 prefix = hexnode[:length]
2667 2681 if isvalid(prefix):
2668 2682 return prefix
2669 2683
2670 2684 for length in range(minlength, len(hexnode) + 1):
2671 2685 prefix = hexnode[:length]
2672 2686 if isvalid(prefix):
2673 2687 return disambiguate(hexnode, length)
2674 2688
2675 2689 def cmp(self, node, text):
2676 2690 """compare text with a given file revision
2677 2691
2678 2692 returns True if text is different than what is stored.
2679 2693 """
2680 2694 p1, p2 = self.parents(node)
2681 2695 return storageutil.hashrevisionsha1(text, p1, p2) != node
2682 2696
2683 2697 def deltaparent(self, rev):
2684 2698 """return deltaparent of the given revision"""
2685 2699 base = self.index[rev][3]
2686 2700 if base == rev:
2687 2701 return nullrev
2688 2702 elif self.delta_config.general_delta:
2689 2703 return base
2690 2704 else:
2691 2705 return rev - 1
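# Illustration: with general delta the base may be any earlier revision
# (whatever was chosen at write time); without it, the delta is always
# against rev - 1. A nullrev result means the revision is stored as a
# full snapshot.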
2692 2706
2693 2707 def issnapshot(self, rev):
2694 2708 """tells whether rev is a snapshot"""
2695 2709 ret = self._inner.issnapshot(rev)
2696 2710 self.issnapshot = self._inner.issnapshot
2697 2711 return ret
2698 2712
2699 2713 def snapshotdepth(self, rev):
2700 2714 """number of snapshot in the chain before this one"""
2701 2715 if not self.issnapshot(rev):
2702 2716 raise error.ProgrammingError(b'revision %d not a snapshot')
2703 2717 return len(self._inner._deltachain(rev)[0]) - 1
2704 2718
2705 2719 def revdiff(self, rev1, rev2):
2706 2720 """return or calculate a delta between two revisions
2707 2721
2708 2722 The delta calculated is in binary form and is intended to be written to
2709 2723 revlog data directly. So this function needs raw revision data.
2710 2724 """
2711 2725 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2712 2726 return bytes(self._inner._chunk(rev2))
2713 2727
2714 2728 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
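# Sketch of the inverse operation (illustrative only): a delta returned
# here can be applied with mdiff.patch to recover the target rawtext:
#
#     delta = rlog.revdiff(rev1, rev2)
#     assert bytes(mdiff.patch(rlog.rawdata(rev1), delta)) == rlog.rawdata(rev2)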
2715 2729
2716 2730 def revision(self, nodeorrev):
2717 2731 """return an uncompressed revision of a given node or revision
2718 2732 number.
2719 2733 """
2720 2734 return self._revisiondata(nodeorrev)
2721 2735
2722 2736 def sidedata(self, nodeorrev):
2723 2737 """a map of extra data related to the changeset but not part of the hash
2724 2738
2725 2739 This function currently returns a dictionary. However, a more advanced
2726 2740 mapping object will likely be used in the future for more
2727 2741 efficient/lazy code.
2728 2742 """
2729 2743 # deal with <nodeorrev> argument type
2730 2744 if isinstance(nodeorrev, int):
2731 2745 rev = nodeorrev
2732 2746 else:
2733 2747 rev = self.rev(nodeorrev)
2734 2748 return self._sidedata(rev)
2735 2749
2736 2750 def _rawtext(self, node, rev):
2737 2751 """return the possibly unvalidated rawtext for a revision
2738 2752
2739 2753 returns (rev, rawtext, validated)
2740 2754 """
2741 2755 # Check if we have the entry in cache
2742 2756 # The cache entry looks like (node, rev, rawtext)
2743 2757 if self._inner._revisioncache:
2744 2758 if self._inner._revisioncache[0] == node:
2745 2759 return (rev, self._inner._revisioncache[2], True)
2746 2760
2747 2761 if rev is None:
2748 2762 rev = self.rev(node)
2749 2763
2750 2764 return self._inner.raw_text(node, rev)
2751 2765
2752 2766 def _revisiondata(self, nodeorrev, raw=False):
2753 2767 # deal with <nodeorrev> argument type
2754 2768 if isinstance(nodeorrev, int):
2755 2769 rev = nodeorrev
2756 2770 node = self.node(rev)
2757 2771 else:
2758 2772 node = nodeorrev
2759 2773 rev = None
2760 2774
2761 2775 # fast path the special `nullid` rev
2762 2776 if node == self.nullid:
2763 2777 return b""
2764 2778
2765 2779 # ``rawtext`` is the text as stored inside the revlog. Might be the
2766 2780 # revision or might need to be processed to retrieve the revision.
2767 2781 rev, rawtext, validated = self._rawtext(node, rev)
2768 2782
2769 2783 if raw and validated:
2770 2784 # if we don't want to process the raw text and that raw
2771 2785 # text is cached, we can exit early.
2772 2786 return rawtext
2773 2787 if rev is None:
2774 2788 rev = self.rev(node)
2775 2789 # the revlog's flag for this revision
2776 2790 # (usually alter its state or content)
2777 2791 flags = self.flags(rev)
2778 2792
2779 2793 if validated and flags == REVIDX_DEFAULT_FLAGS:
2780 2794 # no extra flags set, no flag processor runs, text = rawtext
2781 2795 return rawtext
2782 2796
2783 2797 if raw:
2784 2798 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2785 2799 text = rawtext
2786 2800 else:
2787 2801 r = flagutil.processflagsread(self, rawtext, flags)
2788 2802 text, validatehash = r
2789 2803 if validatehash:
2790 2804 self.checkhash(text, node, rev=rev)
2791 2805 if not validated:
2792 2806 self._inner._revisioncache = (node, rev, rawtext)
2793 2807
2794 2808 return text
2795 2809
2796 2810 def _sidedata(self, rev):
2797 2811 """Return the sidedata for a given revision number."""
2798 2812 sidedata_end = None
2799 2813 if self._docket is not None:
2800 2814 sidedata_end = self._docket.sidedata_end
2801 2815 return self._inner.sidedata(rev, sidedata_end)
2802 2816
2803 2817 def rawdata(self, nodeorrev):
2804 2818 """return an uncompressed raw data of a given node or revision number."""
2805 2819 return self._revisiondata(nodeorrev, raw=True)
2806 2820
2807 2821 def hash(self, text, p1, p2):
2808 2822 """Compute a node hash.
2809 2823
2810 2824 Available as a function so that subclasses can replace the hash
2811 2825 as needed.
2812 2826 """
2813 2827 return storageutil.hashrevisionsha1(text, p1, p2)
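# For reference, this is conceptually (a sketch, ignoring fast paths):
#
#     s = hashlib.sha1(min(p1, p2) + max(p1, p2))
#     s.update(text)
#     node = s.digest()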
2814 2828
2815 2829 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2816 2830 """Check node hash integrity.
2817 2831
2818 2832 Available as a function so that subclasses can extend hash mismatch
2819 2833 behaviors as needed.
2820 2834 """
2821 2835 try:
2822 2836 if p1 is None and p2 is None:
2823 2837 p1, p2 = self.parents(node)
2824 2838 if node != self.hash(text, p1, p2):
2825 2839 # Clear the revision cache on hash failure. The revision cache
2826 2840 # only stores the raw revision and clearing the cache does have
2827 2841 # the side-effect that we won't have a cache hit when the raw
2828 2842 # revision data is accessed. But this case should be rare and
2829 2843 # it is extra work to teach the cache about the hash
2830 2844 # verification state.
2831 2845 if (
2832 2846 self._inner._revisioncache
2833 2847 and self._inner._revisioncache[0] == node
2834 2848 ):
2835 2849 self._inner._revisioncache = None
2836 2850
2837 2851 revornode = rev
2838 2852 if revornode is None:
2839 2853 revornode = templatefilters.short(hex(node))
2840 2854 raise error.RevlogError(
2841 2855 _(b"integrity check failed on %s:%s")
2842 2856 % (self.display_id, pycompat.bytestr(revornode))
2843 2857 )
2844 2858 except error.RevlogError:
2845 2859 if self.feature_config.censorable and storageutil.iscensoredtext(
2846 2860 text
2847 2861 ):
2848 2862 raise error.CensoredNodeError(self.display_id, node, text)
2849 2863 raise
2850 2864
2851 2865 @property
2852 2866 def _split_index_file(self):
2853 2867 """the path where to expect the index of an ongoing splitting operation
2854 2868
2855 2869 The file will only exist if a splitting operation is in progress, but
2856 2870 it is always expected at the same location."""
2857 2871 parts = self.radix.split(b'/')
2858 2872 if len(parts) > 1:
2859 2873 # adds a '-s' suffix to the `data/` or `meta/` prefix
2860 2874 head = parts[0] + b'-s'
2861 2875 mids = parts[1:-1]
2862 2876 tail = parts[-1] + b'.i'
2863 2877 pieces = [head] + mids + [tail]
2864 2878 return b'/'.join(pieces)
2865 2879 else:
2866 2880 # the revlog is stored at the root of the store (changelog or
2867 2881 # manifest), no risk of collision.
2868 2882 return self.radix + b'.i.s'
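# Worked example: a radix of b'data/some/dir/file' maps to
# b'data-s/some/dir/file.i', while a store-root radix such as
# b'00changelog' maps to b'00changelog.i.s'.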
2869 2883
2870 2884 def _enforceinlinesize(self, tr):
2871 2885 """Check if the revlog is too big for inline and convert if so.
2872 2886
2873 2887 This should be called after revisions are added to the revlog. If the
2874 2888 revlog has grown too large to be an inline revlog, it will convert it
2875 2889 to use multiple index and data files.
2876 2890 """
2877 2891 tiprev = len(self) - 1
2878 2892 total_size = self.start(tiprev) + self.length(tiprev)
2879 2893 if not self._inline or (self._may_inline and total_size < _maxinline):
2880 2894 return
2881 2895
2882 2896 if self._docket is not None:
2883 2897 msg = b"inline revlog should not have a docket"
2884 2898 raise error.ProgrammingError(msg)
2885 2899
2886 2900 # In the common case, we enforce the inline size because the revlog has
2887 2901 # been appended to. In such a case, it must have an initial offset
2888 2902 # recorded in the transaction.
2889 2903 troffset = tr.findoffset(self._inner.canonical_index_file)
2890 2904 pre_touched = troffset is not None
2891 2905 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2892 2906 raise error.RevlogError(
2893 2907 _(b"%s not found in the transaction") % self._indexfile
2894 2908 )
2895 2909
2896 2910 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2897 2911 tr.add(self._datafile, 0)
2898 2912
2900 2914 old_index_file_path = self._indexfile
2901 2915 new_index_file_path = self._split_index_file
2902 2916 opener = self.opener
2903 2917 weak_self = weakref.ref(self)
2904 2918
2905 2919 # the "split" index replace the real index when the transaction is
2906 2920 # finalized
2907 2921 def finalize_callback(tr):
2908 2922 opener.rename(
2909 2923 new_index_file_path,
2910 2924 old_index_file_path,
2911 2925 checkambig=True,
2912 2926 )
2913 2927 maybe_self = weak_self()
2914 2928 if maybe_self is not None:
2915 2929 maybe_self._indexfile = old_index_file_path
2916 2930 maybe_self._inner.index_file = maybe_self._indexfile
2917 2931
2918 2932 def abort_callback(tr):
2919 2933 maybe_self = weak_self()
2920 2934 if maybe_self is not None:
2921 2935 maybe_self._indexfile = old_index_file_path
2922 2936 maybe_self._inner.inline = True
2923 2937 maybe_self._inner.index_file = old_index_file_path
2924 2938
2925 2939 tr.registertmp(new_index_file_path)
2926 2940 # we use 001 here to make sure this happens after the finalization of
2927 2941 # the pending changelog write (which uses 000). Otherwise the two
2928 2942 # finalizers would step over each other and delete the changelog.i file.
2929 2943 if self.target[1] is not None:
2930 2944 callback_id = b'001-revlog-split-%d-%s' % self.target
2931 2945 else:
2932 2946 callback_id = b'001-revlog-split-%d' % self.target[0]
2933 2947 tr.addfinalize(callback_id, finalize_callback)
2934 2948 tr.addabort(callback_id, abort_callback)
2935 2949
2936 2950 self._format_flags &= ~FLAG_INLINE_DATA
2937 2951 self._inner.split_inline(
2938 2952 tr,
2939 2953 self._format_flags | self._format_version,
2940 2954 new_index_file_path=new_index_file_path,
2941 2955 )
2942 2956
2943 2957 self._inline = False
2944 2958 if new_index_file_path is not None:
2945 2959 self._indexfile = new_index_file_path
2946 2960
2947 2961 nodemaputil.setup_persistent_nodemap(tr, self)
2948 2962
2949 2963 def _nodeduplicatecallback(self, transaction, node):
2950 2964 """called when trying to add a node already stored."""
2951 2965
2952 2966 @contextlib.contextmanager
2953 2967 def reading(self):
2954 2968 with self._inner.reading():
2955 2969 yield
2956 2970
2957 2971 @contextlib.contextmanager
2958 2972 def _writing(self, transaction):
2959 2973 if self._trypending:
2960 2974 msg = b'try to write in a `trypending` revlog: %s'
2961 2975 msg %= self.display_id
2962 2976 raise error.ProgrammingError(msg)
2963 2977 if self._inner.is_writing:
2964 2978 yield
2965 2979 else:
2966 2980 data_end = None
2967 2981 sidedata_end = None
2968 2982 if self._docket is not None:
2969 2983 data_end = self._docket.data_end
2970 2984 sidedata_end = self._docket.sidedata_end
2971 2985 with self._inner.writing(
2972 2986 transaction,
2973 2987 data_end=data_end,
2974 2988 sidedata_end=sidedata_end,
2975 2989 ):
2976 2990 yield
2977 2991 if self._docket is not None:
2978 2992 self._write_docket(transaction)
2979 2993
2980 2994 @property
2981 2995 def is_delaying(self):
2982 2996 return self._inner.is_delaying
2983 2997
2984 2998 def _write_docket(self, transaction):
2985 2999 """write the current docket on disk
2986 3000
2987 3001 Exists as a method to help the changelog implement its transaction logic
2988 3002
2989 3003 We could also imagine using the same transaction logic for all revlogs
2990 3004 since dockets are cheap."""
2991 3005 self._docket.write(transaction)
2992 3006
2993 3007 def addrevision(
2994 3008 self,
2995 3009 text,
2996 3010 transaction,
2997 3011 link,
2998 3012 p1,
2999 3013 p2,
3000 3014 cachedelta=None,
3001 3015 node=None,
3002 3016 flags=REVIDX_DEFAULT_FLAGS,
3003 3017 deltacomputer=None,
3004 3018 sidedata=None,
3005 3019 ):
3006 3020 """add a revision to the log
3007 3021
3008 3022 text - the revision data to add
3009 3023 transaction - the transaction object used for rollback
3010 3024 link - the linkrev data to add
3011 3025 p1, p2 - the parent nodeids of the revision
3012 3026 cachedelta - an optional precomputed delta
3013 3027 node - nodeid of revision; typically node is not specified, and it is
3014 3028 computed by default as hash(text, p1, p2); however, subclasses might
3015 3029 use a different hashing method (and override checkhash() in that case)
3016 3030 flags - the known flags to set on the revision
3017 3031 deltacomputer - an optional deltacomputer instance shared between
3018 3032 multiple calls
3019 3033 """
3020 3034 if link == nullrev:
3021 3035 raise error.RevlogError(
3022 3036 _(b"attempted to add linkrev -1 to %s") % self.display_id
3023 3037 )
3024 3038
3025 3039 if sidedata is None:
3026 3040 sidedata = {}
3027 3041 elif sidedata and not self.feature_config.has_side_data:
3028 3042 raise error.ProgrammingError(
3029 3043 _(b"trying to add sidedata to a revlog who don't support them")
3030 3044 )
3031 3045
3032 3046 if flags:
3033 3047 node = node or self.hash(text, p1, p2)
3034 3048
3035 3049 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3036 3050
3037 3051 # If the flag processor modifies the revision data, ignore any provided
3038 3052 # cachedelta.
3039 3053 if rawtext != text:
3040 3054 cachedelta = None
3041 3055
3042 3056 if len(rawtext) > _maxentrysize:
3043 3057 raise error.RevlogError(
3044 3058 _(
3045 3059 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3046 3060 )
3047 3061 % (self.display_id, len(rawtext))
3048 3062 )
3049 3063
3050 3064 node = node or self.hash(rawtext, p1, p2)
3051 3065 rev = self.index.get_rev(node)
3052 3066 if rev is not None:
3053 3067 return rev
3054 3068
3055 3069 if validatehash:
3056 3070 self.checkhash(rawtext, node, p1=p1, p2=p2)
3057 3071
3058 3072 return self.addrawrevision(
3059 3073 rawtext,
3060 3074 transaction,
3061 3075 link,
3062 3076 p1,
3063 3077 p2,
3064 3078 node,
3065 3079 flags,
3066 3080 cachedelta=cachedelta,
3067 3081 deltacomputer=deltacomputer,
3068 3082 sidedata=sidedata,
3069 3083 )
3070 3084
3071 3085 def addrawrevision(
3072 3086 self,
3073 3087 rawtext,
3074 3088 transaction,
3075 3089 link,
3076 3090 p1,
3077 3091 p2,
3078 3092 node,
3079 3093 flags,
3080 3094 cachedelta=None,
3081 3095 deltacomputer=None,
3082 3096 sidedata=None,
3083 3097 ):
3084 3098 """add a raw revision with known flags, node and parents
3085 3099 useful when reusing a revision not stored in this revlog (e.g. received
3086 3100 over the wire, or read from an external bundle).
3087 3101 """
3088 3102 with self._writing(transaction):
3089 3103 return self._addrevision(
3090 3104 node,
3091 3105 rawtext,
3092 3106 transaction,
3093 3107 link,
3094 3108 p1,
3095 3109 p2,
3096 3110 flags,
3097 3111 cachedelta,
3098 3112 deltacomputer=deltacomputer,
3099 3113 sidedata=sidedata,
3100 3114 )
3101 3115
3102 3116 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3103 3117 return self._inner.compress(data)
3104 3118
3105 3119 def decompress(self, data):
3106 3120 return self._inner.decompress(data)
3107 3121
3108 3122 def _addrevision(
3109 3123 self,
3110 3124 node,
3111 3125 rawtext,
3112 3126 transaction,
3113 3127 link,
3114 3128 p1,
3115 3129 p2,
3116 3130 flags,
3117 3131 cachedelta,
3118 3132 alwayscache=False,
3119 3133 deltacomputer=None,
3120 3134 sidedata=None,
3121 3135 ):
3122 3136 """internal function to add revisions to the log
3123 3137
3124 3138 see addrevision for argument descriptions.
3125 3139
3126 3140 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3127 3141
3128 3142 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3129 3143 be used.
3130 3144
3131 3145 invariants:
3132 3146 - rawtext is optional (can be None); if not set, cachedelta must be set.
3133 3147 if both are set, they must correspond to each other.
3134 3148 """
3135 3149 if node == self.nullid:
3136 3150 raise error.RevlogError(
3137 3151 _(b"%s: attempt to add null revision") % self.display_id
3138 3152 )
3139 3153 if (
3140 3154 node == self.nodeconstants.wdirid
3141 3155 or node in self.nodeconstants.wdirfilenodeids
3142 3156 ):
3143 3157 raise error.RevlogError(
3144 3158 _(b"%s: attempt to add wdir revision") % self.display_id
3145 3159 )
3146 3160 if self._inner._writinghandles is None:
3147 3161 msg = b'adding revision outside `revlog._writing` context'
3148 3162 raise error.ProgrammingError(msg)
3149 3163
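# "btext" is a one-element list used as a mutable cell: it is handed to
# the revisioninfo object below so the deltacomputer can lazily fill in
# the full text (read back via btext[0] near the end of this function).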
3150 3164 btext = [rawtext]
3151 3165
3152 3166 curr = len(self)
3153 3167 prev = curr - 1
3154 3168
3155 3169 offset = self._get_data_offset(prev)
3156 3170
3157 3171 if self._concurrencychecker:
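# if a concurrency checker is configured, have it verify that the files
# still end where we expect them to, guarding against another process
# writing to them concurrently.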
3158 3172 ifh, dfh, sdfh = self._inner._writinghandles
3159 3173 # XXX no checking for the sidedata file
3160 3174 if self._inline:
3161 3175 # offset is "as if" it were in the .d file, so we need to add on
3162 3176 # the size of the entry metadata.
3163 3177 self._concurrencychecker(
3164 3178 ifh, self._indexfile, offset + curr * self.index.entry_size
3165 3179 )
3166 3180 else:
3167 3181 # Entries in the .i are a consistent size.
3168 3182 self._concurrencychecker(
3169 3183 ifh, self._indexfile, curr * self.index.entry_size
3170 3184 )
3171 3185 self._concurrencychecker(dfh, self._datafile, offset)
3172 3186
3173 3187 p1r, p2r = self.rev(p1), self.rev(p2)
3174 3188
3175 3189 # full versions are inserted when the needed deltas
3176 3190 # become comparable to the uncompressed text
3177 3191 if rawtext is None:
3178 3192 # need the rawtext size, before it is changed by flag processors,
3179 3193 # which is the non-raw size. Use revlog explicitly to avoid filelog's
3180 3194 # extra logic that might remove metadata size.
3181 3195 textlen = mdiff.patchedsize(
3182 3196 revlog.size(self, cachedelta[0]), cachedelta[1]
3183 3197 )
3184 3198 else:
3185 3199 textlen = len(rawtext)
3186 3200
3187 3201 if deltacomputer is None:
3188 3202 write_debug = None
3189 3203 if self.delta_config.debug_delta:
3190 3204 write_debug = transaction._report
3191 3205 deltacomputer = deltautil.deltacomputer(
3192 3206 self, write_debug=write_debug
3193 3207 )
3194 3208
3195 3209 if cachedelta is not None and len(cachedelta) == 2:
3196 3210 # If the cached delta has no information about how it should be
3197 3211 # reused, add the default reuse instruction according to the
3198 3212 # revlog's configuration.
3199 3213 if (
3200 3214 self.delta_config.general_delta
3201 3215 and self.delta_config.lazy_delta_base
3202 3216 ):
3203 3217 delta_base_reuse = DELTA_BASE_REUSE_TRY
3204 3218 else:
3205 3219 delta_base_reuse = DELTA_BASE_REUSE_NO
3206 3220 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3207 3221
3208 3222 revinfo = revlogutils.revisioninfo(
3209 3223 node,
3210 3224 p1,
3211 3225 p2,
3212 3226 btext,
3213 3227 textlen,
3214 3228 cachedelta,
3215 3229 flags,
3216 3230 )
3217 3231
3218 3232 deltainfo = deltacomputer.finddeltainfo(revinfo)
3219 3233
3220 3234 compression_mode = COMP_MODE_INLINE
3221 3235 if self._docket is not None:
3222 3236 default_comp = self._docket.default_compression_header
3223 3237 r = deltautil.delta_compression(default_comp, deltainfo)
3224 3238 compression_mode, deltainfo = r
3225 3239
3226 3240 sidedata_compression_mode = COMP_MODE_INLINE
3227 3241 if sidedata and self.feature_config.has_side_data:
3228 3242 sidedata_compression_mode = COMP_MODE_PLAIN
3229 3243 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3230 3244 sidedata_offset = self._docket.sidedata_end
3231 3245 h, comp_sidedata = self._inner.compress(serialized_sidedata)
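# only keep the compressed form when the compressor actually engaged
# (b'u' marks an uncompressed literal), the result does not start with
# a NUL byte (which marks plain chunks elsewhere in the revlog), and
# compression actually saved space.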
3232 3246 if (
3233 3247 h != b'u'
3234 3248 and comp_sidedata[0:1] != b'\0'
3235 3249 and len(comp_sidedata) < len(serialized_sidedata)
3236 3250 ):
3237 3251 assert not h
3238 3252 if (
3239 3253 comp_sidedata[0:1]
3240 3254 == self._docket.default_compression_header
3241 3255 ):
3242 3256 sidedata_compression_mode = COMP_MODE_DEFAULT
3243 3257 serialized_sidedata = comp_sidedata
3244 3258 else:
3245 3259 sidedata_compression_mode = COMP_MODE_INLINE
3246 3260 serialized_sidedata = comp_sidedata
3247 3261 else:
3248 3262 serialized_sidedata = b""
3249 3263 # Don't store the offset if the sidedata is empty; that way
3250 3264 # we can easily detect empty sidedata, and it will be no different
3251 3265 # from sidedata we add manually.
3252 3266 sidedata_offset = 0
3253 3267
3254 3268 rank = RANK_UNKNOWN
3255 3269 if self.feature_config.compute_rank:
3256 3270 if (p1r, p2r) == (nullrev, nullrev):
3257 3271 rank = 1
3258 3272 elif p1r != nullrev and p2r == nullrev:
3259 3273 rank = 1 + self.fast_rank(p1r)
3260 3274 elif p1r == nullrev and p2r != nullrev:
3261 3275 rank = 1 + self.fast_rank(p2r)
3262 3276 else: # merge node
3263 3277 if rustdagop is not None and self.index.rust_ext_compat:
3264 3278 rank = rustdagop.rank(self.index, p1r, p2r)
3265 3279 else:
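# pure-python fallback: the rank of a merge is the size of the
# union of its parents' ancestor sets, plus one for the merge
# itself. Count the ancestors of the "higher" parent, then add the
# ancestors of the other parent that are not already counted.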
3266 3280 pmin, pmax = sorted((p1r, p2r))
3267 3281 rank = 1 + self.fast_rank(pmax)
3268 3282 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3269 3283
3270 3284 e = revlogutils.entry(
3271 3285 flags=flags,
3272 3286 data_offset=offset,
3273 3287 data_compressed_length=deltainfo.deltalen,
3274 3288 data_uncompressed_length=textlen,
3275 3289 data_compression_mode=compression_mode,
3276 3290 data_delta_base=deltainfo.base,
3277 3291 link_rev=link,
3278 3292 parent_rev_1=p1r,
3279 3293 parent_rev_2=p2r,
3280 3294 node_id=node,
3281 3295 sidedata_offset=sidedata_offset,
3282 3296 sidedata_compressed_length=len(serialized_sidedata),
3283 3297 sidedata_compression_mode=sidedata_compression_mode,
3284 3298 rank=rank,
3285 3299 )
3286 3300
3287 3301 self.index.append(e)
3288 3302 entry = self.index.entry_binary(curr)
3289 3303 if curr == 0 and self._docket is None:
3290 3304 header = self._format_flags | self._format_version
3291 3305 header = self.index.pack_header(header)
3292 3306 entry = header + entry
3293 3307 self._writeentry(
3294 3308 transaction,
3295 3309 entry,
3296 3310 deltainfo.data,
3297 3311 link,
3298 3312 offset,
3299 3313 serialized_sidedata,
3300 3314 sidedata_offset,
3301 3315 )
3302 3316
3303 3317 rawtext = btext[0]
3304 3318
3305 3319 if alwayscache and rawtext is None:
3306 3320 rawtext = deltacomputer.buildtext(revinfo)
3307 3321
3308 3322 if type(rawtext) == bytes: # only accept immutable objects
3309 3323 self._inner._revisioncache = (node, curr, rawtext)
3310 3324 self._chainbasecache[curr] = deltainfo.chainbase
3311 3325 return curr
3312 3326
3313 3327 def _get_data_offset(self, prev):
3314 3328 """Returns the current offset in the (in-transaction) data file.
3315 3329 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3316 3330 file to store that information: since sidedata can be rewritten to the
3317 3331 end of the data file within a transaction, you can have cases where, for
3318 3332 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3319 3333 to `n - 1`'s sidedata being written after `n`'s data.
3320 3334
3321 3335 TODO cache this in a docket file before getting out of experimental."""
3322 3336 if self._docket is None:
3323 3337 return self.end(prev)
3324 3338 else:
3325 3339 return self._docket.data_end
3326 3340
3327 3341 def _writeentry(
3328 3342 self,
3329 3343 transaction,
3330 3344 entry,
3331 3345 data,
3332 3346 link,
3333 3347 offset,
3334 3348 sidedata,
3335 3349 sidedata_offset,
3336 3350 ):
3337 3351 # Files opened in a+ mode have inconsistent behavior on various
3338 3352 # platforms. Windows requires that a file positioning call be made
3339 3353 # when the file handle transitions between reads and writes. See
3340 3354 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3341 3355 # platforms, Python or the platform itself can be buggy. Some versions
3342 3356 # of Solaris have been observed to not append at the end of the file
3343 3357 # if the file was seeked to before the end. See issue4943 for more.
3344 3358 #
3345 3359 # We work around this issue by inserting a seek() before writing.
3346 3360 # Note: This is likely not necessary on Python 3. However, because
3347 3361 # the file handle is reused for reads and may be seeked there, we need
3348 3362 # to be careful before changing this.
3349 3363 index_end = data_end = sidedata_end = None
3350 3364 if self._docket is not None:
3351 3365 index_end = self._docket.index_end
3352 3366 data_end = self._docket.data_end
3353 3367 sidedata_end = self._docket.sidedata_end
3354 3368
3355 3369 files_end = self._inner.write_entry(
3356 3370 transaction,
3357 3371 entry,
3358 3372 data,
3359 3373 link,
3360 3374 offset,
3361 3375 sidedata,
3362 3376 sidedata_offset,
3363 3377 index_end,
3364 3378 data_end,
3365 3379 sidedata_end,
3366 3380 )
3367 3381 self._enforceinlinesize(transaction)
3368 3382 if self._docket is not None:
3369 3383 self._docket.index_end = files_end[0]
3370 3384 self._docket.data_end = files_end[1]
3371 3385 self._docket.sidedata_end = files_end[2]
3372 3386
3373 3387 nodemaputil.setup_persistent_nodemap(transaction, self)
3374 3388
3375 3389 def addgroup(
3376 3390 self,
3377 3391 deltas,
3378 3392 linkmapper,
3379 3393 transaction,
3380 3394 alwayscache=False,
3381 3395 addrevisioncb=None,
3382 3396 duplicaterevisioncb=None,
3383 3397 debug_info=None,
3384 3398 delta_base_reuse_policy=None,
3385 3399 ):
3386 3400 """
3387 3401 add a delta group
3388 3402
3389 3403 given a set of deltas, add them to the revision log. the
3390 3404 first delta is against its parent, which should be in our
3391 3405 log, the rest are against the previous delta.
3392 3406
3393 3407 If ``addrevisioncb`` is defined, it will be called with this revlog and
3394 3408 the revision number that was added.
3395 3409 """
3396 3410
3397 3411 if self._adding_group:
3398 3412 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3399 3413
3400 3414 # read the default delta-base reuse policy from revlog config if the
3401 3415 # group did not specify one.
3402 3416 if delta_base_reuse_policy is None:
3403 3417 if (
3404 3418 self.delta_config.general_delta
3405 3419 and self.delta_config.lazy_delta_base
3406 3420 ):
3407 3421 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3408 3422 else:
3409 3423 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3410 3424
3411 3425 self._adding_group = True
3412 3426 empty = True
3413 3427 try:
3414 3428 with self._writing(transaction):
3415 3429 write_debug = None
3416 3430 if self.delta_config.debug_delta:
3417 3431 write_debug = transaction._report
3418 3432 deltacomputer = deltautil.deltacomputer(
3419 3433 self,
3420 3434 write_debug=write_debug,
3421 3435 debug_info=debug_info,
3422 3436 )
3423 3437 # loop through our set of deltas
3424 3438 for data in deltas:
3425 3439 (
3426 3440 node,
3427 3441 p1,
3428 3442 p2,
3429 3443 linknode,
3430 3444 deltabase,
3431 3445 delta,
3432 3446 flags,
3433 3447 sidedata,
3434 3448 ) = data
3435 3449 link = linkmapper(linknode)
3436 3450 flags = flags or REVIDX_DEFAULT_FLAGS
3437 3451
3438 3452 rev = self.index.get_rev(node)
3439 3453 if rev is not None:
3440 3454 # this can happen if two branches make the same change
3441 3455 self._nodeduplicatecallback(transaction, rev)
3442 3456 if duplicaterevisioncb:
3443 3457 duplicaterevisioncb(self, rev)
3444 3458 empty = False
3445 3459 continue
3446 3460
3447 3461 for p in (p1, p2):
3448 3462 if not self.index.has_node(p):
3449 3463 raise error.LookupError(
3450 3464 p, self.radix, _(b'unknown parent')
3451 3465 )
3452 3466
3453 3467 if not self.index.has_node(deltabase):
3454 3468 raise error.LookupError(
3455 3469 deltabase, self.display_id, _(b'unknown delta base')
3456 3470 )
3457 3471
3458 3472 baserev = self.rev(deltabase)
3459 3473
3460 3474 if baserev != nullrev and self.iscensored(baserev):
3461 3475 # if base is censored, delta must be full replacement in a
3462 3476 # single patch operation
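# i.e. the delta must be a single hunk replacing bytes 0..oldlen
# of the base with the full new text; the mdiff hunk header is
# three big-endian 32-bit integers (start, end, new length).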
3463 3477 hlen = struct.calcsize(b">lll")
3464 3478 oldlen = self.rawsize(baserev)
3465 3479 newlen = len(delta) - hlen
3466 3480 if delta[:hlen] != mdiff.replacediffheader(
3467 3481 oldlen, newlen
3468 3482 ):
3469 3483 raise error.CensoredBaseError(
3470 3484 self.display_id, self.node(baserev)
3471 3485 )
3472 3486
3473 3487 if not flags and self._peek_iscensored(baserev, delta):
3474 3488 flags |= REVIDX_ISCENSORED
3475 3489
3476 3490 # We assume consumers of addrevisioncb will want to retrieve
3477 3491 # the added revision, which will require a call to
3478 3492 # revision(). revision() will fast path if there is a cache
3479 3493 # hit. So, we tell _addrevision() to always cache in this case.
3480 3494 # We're only using addgroup() in the context of changegroup
3481 3495 # generation so the revision data can always be handled as raw
3482 3496 # by the flagprocessor.
3483 3497 rev = self._addrevision(
3484 3498 node,
3485 3499 None,
3486 3500 transaction,
3487 3501 link,
3488 3502 p1,
3489 3503 p2,
3490 3504 flags,
3491 3505 (baserev, delta, delta_base_reuse_policy),
3492 3506 alwayscache=alwayscache,
3493 3507 deltacomputer=deltacomputer,
3494 3508 sidedata=sidedata,
3495 3509 )
3496 3510
3497 3511 if addrevisioncb:
3498 3512 addrevisioncb(self, rev)
3499 3513 empty = False
3500 3514 finally:
3501 3515 self._adding_group = False
3502 3516 return not empty
3503 3517
3504 3518 def iscensored(self, rev):
3505 3519 """Check if a file revision is censored."""
3506 3520 if not self.feature_config.censorable:
3507 3521 return False
3508 3522
3509 3523 return self.flags(rev) & REVIDX_ISCENSORED
3510 3524
3511 3525 def _peek_iscensored(self, baserev, delta):
3512 3526 """Quickly check if a delta produces a censored revision."""
3513 3527 if not self.feature_config.censorable:
3514 3528 return False
3515 3529
3516 3530 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3517 3531
3518 3532 def getstrippoint(self, minlink):
3519 3533 """find the minimum rev that must be stripped to strip the linkrev
3520 3534
3521 3535 Returns a tuple containing the minimum rev and a set of all revs that
3522 3536 have linkrevs that will be broken by this strip.
3523 3537 """
3524 3538 return storageutil.resolvestripinfo(
3525 3539 minlink,
3526 3540 len(self) - 1,
3527 3541 self.headrevs(),
3528 3542 self.linkrev,
3529 3543 self.parentrevs,
3530 3544 )
3531 3545
3532 3546 def strip(self, minlink, transaction):
3533 3547 """truncate the revlog on the first revision with a linkrev >= minlink
3534 3548
3535 3549 This function is called when we're stripping revision minlink and
3536 3550 its descendants from the repository.
3537 3551
3538 3552 We have to remove all revisions with linkrev >= minlink, because
3539 3553 the equivalent changelog revisions will be renumbered after the
3540 3554 strip.
3541 3555
3542 3556 So we truncate the revlog on the first of these revisions, and
3543 3557 trust that the caller has saved the revisions that shouldn't be
3544 3558 removed and that it'll re-add them after this truncation.
3545 3559 """
3546 3560 if len(self) == 0:
3547 3561 return
3548 3562
3549 3563 rev, _ = self.getstrippoint(minlink)
3550 3564 if rev == len(self):
3551 3565 return
3552 3566
3553 3567 # first truncate the files on disk
3554 3568 data_end = self.start(rev)
3555 3569 if not self._inline:
3556 3570 transaction.add(self._datafile, data_end)
3557 3571 end = rev * self.index.entry_size
3558 3572 else:
3559 3573 end = data_end + (rev * self.index.entry_size)
3560 3574
3561 3575 if self._sidedatafile:
3562 3576 sidedata_end = self.sidedata_cut_off(rev)
3563 3577 transaction.add(self._sidedatafile, sidedata_end)
3564 3578
3565 3579 transaction.add(self._indexfile, end)
3566 3580 if self._docket is not None:
3567 3581 # XXX we could leverage the docket while stripping. However it is
3568 3582 # not powerful enough at the time of this comment.
3569 3583 self._docket.index_end = end
3570 3584 self._docket.data_end = data_end
3571 3585 self._docket.sidedata_end = sidedata_end
3572 3586 self._docket.write(transaction, stripping=True)
3573 3587
3574 3588 # then reset internal state in memory to forget those revisions
3575 3589 self._chaininfocache = util.lrucachedict(500)
3576 3590 self._inner.clear_cache()
3577 3591
3578 3592 del self.index[rev:-1]
3579 3593
3580 3594 def checksize(self):
3581 3595 """Check size of index and data files
3582 3596
3583 3597 return a (dd, di) tuple.
3584 3598 - dd: extra bytes for the "data" file
3585 3599 - di: extra bytes for the "index" file
3586 3600
3587 3601 A healthy revlog will return (0, 0).
3588 3602 """
3589 3603 expected = 0
3590 3604 if len(self):
3591 3605 expected = max(0, self.end(len(self) - 1))
3592 3606
3593 3607 try:
3594 3608 with self._datafp() as f:
3595 3609 f.seek(0, io.SEEK_END)
3596 3610 actual = f.tell()
3597 3611 dd = actual - expected
3598 3612 except FileNotFoundError:
3599 3613 dd = 0
3600 3614
3601 3615 try:
3602 3616 f = self.opener(self._indexfile)
3603 3617 f.seek(0, io.SEEK_END)
3604 3618 actual = f.tell()
3605 3619 f.close()
3606 3620 s = self.index.entry_size
3607 3621 i = max(0, actual // s)
3608 3622 di = actual - (i * s)
3609 3623 if self._inline:
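# for inline revlogs the index entries and the revision data
# interleave in the same file, so the expected size is the index
# entries plus the length of every stored chunk.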
3610 3624 databytes = 0
3611 3625 for r in self:
3612 3626 databytes += max(0, self.length(r))
3613 3627 dd = 0
3614 3628 di = actual - len(self) * s - databytes
3615 3629 except FileNotFoundError:
3616 3630 di = 0
3617 3631
3618 3632 return (dd, di)
3619 3633
3620 3634 def files(self):
3621 3635 """return list of files that compose this revlog"""
3622 3636 res = [self._indexfile]
3623 3637 if self._docket_file is None:
3624 3638 if not self._inline:
3625 3639 res.append(self._datafile)
3626 3640 else:
3627 3641 res.append(self._docket_file)
3628 3642 res.extend(self._docket.old_index_filepaths(include_empty=False))
3629 3643 if self._docket.data_end:
3630 3644 res.append(self._datafile)
3631 3645 res.extend(self._docket.old_data_filepaths(include_empty=False))
3632 3646 if self._docket.sidedata_end:
3633 3647 res.append(self._sidedatafile)
3634 3648 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3635 3649 return res
3636 3650
3637 3651 def emitrevisions(
3638 3652 self,
3639 3653 nodes,
3640 3654 nodesorder=None,
3641 3655 revisiondata=False,
3642 3656 assumehaveparentrevisions=False,
3643 3657 deltamode=repository.CG_DELTAMODE_STD,
3644 3658 sidedata_helpers=None,
3645 3659 debug_info=None,
3646 3660 ):
3647 3661 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3648 3662 raise error.ProgrammingError(
3649 3663 b'unhandled value for nodesorder: %s' % nodesorder
3650 3664 )
3651 3665
3652 3666 if nodesorder is None and not self.delta_config.general_delta:
3653 3667 nodesorder = b'storage'
3654 3668
3655 3669 if (
3656 3670 not self._storedeltachains
3657 3671 and deltamode != repository.CG_DELTAMODE_PREV
3658 3672 ):
3659 3673 deltamode = repository.CG_DELTAMODE_FULL
3660 3674
3661 3675 return storageutil.emitrevisions(
3662 3676 self,
3663 3677 nodes,
3664 3678 nodesorder,
3665 3679 revlogrevisiondelta,
3666 3680 deltaparentfn=self.deltaparent,
3667 3681 candeltafn=self._candelta,
3668 3682 rawsizefn=self.rawsize,
3669 3683 revdifffn=self.revdiff,
3670 3684 flagsfn=self.flags,
3671 3685 deltamode=deltamode,
3672 3686 revisiondata=revisiondata,
3673 3687 assumehaveparentrevisions=assumehaveparentrevisions,
3674 3688 sidedata_helpers=sidedata_helpers,
3675 3689 debug_info=debug_info,
3676 3690 )
3677 3691
3678 3692 DELTAREUSEALWAYS = b'always'
3679 3693 DELTAREUSESAMEREVS = b'samerevs'
3680 3694 DELTAREUSENEVER = b'never'
3681 3695
3682 3696 DELTAREUSEFULLADD = b'fulladd'
3683 3697
3684 3698 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3685 3699
3686 3700 def clone(
3687 3701 self,
3688 3702 tr,
3689 3703 destrevlog,
3690 3704 addrevisioncb=None,
3691 3705 deltareuse=DELTAREUSESAMEREVS,
3692 3706 forcedeltabothparents=None,
3693 3707 sidedata_helpers=None,
3694 3708 ):
3695 3709 """Copy this revlog to another, possibly with format changes.
3696 3710
3697 3711 The destination revlog will contain the same revisions and nodes.
3698 3712 However, it may not be bit-for-bit identical due to e.g. delta encoding
3699 3713 differences.
3700 3714
3701 3715 The ``deltareuse`` argument controls how deltas from the existing revlog
3702 3716 are preserved in the destination revlog. The argument can have the
3703 3717 following values:
3704 3718
3705 3719 DELTAREUSEALWAYS
3706 3720 Deltas will always be reused (if possible), even if the destination
3707 3721 revlog would not select the same revisions for the delta. This is the
3708 3722 fastest mode of operation.
3709 3723 DELTAREUSESAMEREVS
3710 3724 Deltas will be reused if the destination revlog would pick the same
3711 3725 revisions for the delta. This mode strikes a balance between speed
3712 3726 and optimization.
3713 3727 DELTAREUSENEVER
3714 3728 Deltas will never be reused. This is the slowest mode of execution.
3715 3729 This mode can be used to recompute deltas (e.g. if the diff/delta
3716 3730 algorithm changes).
3717 3731 DELTAREUSEFULLADD
3718 3732 Revisions will be re-added as if they were new content. This is
3719 3733 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3720 3734 e.g. large file detection and handling.
3721 3735
3722 3736 Delta computation can be slow, so the choice of delta reuse policy can
3723 3737 significantly affect run time.
3724 3738
3725 3739 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3726 3740 two extremes. Deltas will be reused if they are appropriate. But if a
3727 3741 better delta base is available, it will be used. This means if you
3728 3742 are converting a non-generaldelta revlog to a generaldelta revlog,
3729 3743 deltas will be recomputed if the delta's parent isn't a parent of the
3730 3744 revision.
3731 3745
3732 3746 In addition to the delta policy, the ``forcedeltabothparents``
3733 3747 argument controls whether to force computing deltas against both parents
3734 3748 for merges. When unset, the destination revlog's current setting is used.
3735 3749
3736 3750 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3737 3751 `sidedata_helpers`.
3738 3752 """
3739 3753 if deltareuse not in self.DELTAREUSEALL:
3740 3754 raise ValueError(
3741 3755 _(b'value for deltareuse invalid: %s') % deltareuse
3742 3756 )
3743 3757
3744 3758 if len(destrevlog):
3745 3759 raise ValueError(_(b'destination revlog is not empty'))
3746 3760
3747 3761 if getattr(self, 'filteredrevs', None):
3748 3762 raise ValueError(_(b'source revlog has filtered revisions'))
3749 3763 if getattr(destrevlog, 'filteredrevs', None):
3750 3764 raise ValueError(_(b'destination revlog has filtered revisions'))
3751 3765
3752 3766 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3753 3767 # if possible.
3754 3768 old_delta_config = destrevlog.delta_config
3755 3769 destrevlog.delta_config = destrevlog.delta_config.copy()
3756 3770
3757 3771 try:
3758 3772 if deltareuse == self.DELTAREUSEALWAYS:
3759 3773 destrevlog.delta_config.lazy_delta_base = True
3760 3774 destrevlog.delta_config.lazy_delta = True
3761 3775 elif deltareuse == self.DELTAREUSESAMEREVS:
3762 3776 destrevlog.delta_config.lazy_delta_base = False
3763 3777 destrevlog.delta_config.lazy_delta = True
3764 3778 elif deltareuse == self.DELTAREUSENEVER:
3765 3779 destrevlog.delta_config.lazy_delta_base = False
3766 3780 destrevlog.delta_config.lazy_delta = False
3767 3781
3768 3782 delta_both_parents = (
3769 3783 forcedeltabothparents or old_delta_config.delta_both_parents
3770 3784 )
3771 3785 destrevlog.delta_config.delta_both_parents = delta_both_parents
3772 3786
3773 3787 with self.reading(), destrevlog._writing(tr):
3774 3788 self._clone(
3775 3789 tr,
3776 3790 destrevlog,
3777 3791 addrevisioncb,
3778 3792 deltareuse,
3779 3793 forcedeltabothparents,
3780 3794 sidedata_helpers,
3781 3795 )
3782 3796
3783 3797 finally:
3784 3798 destrevlog.delta_config = old_delta_config
3785 3799
3786 3800 def _clone(
3787 3801 self,
3788 3802 tr,
3789 3803 destrevlog,
3790 3804 addrevisioncb,
3791 3805 deltareuse,
3792 3806 forcedeltabothparents,
3793 3807 sidedata_helpers,
3794 3808 ):
3795 3809 """perform the core duty of `revlog.clone` after parameter processing"""
3796 3810 write_debug = None
3797 3811 if self.delta_config.debug_delta:
3798 3812 write_debug = tr._report
3799 3813 deltacomputer = deltautil.deltacomputer(
3800 3814 destrevlog,
3801 3815 write_debug=write_debug,
3802 3816 )
3803 3817 index = self.index
3804 3818 for rev in self:
3805 3819 entry = index[rev]
3806 3820
3807 3821 # Some classes override linkrev to take filtered revs into
3808 3822 # account. Use raw entry from index.
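# raw index entry layout, as used here: entry[0] packs offset and
# flags (the low 16 bits are the flags), entry[4] is the linkrev,
# entry[5] and entry[6] are the parent revisions, and entry[7] is
# the node id.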
3809 3823 flags = entry[0] & 0xFFFF
3810 3824 linkrev = entry[4]
3811 3825 p1 = index[entry[5]][7]
3812 3826 p2 = index[entry[6]][7]
3813 3827 node = entry[7]
3814 3828
3815 3829 # (Possibly) reuse the delta from the revlog if allowed and
3816 3830 # the revlog chunk is a delta.
3817 3831 cachedelta = None
3818 3832 rawtext = None
3819 3833 if deltareuse == self.DELTAREUSEFULLADD:
3820 3834 text = self._revisiondata(rev)
3821 3835 sidedata = self.sidedata(rev)
3822 3836
3823 3837 if sidedata_helpers is not None:
3824 3838 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3825 3839 self, sidedata_helpers, sidedata, rev
3826 3840 )
3827 3841 flags = flags | new_flags[0] & ~new_flags[1]
3828 3842
3829 3843 destrevlog.addrevision(
3830 3844 text,
3831 3845 tr,
3832 3846 linkrev,
3833 3847 p1,
3834 3848 p2,
3835 3849 cachedelta=cachedelta,
3836 3850 node=node,
3837 3851 flags=flags,
3838 3852 deltacomputer=deltacomputer,
3839 3853 sidedata=sidedata,
3840 3854 )
3841 3855 else:
3842 3856 if destrevlog.delta_config.lazy_delta:
3843 3857 dp = self.deltaparent(rev)
3844 3858 if dp != nullrev:
3845 3859 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3846 3860
3847 3861 sidedata = None
3848 3862 if not cachedelta:
3849 3863 try:
3850 3864 rawtext = self._revisiondata(rev)
3851 3865 except error.CensoredNodeError as censored:
3852 3866 assert flags & REVIDX_ISCENSORED
3853 3867 rawtext = censored.tombstone
3854 3868 sidedata = self.sidedata(rev)
3855 3869 if sidedata is None:
3856 3870 sidedata = self.sidedata(rev)
3857 3871
3858 3872 if sidedata_helpers is not None:
3859 3873 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3860 3874 self, sidedata_helpers, sidedata, rev
3861 3875 )
3862 3876 flags = flags | new_flags[0] & ~new_flags[1]
3863 3877
3864 3878 destrevlog._addrevision(
3865 3879 node,
3866 3880 rawtext,
3867 3881 tr,
3868 3882 linkrev,
3869 3883 p1,
3870 3884 p2,
3871 3885 flags,
3872 3886 cachedelta,
3873 3887 deltacomputer=deltacomputer,
3874 3888 sidedata=sidedata,
3875 3889 )
3876 3890
3877 3891 if addrevisioncb:
3878 3892 addrevisioncb(self, rev, node)
3879 3893
3880 3894 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3881 3895 if self._format_version == REVLOGV0:
3882 3896 raise error.RevlogError(
3883 3897 _(b'cannot censor with version %d revlogs')
3884 3898 % self._format_version
3885 3899 )
3886 3900 elif self._format_version == REVLOGV1:
3887 3901 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3888 3902 else:
3889 3903 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3890 3904
3891 3905 def verifyintegrity(self, state):
3892 3906 """Verifies the integrity of the revlog.
3893 3907
3894 3908 Yields ``revlogproblem`` instances describing problems that are
3895 3909 found.
3896 3910 """
3897 3911 dd, di = self.checksize()
3898 3912 if dd:
3899 3913 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3900 3914 if di:
3901 3915 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3902 3916
3903 3917 version = self._format_version
3904 3918
3905 3919 # The verifier tells us what version revlog we should be.
3906 3920 if version != state[b'expectedversion']:
3907 3921 yield revlogproblem(
3908 3922 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3909 3923 % (self.display_id, version, state[b'expectedversion'])
3910 3924 )
3911 3925
3912 3926 state[b'skipread'] = set()
3913 3927 state[b'safe_renamed'] = set()
3914 3928
3915 3929 for rev in self:
3916 3930 node = self.node(rev)
3917 3931
3918 3932 # Verify contents. 4 cases to care about:
3919 3933 #
3920 3934 # common: the most common case
3921 3935 # rename: with a rename
3922 3936 # meta: file content starts with b'\1\n', the metadata
3923 3937 # header defined in filelog.py, but without a rename
3924 3938 # ext: content stored externally
3925 3939 #
3926 3940 # More formally, their differences are shown below:
3927 3941 #
3928 3942 # | common | rename | meta | ext
3929 3943 # -------------------------------------------------------
3930 3944 # flags() | 0 | 0 | 0 | not 0
3931 3945 # renamed() | False | True | False | ?
3932 3946 # rawtext[0:2]=='\1\n'| False | True | True | ?
3933 3947 #
3934 3948 # "rawtext" means the raw text stored in revlog data, which
3935 3949 # could be retrieved by "rawdata(rev)". "text"
3936 3950 # mentioned below is "revision(rev)".
3937 3951 #
3938 3952 # There are 3 different lengths stored physically:
3939 3953 # 1. L1: rawsize, stored in revlog index
3940 3954 # 2. L2: len(rawtext), stored in revlog data
3941 3955 # 3. L3: len(text), stored in revlog data if flags==0, or
3942 3956 # possibly somewhere else if flags!=0
3943 3957 #
3944 3958 # L1 should be equal to L2. L3 could be different from them.
3945 3959 # "text" may or may not affect commit hash depending on flag
3946 3960 # processors (see flagutil.addflagprocessor).
3947 3961 #
3948 3962 # | common | rename | meta | ext
3949 3963 # -------------------------------------------------
3950 3964 # rawsize() | L1 | L1 | L1 | L1
3951 3965 # size() | L1 | L2-LM | L1(*) | L1 (?)
3952 3966 # len(rawtext) | L2 | L2 | L2 | L2
3953 3967 # len(text) | L2 | L2 | L2 | L3
3954 3968 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3955 3969 #
3956 3970 # LM: length of metadata, depending on rawtext
3957 3971 # (*): not ideal, see comment in filelog.size
3958 3972 # (?): could be "- len(meta)" if the resolved content has
3959 3973 # rename metadata
3960 3974 #
3961 3975 # Checks needed to be done:
3962 3976 # 1. length check: L1 == L2, in all cases.
3963 3977 # 2. hash check: depending on flag processor, we may need to
3964 3978 # use either "text" (external), or "rawtext" (in revlog).
3965 3979
3966 3980 try:
3967 3981 skipflags = state.get(b'skipflags', 0)
3968 3982 if skipflags:
3969 3983 skipflags &= self.flags(rev)
3970 3984
3971 3985 _verify_revision(self, skipflags, state, node)
3972 3986
3973 3987 l1 = self.rawsize(rev)
3974 3988 l2 = len(self.rawdata(node))
3975 3989
3976 3990 if l1 != l2:
3977 3991 yield revlogproblem(
3978 3992 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3979 3993 node=node,
3980 3994 )
3981 3995
3982 3996 except error.CensoredNodeError:
3983 3997 if state[b'erroroncensored']:
3984 3998 yield revlogproblem(
3985 3999 error=_(b'censored file data'), node=node
3986 4000 )
3987 4001 state[b'skipread'].add(node)
3988 4002 except Exception as e:
3989 4003 yield revlogproblem(
3990 4004 error=_(b'unpacking %s: %s')
3991 4005 % (short(node), stringutil.forcebytestr(e)),
3992 4006 node=node,
3993 4007 )
3994 4008 state[b'skipread'].add(node)
3995 4009
3996 4010 def storageinfo(
3997 4011 self,
3998 4012 exclusivefiles=False,
3999 4013 sharedfiles=False,
4000 4014 revisionscount=False,
4001 4015 trackedsize=False,
4002 4016 storedsize=False,
4003 4017 ):
4004 4018 d = {}
4005 4019
4006 4020 if exclusivefiles:
4007 4021 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4008 4022 if not self._inline:
4009 4023 d[b'exclusivefiles'].append((self.opener, self._datafile))
4010 4024
4011 4025 if sharedfiles:
4012 4026 d[b'sharedfiles'] = []
4013 4027
4014 4028 if revisionscount:
4015 4029 d[b'revisionscount'] = len(self)
4016 4030
4017 4031 if trackedsize:
4018 4032 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4019 4033
4020 4034 if storedsize:
4021 4035 d[b'storedsize'] = sum(
4022 4036 self.opener.stat(path).st_size for path in self.files()
4023 4037 )
4024 4038
4025 4039 return d
4026 4040
4027 4041 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4028 4042 if not self.feature_config.has_side_data:
4029 4043 return
4030 4044 # revlog formats with sidedata support do not support inline
4031 4045 assert not self._inline
4032 4046 if not helpers[1] and not helpers[2]:
4033 4047 # Nothing to generate or remove
4034 4048 return
4035 4049
4036 4050 new_entries = []
4037 4051 # append the new sidedata
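# (the matching index entries are buffered in ``new_entries`` and
# written back in a second pass once every sidedata block is on disk)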
4038 4052 with self._writing(transaction):
4039 4053 ifh, dfh, sdfh = self._inner._writinghandles
4040 4054 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4041 4055
4042 4056 current_offset = sdfh.tell()
4043 4057 for rev in range(startrev, endrev + 1):
4044 4058 entry = self.index[rev]
4045 4059 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4046 4060 store=self,
4047 4061 sidedata_helpers=helpers,
4048 4062 sidedata={},
4049 4063 rev=rev,
4050 4064 )
4051 4065
4052 4066 serialized_sidedata = sidedatautil.serialize_sidedata(
4053 4067 new_sidedata
4054 4068 )
4055 4069
4056 4070 sidedata_compression_mode = COMP_MODE_INLINE
4057 4071 if serialized_sidedata and self.feature_config.has_side_data:
4058 4072 sidedata_compression_mode = COMP_MODE_PLAIN
4059 4073 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4060 4074 if (
4061 4075 h != b'u'
4062 4076 and comp_sidedata[0:1] != b'\0'
4063 4077 and len(comp_sidedata) < len(serialized_sidedata)
4064 4078 ):
4065 4079 assert not h
4066 4080 if (
4067 4081 comp_sidedata[0:1]
4068 4082 == self._docket.default_compression_header
4069 4083 ):
4070 4084 sidedata_compression_mode = COMP_MODE_DEFAULT
4071 4085 serialized_sidedata = comp_sidedata
4072 4086 else:
4073 4087 sidedata_compression_mode = COMP_MODE_INLINE
4074 4088 serialized_sidedata = comp_sidedata
4075 4089 if entry[8] != 0 or entry[9] != 0:
4076 4090 # rewriting entries that already have sidedata is not
4077 4091 # supported yet, because it introduces garbage data in the
4078 4092 # revlog.
4079 4093 msg = b"rewriting existing sidedata is not supported yet"
4080 4094 raise error.Abort(msg)
4081 4095
4082 4096 # Apply (potential) flags to add and to remove after running
4083 4097 # the sidedata helpers
4084 4098 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4085 4099 entry_update = (
4086 4100 current_offset,
4087 4101 len(serialized_sidedata),
4088 4102 new_offset_flags,
4089 4103 sidedata_compression_mode,
4090 4104 )
4091 4105
4092 4106 # the sidedata computation might have moved the file cursors around
4093 4107 sdfh.seek(current_offset, os.SEEK_SET)
4094 4108 sdfh.write(serialized_sidedata)
4095 4109 new_entries.append(entry_update)
4096 4110 current_offset += len(serialized_sidedata)
4097 4111 self._docket.sidedata_end = sdfh.tell()
4098 4112
4099 4113 # rewrite the new index entries
4100 4114 ifh.seek(startrev * self.index.entry_size)
4101 4115 for i, e in enumerate(new_entries):
4102 4116 rev = startrev + i
4103 4117 self.index.replace_sidedata_info(rev, *e)
4104 4118 packed = self.index.entry_binary(rev)
4105 4119 if rev == 0 and self._docket is None:
4106 4120 header = self._format_flags | self._format_version
4107 4121 header = self.index.pack_header(header)
4108 4122 packed = header + packed
4109 4123 ifh.write(packed)