revlog: avoid exposing delayed index entry too widely in non-inline revlog...
marmoute
r52058:66417f55 stable
@@ -1,4243 +1,4249 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure python don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is considered large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # The size of the uncompressed cache compared to the largest revision seen.
299 299 uncompressed_cache_factor = attr.ib(default=None)
300 300
301 301 # The number of chunks cached
302 302 uncompressed_cache_count = attr.ib(default=None)
303 303
304 304 # Allow sparse reading of the revlog data
305 305 with_sparse_read = attr.ib(default=False)
306 306 # minimal density of a sparse read chunk
307 307 sr_density_threshold = attr.ib(default=0.50)
308 308 # minimal size of data we skip when performing sparse read
309 309 sr_min_gap_size = attr.ib(default=262144)
310 310
311 311 # are deltas encoded against arbitrary bases.
312 312 generaldelta = attr.ib(default=False)
313 313
314 314
315 315 @attr.s()
316 316 class DeltaConfig(_Config):
317 317 """Hold configuration value about how new delta are computed
318 318
319 319 Some attributes are duplicated from DataConfig to help keep each object
320 320 self-contained.
321 321 """
322 322
323 323 # can deltas be encoded against arbitrary bases.
324 324 general_delta = attr.ib(default=False)
325 325 # Allow sparse writing of the revlog data
326 326 sparse_revlog = attr.ib(default=False)
327 327 # maximum length of a delta chain
328 328 max_chain_len = attr.ib(default=None)
329 329 # Maximum distance between delta chain base start and end
330 330 max_deltachain_span = attr.ib(default=-1)
331 331 # If `upper_bound_comp` is not None, this is the expected maximal gain from
332 332 # compression for the data content.
333 333 upper_bound_comp = attr.ib(default=None)
334 334 # Should we try a delta against both parents
335 335 delta_both_parents = attr.ib(default=True)
336 336 # Test delta base candidate group by chunk of this maximal size.
337 337 candidate_group_chunk_size = attr.ib(default=0)
338 338 # Should we display debug information about delta computation
339 339 debug_delta = attr.ib(default=False)
340 340 # trust incoming delta by default
341 341 lazy_delta = attr.ib(default=True)
342 342 # trust the base of incoming delta by default
343 343 lazy_delta_base = attr.ib(default=False)
344 344
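# A minimal usage sketch, not part of this changeset: the _Config.copy()
# idiom above gives every revlog its own configuration objects, and
# FeatureConfig.copy() also duplicates the options dict so per-instance
# mutations do not leak. The option key below is hypothetical and only
# used for illustration.
def _config_copy_sketch():
    base = FeatureConfig(compression_engine=b'zlib')
    clone = base.copy()
    clone.compression_engine_options[b'example-level'] = 9
    assert b'example-level' not in base.compression_engine_options
    return base, clone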
345 345
346 346 class _InnerRevlog:
347 347 """An inner layer of the revlog object
348 348
349 349 That layer exists to be able to delegate some operations to Rust; its
350 350 boundaries are arbitrary and based on what we can delegate to Rust.
351 351 """
352 352
353 353 def __init__(
354 354 self,
355 355 opener,
356 356 index,
357 357 index_file,
358 358 data_file,
359 359 sidedata_file,
360 360 inline,
361 361 data_config,
362 362 delta_config,
363 363 feature_config,
364 364 chunk_cache,
365 365 default_compression_header,
366 366 ):
367 367 self.opener = opener
368 368 self.index = index
369 369
370 370 self.__index_file = index_file
371 371 self.data_file = data_file
372 372 self.sidedata_file = sidedata_file
373 373 self.inline = inline
374 374 self.data_config = data_config
375 375 self.delta_config = delta_config
376 376 self.feature_config = feature_config
377 377
378 378 # used during diverted write.
379 379 self._orig_index_file = None
380 380
381 381 self._default_compression_header = default_compression_header
382 382
383 383 # index
384 384
385 385 # 3-tuple of file handles being used for active writing.
386 386 self._writinghandles = None
387 387
388 388 self._segmentfile = randomaccessfile.randomaccessfile(
389 389 self.opener,
390 390 (self.index_file if self.inline else self.data_file),
391 391 self.data_config.chunk_cache_size,
392 392 chunk_cache,
393 393 )
394 394 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
395 395 self.opener,
396 396 self.sidedata_file,
397 397 self.data_config.chunk_cache_size,
398 398 )
399 399
400 400 # revlog header -> revlog compressor
401 401 self._decompressors = {}
402 402 # 3-tuple of (node, rev, text) for a raw revision.
403 403 self._revisioncache = None
404 404
405 405 # cache some uncompressed chunks
406 406 # rev → uncompressed_chunk
407 407 #
408 408 # the max cost is dynamically updated to be proportional to the
409 409 # size of the revisions we actually encounter.
410 410 self._uncompressed_chunk_cache = None
411 411 if self.data_config.uncompressed_cache_factor is not None:
412 412 self._uncompressed_chunk_cache = util.lrucachedict(
413 413 self.data_config.uncompressed_cache_count,
414 414 maxcost=65536, # some arbitrary initial value
415 415 )
416 416
417 417 self._delay_buffer = None
418 418
419 419 @property
420 420 def index_file(self):
421 421 return self.__index_file
422 422
423 423 @index_file.setter
424 424 def index_file(self, new_index_file):
425 425 self.__index_file = new_index_file
426 426 if self.inline:
427 427 self._segmentfile.filename = new_index_file
428 428
429 429 def __len__(self):
430 430 return len(self.index)
431 431
432 432 def clear_cache(self):
433 433 assert not self.is_delaying
434 434 self._revisioncache = None
435 435 if self._uncompressed_chunk_cache is not None:
436 436 self._uncompressed_chunk_cache.clear()
437 437 self._segmentfile.clear_cache()
438 438 self._segmentfile_sidedata.clear_cache()
439 439
440 440 @property
441 441 def canonical_index_file(self):
442 442 if self._orig_index_file is not None:
443 443 return self._orig_index_file
444 444 return self.index_file
445 445
446 446 @property
447 447 def is_delaying(self):
448 448 """is the revlog is currently delaying the visibility of written data?
449 449
450 450 The delaying mechanism can be either in-memory or written on disk in a
451 451 side-file."""
452 452 return (self._delay_buffer is not None) or (
453 453 self._orig_index_file is not None
454 454 )
455 455
456 456 # Derived from index values.
457 457
458 458 def start(self, rev):
459 459 """the offset of the data chunk for this revision"""
460 460 return int(self.index[rev][0] >> 16)
461 461
462 462 def length(self, rev):
463 463 """the length of the data chunk for this revision"""
464 464 return self.index[rev][1]
465 465
466 466 def end(self, rev):
467 467 """the end of the data chunk for this revision"""
468 468 return self.start(rev) + self.length(rev)
469 469
470 470 def deltaparent(self, rev):
471 471 """return deltaparent of the given revision"""
472 472 base = self.index[rev][3]
473 473 if base == rev:
474 474 return nullrev
475 475 elif self.delta_config.general_delta:
476 476 return base
477 477 else:
478 478 return rev - 1
479 479
480 480 def issnapshot(self, rev):
481 481 """tells whether rev is a snapshot"""
482 482 if not self.delta_config.sparse_revlog:
483 483 return self.deltaparent(rev) == nullrev
484 484 elif hasattr(self.index, 'issnapshot'):
485 485 # directly assign the method to cache the testing and access
486 486 self.issnapshot = self.index.issnapshot
487 487 return self.issnapshot(rev)
488 488 if rev == nullrev:
489 489 return True
490 490 entry = self.index[rev]
491 491 base = entry[3]
492 492 if base == rev:
493 493 return True
494 494 if base == nullrev:
495 495 return True
496 496 p1 = entry[5]
497 497 while self.length(p1) == 0:
498 498 b = self.deltaparent(p1)
499 499 if b == p1:
500 500 break
501 501 p1 = b
502 502 p2 = entry[6]
503 503 while self.length(p2) == 0:
504 504 b = self.deltaparent(p2)
505 505 if b == p2:
506 506 break
507 507 p2 = b
508 508 if base == p1 or base == p2:
509 509 return False
510 510 return self.issnapshot(base)
511 511
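# Brief recap, not part of this changeset: with sparse-revlog enabled a
# revision is a "snapshot" when it is stored as a full text (delta base is
# nullrev or the revision itself) or when its delta base is not one of its
# non-empty parents and that base is, recursively, a snapshot itself.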
512 512 def _deltachain(self, rev, stoprev=None):
513 513 """Obtain the delta chain for a revision.
514 514
515 515 ``stoprev`` specifies a revision to stop at. If not specified, we
516 516 stop at the base of the chain.
517 517
518 518 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
519 519 revs in ascending order and ``stopped`` is a bool indicating whether
520 520 ``stoprev`` was hit.
521 521 """
522 522 generaldelta = self.delta_config.general_delta
523 523 # Try C implementation.
524 524 try:
525 525 return self.index.deltachain(rev, stoprev, generaldelta)
526 526 except AttributeError:
527 527 pass
528 528
529 529 chain = []
530 530
531 531 # Alias to prevent attribute lookup in tight loop.
532 532 index = self.index
533 533
534 534 iterrev = rev
535 535 e = index[iterrev]
536 536 while iterrev != e[3] and iterrev != stoprev:
537 537 chain.append(iterrev)
538 538 if generaldelta:
539 539 iterrev = e[3]
540 540 else:
541 541 iterrev -= 1
542 542 e = index[iterrev]
543 543
544 544 if iterrev == stoprev:
545 545 stopped = True
546 546 else:
547 547 chain.append(iterrev)
548 548 stopped = False
549 549
550 550 chain.reverse()
551 551 return chain, stopped
552 552
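# Hypothetical helper, not part of this changeset: how a delta chain
# returned by _deltachain() is typically consumed (raw_text() further down
# is the cached, production version of this). `inner` is assumed to be an
# _InnerRevlog opened for reading.
def _rebuild_revision_sketch(inner, rev):
    chain, stopped = inner._deltachain(rev)  # no stoprev: chain starts at the base
    chunks = inner._chunks(chain)            # one decompressed chunk per rev
    base = bytes(chunks[0])                  # full text of the chain base
    return mdiff.patches(base, chunks[1:])   # apply the deltas in order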
553 553 @util.propertycache
554 554 def _compressor(self):
555 555 engine = util.compengines[self.feature_config.compression_engine]
556 556 return engine.revlogcompressor(
557 557 self.feature_config.compression_engine_options
558 558 )
559 559
560 560 @util.propertycache
561 561 def _decompressor(self):
562 562 """the default decompressor"""
563 563 if self._default_compression_header is None:
564 564 return None
565 565 t = self._default_compression_header
566 566 c = self._get_decompressor(t)
567 567 return c.decompress
568 568
569 569 def _get_decompressor(self, t):
570 570 try:
571 571 compressor = self._decompressors[t]
572 572 except KeyError:
573 573 try:
574 574 engine = util.compengines.forrevlogheader(t)
575 575 compressor = engine.revlogcompressor(
576 576 self.feature_config.compression_engine_options
577 577 )
578 578 self._decompressors[t] = compressor
579 579 except KeyError:
580 580 raise error.RevlogError(
581 581 _(b'unknown compression type %s') % binascii.hexlify(t)
582 582 )
583 583 return compressor
584 584
585 585 def compress(self, data):
586 586 """Generate a possibly-compressed representation of data."""
587 587 if not data:
588 588 return b'', data
589 589
590 590 compressed = self._compressor.compress(data)
591 591
592 592 if compressed:
593 593 # The revlog compressor added the header in the returned data.
594 594 return b'', compressed
595 595
596 596 if data[0:1] == b'\0':
597 597 return b'', data
598 598 return b'u', data
599 599
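# Standalone sketch, not part of this changeset: the one-byte chunk header
# convention produced by compress() above and routed on by decompress()
# below, approximated here with plain zlib (the real code also supports
# other compression engines identified by their own revlog headers).
def _store_chunk_sketch(data):
    comp = zlib.compress(data)
    if comp and len(comp) < len(data):
        return comp  # zlib output already starts with the b'x' header
    if data[:1] == b'\0':
        return data  # b'\0' chunks are stored and returned verbatim
    return b'u' + data  # explicit "stored uncompressed" marker

def _load_chunk_sketch(chunk):
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t in (b'', b'\0'):
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown chunk header %r' % t)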
600 600 def decompress(self, data):
601 601 """Decompress a revlog chunk.
602 602
603 603 The chunk is expected to begin with a header identifying the
604 604 format type so it can be routed to an appropriate decompressor.
605 605 """
606 606 if not data:
607 607 return data
608 608
609 609 # Revlogs are read much more frequently than they are written and many
610 610 # chunks only take microseconds to decompress, so performance is
611 611 # important here.
612 612 #
613 613 # We can make a few assumptions about revlogs:
614 614 #
615 615 # 1) the majority of chunks will be compressed (as opposed to inline
616 616 # raw data).
617 617 # 2) decompressing *any* data will likely be at least 10x slower than
618 618 # returning raw inline data.
619 619 # 3) we want to prioritize common and officially supported compression
620 620 # engines
621 621 #
622 622 # It follows that we want to optimize for "decompress compressed data
623 623 # when encoded with common and officially supported compression engines"
624 624 # case over "raw data" and "data encoded by less common or non-official
625 625 # compression engines." That is why we have the inline lookup first
626 626 # followed by the compengines lookup.
627 627 #
628 628 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
629 629 # compressed chunks. And this matters for changelog and manifest reads.
630 630 t = data[0:1]
631 631
632 632 if t == b'x':
633 633 try:
634 634 return _zlibdecompress(data)
635 635 except zlib.error as e:
636 636 raise error.RevlogError(
637 637 _(b'revlog decompress error: %s')
638 638 % stringutil.forcebytestr(e)
639 639 )
640 640 # '\0' is more common than 'u' so it goes first.
641 641 elif t == b'\0':
642 642 return data
643 643 elif t == b'u':
644 644 return util.buffer(data, 1)
645 645
646 646 compressor = self._get_decompressor(t)
647 647
648 648 return compressor.decompress(data)
649 649
650 650 @contextlib.contextmanager
651 651 def reading(self):
652 652 """Context manager that keeps data and sidedata files open for reading"""
653 653 if len(self.index) == 0:
654 654 yield # nothing to be read
655 655 else:
656 656 with self._segmentfile.reading():
657 657 with self._segmentfile_sidedata.reading():
658 658 yield
659 659
660 660 @property
661 661 def is_writing(self):
662 662 """True is a writing context is open"""
663 663 return self._writinghandles is not None
664 664
665 665 @property
666 666 def is_open(self):
667 667 """True if any file handle is being held
668 668
669 669 Used for assert and debug in the python code"""
670 670 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
671 671
672 672 @contextlib.contextmanager
673 673 def writing(self, transaction, data_end=None, sidedata_end=None):
674 674 """Open the revlog files for writing
675 675
676 676 Adding content to a revlog should be done within such a context.
677 677 """
678 678 if self.is_writing:
679 679 yield
680 680 else:
681 681 ifh = dfh = sdfh = None
682 682 try:
683 683 r = len(self.index)
684 684 # opening the data file.
685 685 dsize = 0
686 686 if r:
687 687 dsize = self.end(r - 1)
688 688 dfh = None
689 689 if not self.inline:
690 690 try:
691 691 dfh = self.opener(self.data_file, mode=b"r+")
692 692 if data_end is None:
693 693 dfh.seek(0, os.SEEK_END)
694 694 else:
695 695 dfh.seek(data_end, os.SEEK_SET)
696 696 except FileNotFoundError:
697 697 dfh = self.opener(self.data_file, mode=b"w+")
698 698 transaction.add(self.data_file, dsize)
699 699 if self.sidedata_file is not None:
700 700 assert sidedata_end is not None
701 701 # revlog-v2 does not inline, help Pytype
702 702 assert dfh is not None
703 703 try:
704 704 sdfh = self.opener(self.sidedata_file, mode=b"r+")
705 705 dfh.seek(sidedata_end, os.SEEK_SET)
706 706 except FileNotFoundError:
707 707 sdfh = self.opener(self.sidedata_file, mode=b"w+")
708 708 transaction.add(self.sidedata_file, sidedata_end)
709 709
710 710 # opening the index file.
711 711 isize = r * self.index.entry_size
712 712 ifh = self.__index_write_fp()
713 713 if self.inline:
714 714 transaction.add(self.index_file, dsize + isize)
715 715 else:
716 716 transaction.add(self.index_file, isize)
717 717 # exposing all file handles for writing.
718 718 self._writinghandles = (ifh, dfh, sdfh)
719 719 self._segmentfile.writing_handle = ifh if self.inline else dfh
720 720 self._segmentfile_sidedata.writing_handle = sdfh
721 721 yield
722 722 finally:
723 723 self._writinghandles = None
724 724 self._segmentfile.writing_handle = None
725 725 self._segmentfile_sidedata.writing_handle = None
726 726 if dfh is not None:
727 727 dfh.close()
728 728 if sdfh is not None:
729 729 sdfh.close()
730 730 # closing the index file last to avoid exposing references to
731 731 # potentially unflushed data content.
732 732 if ifh is not None:
733 733 ifh.close()
734 734
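# Illustrative usage, not part of this changeset: revisions are expected to
# be appended from within the writing() context above, so that the index,
# data and sidedata handles are open and registered with the transaction:
#
#     with inner.writing(transaction):
#         inner.write_entry(transaction, entry, data, link, offset, ...)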
735 735 def __index_write_fp(self, index_end=None):
736 736 """internal method to open the index file for writing
737 737
738 738 You should not use this directly; use `_writing` instead
739 739 """
740 740 try:
741 741 if self._delay_buffer is None:
742 742 f = self.opener(
743 743 self.index_file,
744 744 mode=b"r+",
745 745 checkambig=self.data_config.check_ambig,
746 746 )
747 747 else:
748 748 # check_ambig affects the way we open the file for writing. However,
749 749 # here we do not actually open a file for writing, as writes
750 750 # will be appended to a delay_buffer. So check_ambig is not
751 751 # meaningful and is unneeded here.
752 752 f = randomaccessfile.appender(
753 753 self.opener, self.index_file, b"r+", self._delay_buffer
754 754 )
755 755 if index_end is None:
756 756 f.seek(0, os.SEEK_END)
757 757 else:
758 758 f.seek(index_end, os.SEEK_SET)
759 759 return f
760 760 except FileNotFoundError:
761 761 if self._delay_buffer is None:
762 762 return self.opener(
763 763 self.index_file,
764 764 mode=b"w+",
765 765 checkambig=self.data_config.check_ambig,
766 766 )
767 767 else:
768 768 return randomaccessfile.appender(
769 769 self.opener, self.index_file, b"w+", self._delay_buffer
770 770 )
771 771
772 772 def __index_new_fp(self):
773 773 """internal method to create a new index file for writing
774 774
775 775 You should not use this unless you are upgrading from an inline revlog
776 776 """
777 777 return self.opener(
778 778 self.index_file,
779 779 mode=b"w",
780 780 checkambig=self.data_config.check_ambig,
781 781 atomictemp=True,
782 782 )
783 783
784 784 def split_inline(self, tr, header, new_index_file_path=None):
785 785 """split the data of an inline revlog into an index and a data file"""
786 786 assert self._delay_buffer is None
787 787 existing_handles = False
788 788 if self._writinghandles is not None:
789 789 existing_handles = True
790 790 fp = self._writinghandles[0]
791 791 fp.flush()
792 792 fp.close()
793 793 # We can't use the cached file handle after close(). So prevent
794 794 # its usage.
795 795 self._writinghandles = None
796 796 self._segmentfile.writing_handle = None
797 797 # No need to deal with sidedata writing handle as it is only
798 798 # relevant with revlog-v2 which is never inline, not reaching
799 799 # this code
800 800
801 801 new_dfh = self.opener(self.data_file, mode=b"w+")
802 802 new_dfh.truncate(0) # drop any potentially existing data
803 803 try:
804 804 with self.reading():
805 805 for r in range(len(self.index)):
806 806 new_dfh.write(self.get_segment_for_revs(r, r)[1])
807 807 new_dfh.flush()
808 808
809 809 if new_index_file_path is not None:
810 810 self.index_file = new_index_file_path
811 811 with self.__index_new_fp() as fp:
812 812 self.inline = False
813 813 for i in range(len(self.index)):
814 814 e = self.index.entry_binary(i)
815 815 if i == 0:
816 816 packed_header = self.index.pack_header(header)
817 817 e = packed_header + e
818 818 fp.write(e)
819 819
820 820 # If we don't use side-write, the temp file replaces the real
821 821 # index when we exit the context manager
822 822
823 823 self._segmentfile = randomaccessfile.randomaccessfile(
824 824 self.opener,
825 825 self.data_file,
826 826 self.data_config.chunk_cache_size,
827 827 )
828 828
829 829 if existing_handles:
830 830 # switched from inline to conventional; reopen the index
831 831 ifh = self.__index_write_fp()
832 832 self._writinghandles = (ifh, new_dfh, None)
833 833 self._segmentfile.writing_handle = new_dfh
834 834 new_dfh = None
835 835 # No need to deal with sidedata writing handle as it is only
836 836 # relevant with revlog-v2 which is never inline, not reaching
837 837 # this code
838 838 finally:
839 839 if new_dfh is not None:
840 840 new_dfh.close()
841 841 return self.index_file
842 842
843 843 def get_segment_for_revs(self, startrev, endrev):
844 844 """Obtain a segment of raw data corresponding to a range of revisions.
845 845
846 846 Accepts the start and end revisions and an optional already-open
847 847 file handle to be used for reading. If the file handle is read, its
848 848 seek position will not be preserved.
849 849
850 850 Requests for data may be satisfied by a cache.
851 851
852 852 Returns a 2-tuple of (offset, data) for the requested range of
853 853 revisions. Offset is the integer offset from the beginning of the
854 854 revlog and data is a str or buffer of the raw byte data.
855 855
856 856 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
857 857 to determine where each revision's data begins and ends.
858 858
859 859 API: we should consider making this a private part of the InnerRevlog
860 860 at some point.
861 861 """
862 862 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
863 863 # (functions are expensive).
864 864 index = self.index
865 865 istart = index[startrev]
866 866 start = int(istart[0] >> 16)
867 867 if startrev == endrev:
868 868 end = start + istart[1]
869 869 else:
870 870 iend = index[endrev]
871 871 end = int(iend[0] >> 16) + iend[1]
872 872
873 873 if self.inline:
874 874 start += (startrev + 1) * self.index.entry_size
875 875 end += (endrev + 1) * self.index.entry_size
876 876 length = end - start
877 877
878 878 return start, self._segmentfile.read_chunk(start, length)
879 879
880 880 def _chunk(self, rev):
881 881 """Obtain a single decompressed chunk for a revision.
882 882
883 883 Accepts an integer revision and an optional already-open file handle
884 884 to be used for reading. If used, the seek position of the file will not
885 885 be preserved.
886 886
887 887 Returns a str holding uncompressed data for the requested revision.
888 888 """
889 889 if self._uncompressed_chunk_cache is not None:
890 890 uncomp = self._uncompressed_chunk_cache.get(rev)
891 891 if uncomp is not None:
892 892 return uncomp
893 893
894 894 compression_mode = self.index[rev][10]
895 895 data = self.get_segment_for_revs(rev, rev)[1]
896 896 if compression_mode == COMP_MODE_PLAIN:
897 897 uncomp = data
898 898 elif compression_mode == COMP_MODE_DEFAULT:
899 899 uncomp = self._decompressor(data)
900 900 elif compression_mode == COMP_MODE_INLINE:
901 901 uncomp = self.decompress(data)
902 902 else:
903 903 msg = b'unknown compression mode %d'
904 904 msg %= compression_mode
905 905 raise error.RevlogError(msg)
906 906 if self._uncompressed_chunk_cache is not None:
907 907 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
908 908 return uncomp
909 909
910 910 def _chunks(self, revs, targetsize=None):
911 911 """Obtain decompressed chunks for the specified revisions.
912 912
913 913 Accepts an iterable of numeric revisions that are assumed to be in
914 914 ascending order. Also accepts an optional already-open file handle
915 915 to be used for reading. If used, the seek position of the file will
916 916 not be preserved.
917 917
918 918 This function is similar to calling ``self._chunk()`` multiple times,
919 919 but is faster.
920 920
921 921 Returns a list with decompressed data for each requested revision.
922 922 """
923 923 if not revs:
924 924 return []
925 925 start = self.start
926 926 length = self.length
927 927 inline = self.inline
928 928 iosize = self.index.entry_size
929 929 buffer = util.buffer
930 930
931 931 fetched_revs = []
932 932 fadd = fetched_revs.append
933 933
934 934 chunks = []
935 935 ladd = chunks.append
936 936
937 937 if self._uncompressed_chunk_cache is None:
938 938 fetched_revs = revs
939 939 else:
940 940 for rev in revs:
941 941 cached_value = self._uncompressed_chunk_cache.get(rev)
942 942 if cached_value is None:
943 943 fadd(rev)
944 944 else:
945 945 ladd((rev, cached_value))
946 946
947 947 if not fetched_revs:
948 948 slicedchunks = ()
949 949 elif not self.data_config.with_sparse_read:
950 950 slicedchunks = (fetched_revs,)
951 951 else:
952 952 slicedchunks = deltautil.slicechunk(
953 953 self,
954 954 fetched_revs,
955 955 targetsize=targetsize,
956 956 )
957 957
958 958 for revschunk in slicedchunks:
959 959 firstrev = revschunk[0]
960 960 # Skip trailing revisions with empty diff
961 961 for lastrev in revschunk[::-1]:
962 962 if length(lastrev) != 0:
963 963 break
964 964
965 965 try:
966 966 offset, data = self.get_segment_for_revs(firstrev, lastrev)
967 967 except OverflowError:
968 968 # issue4215 - we can't cache a run of chunks greater than
969 969 # 2G on Windows
970 970 for rev in revschunk:
971 971 ladd((rev, self._chunk(rev)))
972 972
973 973 decomp = self.decompress
974 974 # self._decompressor might be None, but will not be used in that case
975 975 def_decomp = self._decompressor
976 976 for rev in revschunk:
977 977 chunkstart = start(rev)
978 978 if inline:
979 979 chunkstart += (rev + 1) * iosize
980 980 chunklength = length(rev)
981 981 comp_mode = self.index[rev][10]
982 982 c = buffer(data, chunkstart - offset, chunklength)
983 983 if comp_mode == COMP_MODE_PLAIN:
984 984 c = c
985 985 elif comp_mode == COMP_MODE_INLINE:
986 986 c = decomp(c)
987 987 elif comp_mode == COMP_MODE_DEFAULT:
988 988 c = def_decomp(c)
989 989 else:
990 990 msg = b'unknown compression mode %d'
991 991 msg %= comp_mode
992 992 raise error.RevlogError(msg)
993 993 ladd((rev, c))
994 994 if self._uncompressed_chunk_cache is not None:
995 995 self._uncompressed_chunk_cache.insert(rev, c, len(c))
996 996
997 997 chunks.sort()
998 998 return [x[1] for x in chunks]
999 999
1000 1000 def raw_text(self, node, rev):
1001 1001 """return the possibly unvalidated rawtext for a revision
1002 1002
1003 1003 returns (rev, rawtext, validated)
1004 1004 """
1005 1005
1006 1006 # revision in the cache (could be useful to apply delta)
1007 1007 cachedrev = None
1008 1008 # An intermediate text to apply deltas to
1009 1009 basetext = None
1010 1010
1011 1011 # Check if we have the entry in cache
1012 1012 # The cache entry looks like (node, rev, rawtext)
1013 1013 if self._revisioncache:
1014 1014 cachedrev = self._revisioncache[1]
1015 1015
1016 1016 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1017 1017 if stopped:
1018 1018 basetext = self._revisioncache[2]
1019 1019
1020 1020 # drop cache to save memory, the caller is expected to
1021 1021 # update self._inner._revisioncache after validating the text
1022 1022 self._revisioncache = None
1023 1023
1024 1024 targetsize = None
1025 1025 rawsize = self.index[rev][2]
1026 1026 if 0 <= rawsize:
1027 1027 targetsize = 4 * rawsize
1028 1028
1029 1029 if self._uncompressed_chunk_cache is not None:
1030 1030 # dynamically update the uncompressed_chunk_cache size to the
1031 1031 # largest revision we saw in this revlog.
1032 1032 factor = self.data_config.uncompressed_cache_factor
1033 1033 candidate_size = rawsize * factor
1034 1034 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1035 1035 self._uncompressed_chunk_cache.maxcost = candidate_size
1036 1036
1037 1037 bins = self._chunks(chain, targetsize=targetsize)
1038 1038 if basetext is None:
1039 1039 basetext = bytes(bins[0])
1040 1040 bins = bins[1:]
1041 1041
1042 1042 rawtext = mdiff.patches(basetext, bins)
1043 1043 del basetext # let us have a chance to free memory early
1044 1044 return (rev, rawtext, False)
1045 1045
1046 1046 def sidedata(self, rev, sidedata_end):
1047 1047 """Return the sidedata for a given revision number."""
1048 1048 index_entry = self.index[rev]
1049 1049 sidedata_offset = index_entry[8]
1050 1050 sidedata_size = index_entry[9]
1051 1051
1052 1052 if self.inline:
1053 1053 sidedata_offset += self.index.entry_size * (1 + rev)
1054 1054 if sidedata_size == 0:
1055 1055 return {}
1056 1056
1057 1057 if sidedata_end < sidedata_offset + sidedata_size:
1058 1058 filename = self.sidedata_file
1059 1059 end = sidedata_end
1060 1060 offset = sidedata_offset
1061 1061 length = sidedata_size
1062 1062 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1063 1063 raise error.RevlogError(m)
1064 1064
1065 1065 comp_segment = self._segmentfile_sidedata.read_chunk(
1066 1066 sidedata_offset, sidedata_size
1067 1067 )
1068 1068
1069 1069 comp = self.index[rev][11]
1070 1070 if comp == COMP_MODE_PLAIN:
1071 1071 segment = comp_segment
1072 1072 elif comp == COMP_MODE_DEFAULT:
1073 1073 segment = self._decompressor(comp_segment)
1074 1074 elif comp == COMP_MODE_INLINE:
1075 1075 segment = self.decompress(comp_segment)
1076 1076 else:
1077 1077 msg = b'unknown compression mode %d'
1078 1078 msg %= comp
1079 1079 raise error.RevlogError(msg)
1080 1080
1081 1081 sidedata = sidedatautil.deserialize_sidedata(segment)
1082 1082 return sidedata
1083 1083
1084 1084 def write_entry(
1085 1085 self,
1086 1086 transaction,
1087 1087 entry,
1088 1088 data,
1089 1089 link,
1090 1090 offset,
1091 1091 sidedata,
1092 1092 sidedata_offset,
1093 1093 index_end,
1094 1094 data_end,
1095 1095 sidedata_end,
1096 1096 ):
1097 1097 # Files opened in a+ mode have inconsistent behavior on various
1098 1098 # platforms. Windows requires that a file positioning call be made
1099 1099 # when the file handle transitions between reads and writes. See
1100 1100 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1101 1101 # platforms, Python or the platform itself can be buggy. Some versions
1102 1102 # of Solaris have been observed to not append at the end of the file
1103 1103 # if the file was seeked to before the end. See issue4943 for more.
1104 1104 #
1105 1105 # We work around this issue by inserting a seek() before writing.
1106 1106 # Note: This is likely not necessary on Python 3. However, because
1107 1107 # the file handle is reused for reads and may be seeked there, we need
1108 1108 # to be careful before changing this.
1109 1109 if self._writinghandles is None:
1110 1110 msg = b'adding revision outside `revlog._writing` context'
1111 1111 raise error.ProgrammingError(msg)
1112 1112 ifh, dfh, sdfh = self._writinghandles
1113 1113 if index_end is None:
1114 1114 ifh.seek(0, os.SEEK_END)
1115 1115 else:
1116 1116 ifh.seek(index_end, os.SEEK_SET)
1117 1117 if dfh:
1118 1118 if data_end is None:
1119 1119 dfh.seek(0, os.SEEK_END)
1120 1120 else:
1121 1121 dfh.seek(data_end, os.SEEK_SET)
1122 1122 if sdfh:
1123 1123 sdfh.seek(sidedata_end, os.SEEK_SET)
1124 1124
1125 1125 curr = len(self.index) - 1
1126 1126 if not self.inline:
1127 1127 transaction.add(self.data_file, offset)
1128 1128 if self.sidedata_file:
1129 1129 transaction.add(self.sidedata_file, sidedata_offset)
1130 1130 transaction.add(self.canonical_index_file, curr * len(entry))
1131 1131 if data[0]:
1132 1132 dfh.write(data[0])
1133 1133 dfh.write(data[1])
1134 1134 if sidedata:
1135 1135 sdfh.write(sidedata)
1136 1136 if self._delay_buffer is None:
1137 1137 ifh.write(entry)
1138 1138 else:
1139 1139 self._delay_buffer.append(entry)
1140 1140 else:
1141 1141 offset += curr * self.index.entry_size
1142 1142 transaction.add(self.canonical_index_file, offset)
1143 1143 assert not sidedata
1144 1144 if self._delay_buffer is None:
1145 1145 ifh.write(entry)
1146 1146 ifh.write(data[0])
1147 1147 ifh.write(data[1])
1148 1148 else:
1149 1149 self._delay_buffer.append(entry)
1150 1150 self._delay_buffer.append(data[0])
1151 1151 self._delay_buffer.append(data[1])
1152 1152 return (
1153 1153 ifh.tell(),
1154 1154 dfh.tell() if dfh else None,
1155 1155 sdfh.tell() if sdfh else None,
1156 1156 )
1157 1157
1158 1158 def _divert_index(self):
1159 1159 return self.index_file + b'.a'
1160 1160
1161 1161 def delay(self):
1162 1162 assert not self.is_open
1163 1163 if self._delay_buffer is not None or self._orig_index_file is not None:
1164 1164 # delay or divert already in place
1165 1165 return None
1166 1166 elif len(self.index) == 0:
1167 1167 self._orig_index_file = self.index_file
1168 1168 self.index_file = self._divert_index()
1169 1169 self._segmentfile.filename = self.index_file
1170 1170 assert self._orig_index_file is not None
1171 1171 assert self.index_file is not None
1172 1172 if self.opener.exists(self.index_file):
1173 1173 self.opener.unlink(self.index_file)
1174 1174 return self.index_file
1175 1175 else:
1176 self._segmentfile._delay_buffer = self._delay_buffer = []
1176 self._delay_buffer = []
1177 if self.inline:
1178 self._segmentfile._delay_buffer = self._delay_buffer
1177 1179 return None
1178 1180
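# Minimal model, not part of this changeset, of what the modified delay()
# hunk above changes: the in-memory delay buffer used to be handed to the
# segment file unconditionally, while it is now only shared for inline
# revlogs. A non-inline revlog serves revision data from a separate data
# file, so pending *index* entries must not become visible through the
# segment file reader. The class below is a hypothetical sketch of that
# decision, not the real implementation.
class _DelaySharingSketch:
    def __init__(self, inline):
        self.inline = inline
        self._delay_buffer = None
        self._segmentfile_delay_buffer = None

    def delay(self):
        self._delay_buffer = []
        if self.inline:
            # only an inline revlog reads revision data back out of the
            # index file, so only then must the reader see pending entries
            self._segmentfile_delay_buffer = self._delay_buffer
        return None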
1179 1181 def write_pending(self):
1180 1182 assert not self.is_open
1181 1183 if self._orig_index_file is not None:
1182 1184 return None, True
1183 1185 any_pending = False
1184 1186 pending_index_file = self._divert_index()
1185 1187 if self.opener.exists(pending_index_file):
1186 1188 self.opener.unlink(pending_index_file)
1187 1189 util.copyfile(
1188 1190 self.opener.join(self.index_file),
1189 1191 self.opener.join(pending_index_file),
1190 1192 )
1191 1193 if self._delay_buffer:
1192 1194 with self.opener(pending_index_file, b'r+') as ifh:
1193 1195 ifh.seek(0, os.SEEK_END)
1194 1196 ifh.write(b"".join(self._delay_buffer))
1195 1197 any_pending = True
1196 self._segmentfile._delay_buffer = self._delay_buffer = None
1198 self._delay_buffer = None
1199 if self.inline:
1200 self._segmentfile._delay_buffer = self._delay_buffer
1201 else:
1202 assert self._segmentfile._delay_buffer is None
1197 1203 self._orig_index_file = self.index_file
1198 1204 self.index_file = pending_index_file
1199 1205 self._segmentfile.filename = self.index_file
1200 1206 return self.index_file, any_pending
1201 1207
1202 1208 def finalize_pending(self):
1203 1209 assert not self.is_open
1204 1210
1205 1211 delay = self._delay_buffer is not None
1206 1212 divert = self._orig_index_file is not None
1207 1213
1208 1214 if delay and divert:
1209 1215 assert False, "unreachable"
1210 1216 elif delay:
1211 1217 if self._delay_buffer:
1212 1218 with self.opener(self.index_file, b'r+') as ifh:
1213 1219 ifh.seek(0, os.SEEK_END)
1214 1220 ifh.write(b"".join(self._delay_buffer))
1215 1221 self._segmentfile._delay_buffer = self._delay_buffer = None
1216 1222 elif divert:
1217 1223 if self.opener.exists(self.index_file):
1218 1224 self.opener.rename(
1219 1225 self.index_file,
1220 1226 self._orig_index_file,
1221 1227 checkambig=True,
1222 1228 )
1223 1229 self.index_file = self._orig_index_file
1224 1230 self._orig_index_file = None
1225 1231 self._segmentfile.filename = self.index_file
1226 1232 else:
1227 1233 msg = b"not delay or divert found on this revlog"
1228 1234 raise error.ProgrammingError(msg)
1229 1235 return self.canonical_index_file
1230 1236
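# Short recap, not part of this changeset, of the delay/divert lifecycle
# implemented by delay(), write_pending() and finalize_pending() above:
#
#   delay()             non-empty revlog: buffer new index entries in memory
#                       empty revlog: divert all index writes to a ".a" file
#   write_pending()     copy the index to the ".a" file, append the buffered
#                       entries and point index_file at it, so pre-transaction
#                       hooks can see the pending revisions
#   finalize_pending()  flush the buffer into the real index, or rename the
#                       diverted ".a" file back over it, restoring index_file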
1231 1237
1232 1238 class revlog:
1233 1239 """
1234 1240 the underlying revision storage object
1235 1241
1236 1242 A revlog consists of two parts, an index and the revision data.
1237 1243
1238 1244 The index is a file with a fixed record size containing
1239 1245 information on each revision, including its nodeid (hash), the
1240 1246 nodeids of its parents, the position and offset of its data within
1241 1247 the data file, and the revision it's based on. Finally, each entry
1242 1248 contains a linkrev entry that can serve as a pointer to external
1243 1249 data.
1244 1250
1245 1251 The revision data itself is a linear collection of data chunks.
1246 1252 Each chunk represents a revision and is usually represented as a
1247 1253 delta against the previous chunk. To bound lookup time, runs of
1248 1254 deltas are limited to about 2 times the length of the original
1249 1255 version data. This makes retrieval of a version proportional to
1250 1256 its size, or O(1) relative to the number of revisions.
1251 1257
1252 1258 Both pieces of the revlog are written to in an append-only
1253 1259 fashion, which means we never need to rewrite a file to insert or
1254 1260 remove data, and can use some simple techniques to avoid the need
1255 1261 for locking while reading.
1256 1262
1257 1263 If checkambig, indexfile is opened with checkambig=True at
1258 1264 writing, to avoid file stat ambiguity.
1259 1265
1260 1266 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1261 1267 index will be mmapped rather than read if it is larger than the
1262 1268 configured threshold.
1263 1269
1264 1270 If censorable is True, the revlog can have censored revisions.
1265 1271
1266 1272 If `upperboundcomp` is not None, this is the expected maximal gain from
1267 1273 compression for the data content.
1268 1274
1269 1275 `concurrencychecker` is an optional function that receives 3 arguments: a
1270 1276 file handle, a filename, and an expected position. It should check whether
1271 1277 the current position in the file handle is valid, and log/warn/fail (by
1272 1278 raising).
1273 1279
1274 1280 See mercurial/revlogutils/constants.py for details about the content of an
1275 1281 index entry.
1276 1282 """
1277 1283
1278 1284 _flagserrorclass = error.RevlogError
1279 1285
1280 1286 @staticmethod
1281 1287 def is_inline_index(header_bytes):
1282 1288 """Determine if a revlog is inline from the initial bytes of the index"""
1283 1289 if len(header_bytes) == 0:
1284 1290 return True
1285 1291
1286 1292 header = INDEX_HEADER.unpack(header_bytes)[0]
1287 1293
1288 1294 _format_flags = header & ~0xFFFF
1289 1295 _format_version = header & 0xFFFF
1290 1296
1291 1297 features = FEATURES_BY_VERSION[_format_version]
1292 1298 return features[b'inline'](_format_flags)
1293 1299
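# Illustrative usage, not part of this changeset: probing whether an existing
# revlog is inline from the first bytes of its index (the file name below is
# hypothetical; INDEX_HEADER is the version/flags word unpacked above):
#
#     with opener(b'some-revlog.i') as fp:
#         header_bytes = fp.read(INDEX_HEADER.size)
#     inline = revlog.is_inline_index(header_bytes)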
1294 1300 def __init__(
1295 1301 self,
1296 1302 opener,
1297 1303 target,
1298 1304 radix,
1299 1305 postfix=None, # only exist for `tmpcensored` now
1300 1306 checkambig=False,
1301 1307 mmaplargeindex=False,
1302 1308 censorable=False,
1303 1309 upperboundcomp=None,
1304 1310 persistentnodemap=False,
1305 1311 concurrencychecker=None,
1306 1312 trypending=False,
1307 1313 try_split=False,
1308 1314 canonical_parent_order=True,
1309 1315 data_config=None,
1310 1316 delta_config=None,
1311 1317 feature_config=None,
1312 1318 may_inline=True, # may inline new revlog
1313 1319 ):
1314 1320 """
1315 1321 create a revlog object
1316 1322
1317 1323 opener is a function that abstracts the file opening operation
1318 1324 and can be used to implement COW semantics or the like.
1319 1325
1320 1326 `target`: a (KIND, ID) tuple that identifies the content stored in
1321 1327 this revlog. It helps the rest of the code to understand what the revlog
1322 1328 is about without having to resort to heuristics and index filename
1323 1329 analysis. Note that this must be reliably set by normal code, but
1324 1330 test, debug, or performance measurement code might not set this to an
1325 1331 accurate value.
1326 1332 """
1327 1333
1328 1334 self.radix = radix
1329 1335
1330 1336 self._docket_file = None
1331 1337 self._indexfile = None
1332 1338 self._datafile = None
1333 1339 self._sidedatafile = None
1334 1340 self._nodemap_file = None
1335 1341 self.postfix = postfix
1336 1342 self._trypending = trypending
1337 1343 self._try_split = try_split
1338 1344 self._may_inline = may_inline
1339 1345 self.opener = opener
1340 1346 if persistentnodemap:
1341 1347 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1342 1348
1343 1349 assert target[0] in ALL_KINDS
1344 1350 assert len(target) == 2
1345 1351 self.target = target
1346 1352 if feature_config is not None:
1347 1353 self.feature_config = feature_config.copy()
1348 1354 elif b'feature-config' in self.opener.options:
1349 1355 self.feature_config = self.opener.options[b'feature-config'].copy()
1350 1356 else:
1351 1357 self.feature_config = FeatureConfig()
1352 1358 self.feature_config.censorable = censorable
1353 1359 self.feature_config.canonical_parent_order = canonical_parent_order
1354 1360 if data_config is not None:
1355 1361 self.data_config = data_config.copy()
1356 1362 elif b'data-config' in self.opener.options:
1357 1363 self.data_config = self.opener.options[b'data-config'].copy()
1358 1364 else:
1359 1365 self.data_config = DataConfig()
1360 1366 self.data_config.check_ambig = checkambig
1361 1367 self.data_config.mmap_large_index = mmaplargeindex
1362 1368 if delta_config is not None:
1363 1369 self.delta_config = delta_config.copy()
1364 1370 elif b'delta-config' in self.opener.options:
1365 1371 self.delta_config = self.opener.options[b'delta-config'].copy()
1366 1372 else:
1367 1373 self.delta_config = DeltaConfig()
1368 1374 self.delta_config.upper_bound_comp = upperboundcomp
1369 1375
1370 1376 # Maps rev to chain base rev.
1371 1377 self._chainbasecache = util.lrucachedict(100)
1372 1378
1373 1379 self.index = None
1374 1380 self._docket = None
1375 1381 self._nodemap_docket = None
1376 1382 # Mapping of partial identifiers to full nodes.
1377 1383 self._pcache = {}
1378 1384
1379 1385 # other optional features
1380 1386
1381 1387 # Make copy of flag processors so each revlog instance can support
1382 1388 # custom flags.
1383 1389 self._flagprocessors = dict(flagutil.flagprocessors)
1384 1390 # prevent nesting of addgroup
1385 1391 self._adding_group = None
1386 1392
1387 1393 chunk_cache = self._loadindex()
1388 1394 self._load_inner(chunk_cache)
1389 1395 self._concurrencychecker = concurrencychecker
1390 1396
1391 1397 @property
1392 1398 def _generaldelta(self):
1393 1399 """temporary compatibility proxy"""
1394 1400 util.nouideprecwarn(
1395 1401 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1396 1402 )
1397 1403 return self.delta_config.general_delta
1398 1404
1399 1405 @property
1400 1406 def _checkambig(self):
1401 1407 """temporary compatibility proxy"""
1402 1408 util.nouideprecwarn(
1403 1409 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1404 1410 )
1405 1411 return self.data_config.check_ambig
1406 1412
1407 1413 @property
1408 1414 def _mmaplargeindex(self):
1409 1415 """temporary compatibility proxy"""
1410 1416 util.nouideprecwarn(
1411 1417 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1412 1418 )
1413 1419 return self.data_config.mmap_large_index
1414 1420
1415 1421 @property
1416 1422 def _censorable(self):
1417 1423 """temporary compatibility proxy"""
1418 1424 util.nouideprecwarn(
1419 1425 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1420 1426 )
1421 1427 return self.feature_config.censorable
1422 1428
1423 1429 @property
1424 1430 def _chunkcachesize(self):
1425 1431 """temporary compatibility proxy"""
1426 1432 util.nouideprecwarn(
1427 1433 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1428 1434 )
1429 1435 return self.data_config.chunk_cache_size
1430 1436
1431 1437 @property
1432 1438 def _maxchainlen(self):
1433 1439 """temporary compatibility proxy"""
1434 1440 util.nouideprecwarn(
1435 1441 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1436 1442 )
1437 1443 return self.delta_config.max_chain_len
1438 1444
1439 1445 @property
1440 1446 def _deltabothparents(self):
1441 1447 """temporary compatibility proxy"""
1442 1448 util.nouideprecwarn(
1443 1449 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1444 1450 )
1445 1451 return self.delta_config.delta_both_parents
1446 1452
1447 1453 @property
1448 1454 def _candidate_group_chunk_size(self):
1449 1455 """temporary compatibility proxy"""
1450 1456 util.nouideprecwarn(
1451 1457 b"use revlog.delta_config.candidate_group_chunk_size",
1452 1458 b"6.6",
1453 1459 stacklevel=2,
1454 1460 )
1455 1461 return self.delta_config.candidate_group_chunk_size
1456 1462
1457 1463 @property
1458 1464 def _debug_delta(self):
1459 1465 """temporary compatibility proxy"""
1460 1466 util.nouideprecwarn(
1461 1467 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1462 1468 )
1463 1469 return self.delta_config.debug_delta
1464 1470
1465 1471 @property
1466 1472 def _compengine(self):
1467 1473 """temporary compatibility proxy"""
1468 1474 util.nouideprecwarn(
1469 1475 b"use revlog.feature_config.compression_engine",
1470 1476 b"6.6",
1471 1477 stacklevel=2,
1472 1478 )
1473 1479 return self.feature_config.compression_engine
1474 1480
1475 1481 @property
1476 1482 def upperboundcomp(self):
1477 1483 """temporary compatibility proxy"""
1478 1484 util.nouideprecwarn(
1479 1485 b"use revlog.delta_config.upper_bound_comp",
1480 1486 b"6.6",
1481 1487 stacklevel=2,
1482 1488 )
1483 1489 return self.delta_config.upper_bound_comp
1484 1490
1485 1491 @property
1486 1492 def _compengineopts(self):
1487 1493 """temporary compatibility proxy"""
1488 1494 util.nouideprecwarn(
1489 1495 b"use revlog.feature_config.compression_engine_options",
1490 1496 b"6.6",
1491 1497 stacklevel=2,
1492 1498 )
1493 1499 return self.feature_config.compression_engine_options
1494 1500
1495 1501 @property
1496 1502 def _maxdeltachainspan(self):
1497 1503 """temporary compatibility proxy"""
1498 1504 util.nouideprecwarn(
1499 1505 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1500 1506 )
1501 1507 return self.delta_config.max_deltachain_span
1502 1508
1503 1509 @property
1504 1510 def _withsparseread(self):
1505 1511 """temporary compatibility proxy"""
1506 1512 util.nouideprecwarn(
1507 1513 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1508 1514 )
1509 1515 return self.data_config.with_sparse_read
1510 1516
1511 1517 @property
1512 1518 def _sparserevlog(self):
1513 1519 """temporary compatibility proxy"""
1514 1520 util.nouideprecwarn(
1515 1521 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1516 1522 )
1517 1523 return self.delta_config.sparse_revlog
1518 1524
1519 1525 @property
1520 1526 def hassidedata(self):
1521 1527 """temporary compatibility proxy"""
1522 1528 util.nouideprecwarn(
1523 1529 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1524 1530 )
1525 1531 return self.feature_config.has_side_data
1526 1532
1527 1533 @property
1528 1534 def _srdensitythreshold(self):
1529 1535 """temporary compatibility proxy"""
1530 1536 util.nouideprecwarn(
1531 1537 b"use revlog.data_config.sr_density_threshold",
1532 1538 b"6.6",
1533 1539 stacklevel=2,
1534 1540 )
1535 1541 return self.data_config.sr_density_threshold
1536 1542
1537 1543 @property
1538 1544 def _srmingapsize(self):
1539 1545 """temporary compatibility proxy"""
1540 1546 util.nouideprecwarn(
1541 1547 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1542 1548 )
1543 1549 return self.data_config.sr_min_gap_size
1544 1550
1545 1551 @property
1546 1552 def _compute_rank(self):
1547 1553 """temporary compatibility proxy"""
1548 1554 util.nouideprecwarn(
1549 1555 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1550 1556 )
1551 1557 return self.feature_config.compute_rank
1552 1558
1553 1559 @property
1554 1560 def canonical_parent_order(self):
1555 1561 """temporary compatibility proxy"""
1556 1562 util.nouideprecwarn(
1557 1563 b"use revlog.feature_config.canonical_parent_order",
1558 1564 b"6.6",
1559 1565 stacklevel=2,
1560 1566 )
1561 1567 return self.feature_config.canonical_parent_order
1562 1568
1563 1569 @property
1564 1570 def _lazydelta(self):
1565 1571 """temporary compatibility proxy"""
1566 1572 util.nouideprecwarn(
1567 1573 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1568 1574 )
1569 1575 return self.delta_config.lazy_delta
1570 1576
1571 1577 @property
1572 1578 def _lazydeltabase(self):
1573 1579 """temporary compatibility proxy"""
1574 1580 util.nouideprecwarn(
1575 1581 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1576 1582 )
1577 1583 return self.delta_config.lazy_delta_base
1578 1584
1579 1585 def _init_opts(self):
1580 1586 """process options (from above/config) to setup associated default revlog mode
1581 1587
1582 1588 These values might be affected when actually reading on disk information.
1583 1589
1584 1590 The relevant values are returned for use in _loadindex().
1585 1591
1586 1592 * newversionflags:
1587 1593 version header to use if we need to create a new revlog
1588 1594
1589 1595 * mmapindexthreshold:
1590 1596 minimal index size at which to start using mmap
1591 1597
1592 1598 * force_nodemap:
1593 1599 force the usage of a "development" version of the nodemap code
1594 1600 """
1595 1601 opts = self.opener.options
1596 1602
1597 1603 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1598 1604 new_header = CHANGELOGV2
1599 1605 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1600 1606 self.feature_config.compute_rank = compute_rank
1601 1607 elif b'revlogv2' in opts:
1602 1608 new_header = REVLOGV2
1603 1609 elif b'revlogv1' in opts:
1604 1610 new_header = REVLOGV1
1605 1611 if self._may_inline:
1606 1612 new_header |= FLAG_INLINE_DATA
1607 1613 if b'generaldelta' in opts:
1608 1614 new_header |= FLAG_GENERALDELTA
1609 1615 elif b'revlogv0' in self.opener.options:
1610 1616 new_header = REVLOGV0
1611 1617 else:
1612 1618 new_header = REVLOG_DEFAULT_VERSION
1613 1619
1614 1620 mmapindexthreshold = None
1615 1621 if self.data_config.mmap_large_index:
1616 1622 mmapindexthreshold = self.data_config.mmap_index_threshold
1617 1623 if self.feature_config.enable_ellipsis:
1618 1624 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1619 1625
1620 1626 # revlog v0 doesn't have flag processors
1621 1627 for flag, processor in opts.get(b'flagprocessors', {}).items():
1622 1628 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1623 1629
1624 1630 chunk_cache_size = self.data_config.chunk_cache_size
1625 1631 if chunk_cache_size <= 0:
1626 1632 raise error.RevlogError(
1627 1633 _(b'revlog chunk cache size %r is not greater than 0')
1628 1634 % chunk_cache_size
1629 1635 )
1630 1636 elif chunk_cache_size & (chunk_cache_size - 1):
1631 1637 raise error.RevlogError(
1632 1638 _(b'revlog chunk cache size %r is not a power of 2')
1633 1639 % chunk_cache_size
1634 1640 )
1635 1641 force_nodemap = opts.get(b'devel-force-nodemap', False)
1636 1642 return new_header, mmapindexthreshold, force_nodemap
1637 1643
1638 1644 def _get_data(self, filepath, mmap_threshold, size=None):
1639 1645 """return a file content with or without mmap
1640 1646
1641 1647 If the file is missing return the empty string"""
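# Descriptive note (added for clarity): mmap is only attempted when a
# threshold is given and the file is at least that large; smaller files fall
# back to a plain read().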
1642 1648 try:
1643 1649 with self.opener(filepath) as fp:
1644 1650 if mmap_threshold is not None:
1645 1651 file_size = self.opener.fstat(fp).st_size
1646 1652 if file_size >= mmap_threshold:
1647 1653 if size is not None:
1648 1654 # avoid potential mmap crash
1649 1655 size = min(file_size, size)
1650 1656 # TODO: should .close() to release resources without
1651 1657 # relying on Python GC
1652 1658 if size is None:
1653 1659 return util.buffer(util.mmapread(fp))
1654 1660 else:
1655 1661 return util.buffer(util.mmapread(fp, size))
1656 1662 if size is None:
1657 1663 return fp.read()
1658 1664 else:
1659 1665 return fp.read(size)
1660 1666 except FileNotFoundError:
1661 1667 return b''
1662 1668
1663 1669 def get_streams(self, max_linkrev, force_inline=False):
1664 1670 """return a list of streams that represent this revlog
1665 1671
1666 1672 This is used by stream-clone to do byte-to-byte copies of a repository.
1667 1673
1668 1674 This streams data for all revisions that refer to a changelog revision up
1669 1675 to `max_linkrev`.
1670 1676
1671 1677 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1672 1678
1673 1679 It returns a list of three-tuples:
1674 1680
1675 1681 [
1676 1682 (filename, bytes_stream, stream_size),
1677 1683 …
1678 1684 ]
1679 1685 """
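# Hedged usage sketch (illustrative only, names are hypothetical): a
# stream-clone consumer would typically iterate the returned triples, e.g.
#     for name, stream, size in revlog.get_streams(max_linkrev):
#         for chunk in stream:
#             write_to_clone(name, chunk)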
1680 1686 n = len(self)
1681 1687 index = self.index
1682 1688 while n > 0:
1683 1689 linkrev = index[n - 1][4]
1684 1690 if linkrev < max_linkrev:
1685 1691 break
1686 1692 # note: this loop will rarely go through multiple iterations, since
1687 1693 # it only traverses commits created during the current streaming
1688 1694 # pull operation.
1689 1695 #
1690 1696 # If this becomes a problem, using a binary search should cap the
1691 1697 # runtime of this.
1692 1698 n = n - 1
1693 1699 if n == 0:
1694 1700 # no data to send
1695 1701 return []
1696 1702 index_size = n * index.entry_size
1697 1703 data_size = self.end(n - 1)
1698 1704
1699 1705 # XXX we might have been split (or stripped) since the object
1700 1706 # initialization. We need to close this race too, e.g. by having a way to
1701 1707 # pre-open the files we feed to the revlog and never closing them before
1702 1708 # we are done streaming.
1703 1709
1704 1710 if self._inline:
1705 1711
1706 1712 def get_stream():
1707 1713 with self.opener(self._indexfile, mode=b"r") as fp:
1708 1714 yield None
1709 1715 size = index_size + data_size
1710 1716 if size <= 65536:
1711 1717 yield fp.read(size)
1712 1718 else:
1713 1719 yield from util.filechunkiter(fp, limit=size)
1714 1720
1715 1721 inline_stream = get_stream()
1716 1722 next(inline_stream)
1717 1723 return [
1718 1724 (self._indexfile, inline_stream, index_size + data_size),
1719 1725 ]
1720 1726 elif force_inline:
1721 1727
1722 1728 def get_stream():
1723 1729 with self.reading():
1724 1730 yield None
1725 1731
1726 1732 for rev in range(n):
1727 1733 idx = self.index.entry_binary(rev)
1728 1734 if rev == 0 and self._docket is None:
1729 1735 # re-inject the inline flag
1730 1736 header = self._format_flags
1731 1737 header |= self._format_version
1732 1738 header |= FLAG_INLINE_DATA
1733 1739 header = self.index.pack_header(header)
1734 1740 idx = header + idx
1735 1741 yield idx
1736 1742 yield self._inner.get_segment_for_revs(rev, rev)[1]
1737 1743
1738 1744 inline_stream = get_stream()
1739 1745 next(inline_stream)
1740 1746 return [
1741 1747 (self._indexfile, inline_stream, index_size + data_size),
1742 1748 ]
1743 1749 else:
1744 1750
1745 1751 def get_index_stream():
1746 1752 with self.opener(self._indexfile, mode=b"r") as fp:
1747 1753 yield None
1748 1754 if index_size <= 65536:
1749 1755 yield fp.read(index_size)
1750 1756 else:
1751 1757 yield from util.filechunkiter(fp, limit=index_size)
1752 1758
1753 1759 def get_data_stream():
1754 1760 with self._datafp() as fp:
1755 1761 yield None
1756 1762 if data_size <= 65536:
1757 1763 yield fp.read(data_size)
1758 1764 else:
1759 1765 yield from util.filechunkiter(fp, limit=data_size)
1760 1766
1761 1767 index_stream = get_index_stream()
1762 1768 next(index_stream)
1763 1769 data_stream = get_data_stream()
1764 1770 next(data_stream)
1765 1771 return [
1766 1772 (self._datafile, data_stream, data_size),
1767 1773 (self._indexfile, index_stream, index_size),
1768 1774 ]
1769 1775
1770 1776 def _loadindex(self, docket=None):
1771 1777
1772 1778 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1773 1779
1774 1780 if self.postfix is not None:
1775 1781 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1776 1782 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1777 1783 entry_point = b'%s.i.a' % self.radix
1778 1784 elif self._try_split and self.opener.exists(self._split_index_file):
1779 1785 entry_point = self._split_index_file
1780 1786 else:
1781 1787 entry_point = b'%s.i' % self.radix
1782 1788
1783 1789 if docket is not None:
1784 1790 self._docket = docket
1785 1791 self._docket_file = entry_point
1786 1792 else:
1787 1793 self._initempty = True
1788 1794 entry_data = self._get_data(entry_point, mmapindexthreshold)
1789 1795 if len(entry_data) > 0:
1790 1796 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1791 1797 self._initempty = False
1792 1798 else:
1793 1799 header = new_header
1794 1800
1795 1801 self._format_flags = header & ~0xFFFF
1796 1802 self._format_version = header & 0xFFFF
1797 1803
1798 1804 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1799 1805 if supported_flags is None:
1800 1806 msg = _(b'unknown version (%d) in revlog %s')
1801 1807 msg %= (self._format_version, self.display_id)
1802 1808 raise error.RevlogError(msg)
1803 1809 elif self._format_flags & ~supported_flags:
1804 1810 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1805 1811 display_flag = self._format_flags >> 16
1806 1812 msg %= (display_flag, self._format_version, self.display_id)
1807 1813 raise error.RevlogError(msg)
1808 1814
1809 1815 features = FEATURES_BY_VERSION[self._format_version]
1810 1816 self._inline = features[b'inline'](self._format_flags)
1811 1817 self.delta_config.general_delta = features[b'generaldelta'](
1812 1818 self._format_flags
1813 1819 )
1814 1820 self.feature_config.has_side_data = features[b'sidedata']
1815 1821
1816 1822 if not features[b'docket']:
1817 1823 self._indexfile = entry_point
1818 1824 index_data = entry_data
1819 1825 else:
1820 1826 self._docket_file = entry_point
1821 1827 if self._initempty:
1822 1828 self._docket = docketutil.default_docket(self, header)
1823 1829 else:
1824 1830 self._docket = docketutil.parse_docket(
1825 1831 self, entry_data, use_pending=self._trypending
1826 1832 )
1827 1833
1828 1834 if self._docket is not None:
1829 1835 self._indexfile = self._docket.index_filepath()
1830 1836 index_data = b''
1831 1837 index_size = self._docket.index_end
1832 1838 if index_size > 0:
1833 1839 index_data = self._get_data(
1834 1840 self._indexfile, mmapindexthreshold, size=index_size
1835 1841 )
1836 1842 if len(index_data) < index_size:
1837 1843 msg = _(b'too few index data for %s: got %d, expected %d')
1838 1844 msg %= (self.display_id, len(index_data), index_size)
1839 1845 raise error.RevlogError(msg)
1840 1846
1841 1847 self._inline = False
1842 1848 # generaldelta implied by version 2 revlogs.
1843 1849 self.delta_config.general_delta = True
1844 1850 # the logic for persistent nodemap will be dealt with within the
1845 1851 # main docket, so disable it for now.
1846 1852 self._nodemap_file = None
1847 1853
1848 1854 if self._docket is not None:
1849 1855 self._datafile = self._docket.data_filepath()
1850 1856 self._sidedatafile = self._docket.sidedata_filepath()
1851 1857 elif self.postfix is None:
1852 1858 self._datafile = b'%s.d' % self.radix
1853 1859 else:
1854 1860 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1855 1861
1856 1862 self.nodeconstants = sha1nodeconstants
1857 1863 self.nullid = self.nodeconstants.nullid
1858 1864
1859 1865 # sparse-revlog can't be on without general-delta (issue6056)
1860 1866 if not self.delta_config.general_delta:
1861 1867 self.delta_config.sparse_revlog = False
1862 1868
1863 1869 self._storedeltachains = True
1864 1870
1865 1871 devel_nodemap = (
1866 1872 self._nodemap_file
1867 1873 and force_nodemap
1868 1874 and parse_index_v1_nodemap is not None
1869 1875 )
1870 1876
1871 1877 use_rust_index = False
1872 1878 if rustrevlog is not None:
1873 1879 if self._nodemap_file is not None:
1874 1880 use_rust_index = True
1875 1881 else:
1876 1882 use_rust_index = self.opener.options.get(b'rust.index')
1877 1883
1878 1884 self._parse_index = parse_index_v1
1879 1885 if self._format_version == REVLOGV0:
1880 1886 self._parse_index = revlogv0.parse_index_v0
1881 1887 elif self._format_version == REVLOGV2:
1882 1888 self._parse_index = parse_index_v2
1883 1889 elif self._format_version == CHANGELOGV2:
1884 1890 self._parse_index = parse_index_cl_v2
1885 1891 elif devel_nodemap:
1886 1892 self._parse_index = parse_index_v1_nodemap
1887 1893 elif use_rust_index:
1888 1894 self._parse_index = parse_index_v1_mixed
1889 1895 try:
1890 1896 d = self._parse_index(index_data, self._inline)
1891 1897 index, chunkcache = d
1892 1898 use_nodemap = (
1893 1899 not self._inline
1894 1900 and self._nodemap_file is not None
1895 1901 and hasattr(index, 'update_nodemap_data')
1896 1902 )
1897 1903 if use_nodemap:
1898 1904 nodemap_data = nodemaputil.persisted_data(self)
1899 1905 if nodemap_data is not None:
1900 1906 docket = nodemap_data[0]
1901 1907 if (
1902 1908 len(d[0]) > docket.tip_rev
1903 1909 and d[0][docket.tip_rev][7] == docket.tip_node
1904 1910 ):
1905 1911 # no changelog tampering
1906 1912 self._nodemap_docket = docket
1907 1913 index.update_nodemap_data(*nodemap_data)
1908 1914 except (ValueError, IndexError):
1909 1915 raise error.RevlogError(
1910 1916 _(b"index %s is corrupted") % self.display_id
1911 1917 )
1912 1918 self.index = index
1913 1919 # revnum -> (chain-length, sum-delta-length)
1914 1920 self._chaininfocache = util.lrucachedict(500)
1915 1921
1916 1922 return chunkcache
1917 1923
1918 1924 def _load_inner(self, chunk_cache):
1919 1925 if self._docket is None:
1920 1926 default_compression_header = None
1921 1927 else:
1922 1928 default_compression_header = self._docket.default_compression_header
1923 1929
1924 1930 self._inner = _InnerRevlog(
1925 1931 opener=self.opener,
1926 1932 index=self.index,
1927 1933 index_file=self._indexfile,
1928 1934 data_file=self._datafile,
1929 1935 sidedata_file=self._sidedatafile,
1930 1936 inline=self._inline,
1931 1937 data_config=self.data_config,
1932 1938 delta_config=self.delta_config,
1933 1939 feature_config=self.feature_config,
1934 1940 chunk_cache=chunk_cache,
1935 1941 default_compression_header=default_compression_header,
1936 1942 )
1937 1943
1938 1944 def get_revlog(self):
1939 1945 """simple function to mirror API of other not-really-revlog API"""
1940 1946 return self
1941 1947
1942 1948 @util.propertycache
1943 1949 def revlog_kind(self):
1944 1950 return self.target[0]
1945 1951
1946 1952 @util.propertycache
1947 1953 def display_id(self):
1948 1954 """The public-facing "ID" of the revlog that we use in messages"""
1949 1955 if self.revlog_kind == KIND_FILELOG:
1950 1956 # Reference the file without the "data/" prefix, so it is familiar
1951 1957 # to the user.
1952 1958 return self.target[1]
1953 1959 else:
1954 1960 return self.radix
1955 1961
1956 1962 def _datafp(self, mode=b'r'):
1957 1963 """file object for the revlog's data file"""
1958 1964 return self.opener(self._datafile, mode=mode)
1959 1965
1960 1966 def tiprev(self):
1961 1967 return len(self.index) - 1
1962 1968
1963 1969 def tip(self):
1964 1970 return self.node(self.tiprev())
1965 1971
1966 1972 def __contains__(self, rev):
1967 1973 return 0 <= rev < len(self)
1968 1974
1969 1975 def __len__(self):
1970 1976 return len(self.index)
1971 1977
1972 1978 def __iter__(self):
1973 1979 return iter(range(len(self)))
1974 1980
1975 1981 def revs(self, start=0, stop=None):
1976 1982 """iterate over all rev in this revlog (from start to stop)"""
1977 1983 return storageutil.iterrevs(len(self), start=start, stop=stop)
1978 1984
1979 1985 def hasnode(self, node):
1980 1986 try:
1981 1987 self.rev(node)
1982 1988 return True
1983 1989 except KeyError:
1984 1990 return False
1985 1991
1986 1992 def _candelta(self, baserev, rev):
1987 1993 """whether two revisions (baserev, rev) can be delta-ed or not"""
1988 1994 # Disable delta if either rev requires a content-changing flag
1989 1995 # processor (ex. LFS). This is because such a flag processor can alter
1990 1996 # the rawtext content that the delta will be based on, and two clients
1991 1997 # could have the same revlog node with different flags (i.e. different
1992 1998 # rawtext contents) and the delta could be incompatible.
1993 1999 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1994 2000 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1995 2001 ):
1996 2002 return False
1997 2003 return True
1998 2004
1999 2005 def update_caches(self, transaction):
2000 2006 """update on disk cache
2001 2007
2002 2008 If a transaction is passed, the update may be delayed to transaction
2003 2009 commit."""
2004 2010 if self._nodemap_file is not None:
2005 2011 if transaction is None:
2006 2012 nodemaputil.update_persistent_nodemap(self)
2007 2013 else:
2008 2014 nodemaputil.setup_persistent_nodemap(transaction, self)
2009 2015
2010 2016 def clearcaches(self):
2011 2017 """Clear in-memory caches"""
2012 2018 self._chainbasecache.clear()
2013 2019 self._inner.clear_cache()
2014 2020 self._pcache = {}
2015 2021 self._nodemap_docket = None
2016 2022 self.index.clearcaches()
2017 2023 # The python code is the one responsible for validating the docket, so we
2018 2024 # end up having to refresh it here.
2019 2025 use_nodemap = (
2020 2026 not self._inline
2021 2027 and self._nodemap_file is not None
2022 2028 and hasattr(self.index, 'update_nodemap_data')
2023 2029 )
2024 2030 if use_nodemap:
2025 2031 nodemap_data = nodemaputil.persisted_data(self)
2026 2032 if nodemap_data is not None:
2027 2033 self._nodemap_docket = nodemap_data[0]
2028 2034 self.index.update_nodemap_data(*nodemap_data)
2029 2035
2030 2036 def rev(self, node):
2031 2037 """return the revision number associated with a <nodeid>"""
2032 2038 try:
2033 2039 return self.index.rev(node)
2034 2040 except TypeError:
2035 2041 raise
2036 2042 except error.RevlogError:
2037 2043 # parsers.c radix tree lookup failed
2038 2044 if (
2039 2045 node == self.nodeconstants.wdirid
2040 2046 or node in self.nodeconstants.wdirfilenodeids
2041 2047 ):
2042 2048 raise error.WdirUnsupported
2043 2049 raise error.LookupError(node, self.display_id, _(b'no node'))
2044 2050
2045 2051 # Accessors for index entries.
2046 2052
2047 2053 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
2048 2054 # are flags.
2049 2055 def start(self, rev):
2050 2056 return int(self.index[rev][0] >> 16)
2051 2057
2052 2058 def sidedata_cut_off(self, rev):
2053 2059 sd_cut_off = self.index[rev][8]
2054 2060 if sd_cut_off != 0:
2055 2061 return sd_cut_off
2056 2062 # This is some annoying dance, because entries without sidedata
2057 2063 # currently use 0 as their offset. (instead of previous-offset +
2058 2064 # previous-size)
2059 2065 #
2060 2066 # We should reconsider this sidedata → 0 sidedata_offset policy.
2061 2067 # In the meantime, we need this.
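# Descriptive note (added): the loop below walks backwards to the nearest
# revision that actually has sidedata and uses its offset + size as the
# cut-off point.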
2062 2068 while 0 <= rev:
2063 2069 e = self.index[rev]
2064 2070 if e[9] != 0:
2065 2071 return e[8] + e[9]
2066 2072 rev -= 1
2067 2073 return 0
2068 2074
2069 2075 def flags(self, rev):
2070 2076 return self.index[rev][0] & 0xFFFF
2071 2077
2072 2078 def length(self, rev):
2073 2079 return self.index[rev][1]
2074 2080
2075 2081 def sidedata_length(self, rev):
2076 2082 if not self.feature_config.has_side_data:
2077 2083 return 0
2078 2084 return self.index[rev][9]
2079 2085
2080 2086 def rawsize(self, rev):
2081 2087 """return the length of the uncompressed text for a given revision"""
2082 2088 l = self.index[rev][2]
2083 2089 if l >= 0:
2084 2090 return l
2085 2091
2086 2092 t = self.rawdata(rev)
2087 2093 return len(t)
2088 2094
2089 2095 def size(self, rev):
2090 2096 """length of non-raw text (processed by a "read" flag processor)"""
2091 2097 # fast path: if no "read" flag processor could change the content,
2092 2098 # size is rawsize. note: ELLIPSIS is known to not change the content.
2093 2099 flags = self.flags(rev)
2094 2100 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
2095 2101 return self.rawsize(rev)
2096 2102
2097 2103 return len(self.revision(rev))
2098 2104
2099 2105 def fast_rank(self, rev):
2100 2106 """Return the rank of a revision if already known, or None otherwise.
2101 2107
2102 2108 The rank of a revision is the size of the sub-graph it defines as a
2103 2109 head. Equivalently, the rank of a revision `r` is the size of the set
2104 2110 `ancestors(r)`, `r` included.
2105 2111
2106 2112 This method returns the rank retrieved from the revlog in constant
2107 2113 time. It makes no attempt at computing unknown values for versions of
2108 2114 the revlog which do not persist the rank.
2109 2115 """
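# Worked example (derived from the definition above, not from stored data):
# in a linear history 0 <- 1 <- 2, the rank of revision 2 is 3, because
# ancestors(2) = {0, 1, 2} and the revision itself is counted.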
2110 2116 rank = self.index[rev][ENTRY_RANK]
2111 2117 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
2112 2118 return None
2113 2119 if rev == nullrev:
2114 2120 return 0 # convention
2115 2121 return rank
2116 2122
2117 2123 def chainbase(self, rev):
2118 2124 base = self._chainbasecache.get(rev)
2119 2125 if base is not None:
2120 2126 return base
2121 2127
2122 2128 index = self.index
2123 2129 iterrev = rev
2124 2130 base = index[iterrev][3]
2125 2131 while base != iterrev:
2126 2132 iterrev = base
2127 2133 base = index[iterrev][3]
2128 2134
2129 2135 self._chainbasecache[rev] = base
2130 2136 return base
2131 2137
2132 2138 def linkrev(self, rev):
2133 2139 return self.index[rev][4]
2134 2140
2135 2141 def parentrevs(self, rev):
2136 2142 try:
2137 2143 entry = self.index[rev]
2138 2144 except IndexError:
2139 2145 if rev == wdirrev:
2140 2146 raise error.WdirUnsupported
2141 2147 raise
2142 2148
2143 2149 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2144 2150 return entry[6], entry[5]
2145 2151 else:
2146 2152 return entry[5], entry[6]
2147 2153
2148 2154 # fast parentrevs(rev) where rev isn't filtered
2149 2155 _uncheckedparentrevs = parentrevs
2150 2156
2151 2157 def node(self, rev):
2152 2158 try:
2153 2159 return self.index[rev][7]
2154 2160 except IndexError:
2155 2161 if rev == wdirrev:
2156 2162 raise error.WdirUnsupported
2157 2163 raise
2158 2164
2159 2165 # Derived from index values.
2160 2166
2161 2167 def end(self, rev):
2162 2168 return self.start(rev) + self.length(rev)
2163 2169
2164 2170 def parents(self, node):
2165 2171 i = self.index
2166 2172 d = i[self.rev(node)]
2167 2173 # inline node() to avoid function call overhead
2168 2174 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2169 2175 return i[d[6]][7], i[d[5]][7]
2170 2176 else:
2171 2177 return i[d[5]][7], i[d[6]][7]
2172 2178
2173 2179 def chainlen(self, rev):
2174 2180 return self._chaininfo(rev)[0]
2175 2181
2176 2182 def _chaininfo(self, rev):
2177 2183 chaininfocache = self._chaininfocache
2178 2184 if rev in chaininfocache:
2179 2185 return chaininfocache[rev]
2180 2186 index = self.index
2181 2187 generaldelta = self.delta_config.general_delta
2182 2188 iterrev = rev
2183 2189 e = index[iterrev]
2184 2190 clen = 0
2185 2191 compresseddeltalen = 0
2186 2192 while iterrev != e[3]:
2187 2193 clen += 1
2188 2194 compresseddeltalen += e[1]
2189 2195 if generaldelta:
2190 2196 iterrev = e[3]
2191 2197 else:
2192 2198 iterrev -= 1
2193 2199 if iterrev in chaininfocache:
2194 2200 t = chaininfocache[iterrev]
2195 2201 clen += t[0]
2196 2202 compresseddeltalen += t[1]
2197 2203 break
2198 2204 e = index[iterrev]
2199 2205 else:
2200 2206 # Add text length of base since decompressing that also takes
2201 2207 # work. For cache hits the length is already included.
2202 2208 compresseddeltalen += e[1]
2203 2209 r = (clen, compresseddeltalen)
2204 2210 chaininfocache[rev] = r
2205 2211 return r
2206 2212
2207 2213 def _deltachain(self, rev, stoprev=None):
2208 2214 return self._inner._deltachain(rev, stoprev=stoprev)
2209 2215
2210 2216 def ancestors(self, revs, stoprev=0, inclusive=False):
2211 2217 """Generate the ancestors of 'revs' in reverse revision order.
2212 2218 Does not generate revs lower than stoprev.
2213 2219
2214 2220 See the documentation for ancestor.lazyancestors for more details."""
2215 2221
2216 2222 # first, make sure start revisions aren't filtered
2217 2223 revs = list(revs)
2218 2224 checkrev = self.node
2219 2225 for r in revs:
2220 2226 checkrev(r)
2221 2227 # and we're sure ancestors aren't filtered as well
2222 2228
2223 2229 if rustancestor is not None and self.index.rust_ext_compat:
2224 2230 lazyancestors = rustancestor.LazyAncestors
2225 2231 arg = self.index
2226 2232 else:
2227 2233 lazyancestors = ancestor.lazyancestors
2228 2234 arg = self._uncheckedparentrevs
2229 2235 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2230 2236
2231 2237 def descendants(self, revs):
2232 2238 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2233 2239
2234 2240 def findcommonmissing(self, common=None, heads=None):
2235 2241 """Return a tuple of the ancestors of common and the ancestors of heads
2236 2242 that are not ancestors of common. In revset terminology, we return the
2237 2243 tuple:
2238 2244
2239 2245 ::common, (::heads) - (::common)
2240 2246
2241 2247 The list is sorted by revision number, meaning it is
2242 2248 topologically sorted.
2243 2249
2244 2250 'heads' and 'common' are both lists of node IDs. If heads is
2245 2251 not supplied, uses all of the revlog's heads. If common is not
2246 2252 supplied, uses nullid."""
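# Illustrative reading of the revset above (hedged): the first element holds
# every ancestor of 'common' (inclusive), the second every ancestor of
# 'heads' that is not an ancestor of 'common'.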
2247 2253 if common is None:
2248 2254 common = [self.nullid]
2249 2255 if heads is None:
2250 2256 heads = self.heads()
2251 2257
2252 2258 common = [self.rev(n) for n in common]
2253 2259 heads = [self.rev(n) for n in heads]
2254 2260
2255 2261 # we want the ancestors, but inclusive
2256 2262 class lazyset:
2257 2263 def __init__(self, lazyvalues):
2258 2264 self.addedvalues = set()
2259 2265 self.lazyvalues = lazyvalues
2260 2266
2261 2267 def __contains__(self, value):
2262 2268 return value in self.addedvalues or value in self.lazyvalues
2263 2269
2264 2270 def __iter__(self):
2265 2271 added = self.addedvalues
2266 2272 for r in added:
2267 2273 yield r
2268 2274 for r in self.lazyvalues:
2269 2275 if r not in added:
2270 2276 yield r
2271 2277
2272 2278 def add(self, value):
2273 2279 self.addedvalues.add(value)
2274 2280
2275 2281 def update(self, values):
2276 2282 self.addedvalues.update(values)
2277 2283
2278 2284 has = lazyset(self.ancestors(common))
2279 2285 has.add(nullrev)
2280 2286 has.update(common)
2281 2287
2282 2288 # take all ancestors from heads that aren't in has
2283 2289 missing = set()
2284 2290 visit = collections.deque(r for r in heads if r not in has)
2285 2291 while visit:
2286 2292 r = visit.popleft()
2287 2293 if r in missing:
2288 2294 continue
2289 2295 else:
2290 2296 missing.add(r)
2291 2297 for p in self.parentrevs(r):
2292 2298 if p not in has:
2293 2299 visit.append(p)
2294 2300 missing = list(missing)
2295 2301 missing.sort()
2296 2302 return has, [self.node(miss) for miss in missing]
2297 2303
2298 2304 def incrementalmissingrevs(self, common=None):
2299 2305 """Return an object that can be used to incrementally compute the
2300 2306 revision numbers of the ancestors of arbitrary sets that are not
2301 2307 ancestors of common. This is an ancestor.incrementalmissingancestors
2302 2308 object.
2303 2309
2304 2310 'common' is a list of revision numbers. If common is not supplied, uses
2305 2311 nullrev.
2306 2312 """
2307 2313 if common is None:
2308 2314 common = [nullrev]
2309 2315
2310 2316 if rustancestor is not None and self.index.rust_ext_compat:
2311 2317 return rustancestor.MissingAncestors(self.index, common)
2312 2318 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2313 2319
2314 2320 def findmissingrevs(self, common=None, heads=None):
2315 2321 """Return the revision numbers of the ancestors of heads that
2316 2322 are not ancestors of common.
2317 2323
2318 2324 More specifically, return a list of revision numbers corresponding to
2319 2325 nodes N such that every N satisfies the following constraints:
2320 2326
2321 2327 1. N is an ancestor of some node in 'heads'
2322 2328 2. N is not an ancestor of any node in 'common'
2323 2329
2324 2330 The list is sorted by revision number, meaning it is
2325 2331 topologically sorted.
2326 2332
2327 2333 'heads' and 'common' are both lists of revision numbers. If heads is
2328 2334 not supplied, uses all of the revlog's heads. If common is not
2329 2335 supplied, uses nullid."""
2330 2336 if common is None:
2331 2337 common = [nullrev]
2332 2338 if heads is None:
2333 2339 heads = self.headrevs()
2334 2340
2335 2341 inc = self.incrementalmissingrevs(common=common)
2336 2342 return inc.missingancestors(heads)
2337 2343
2338 2344 def findmissing(self, common=None, heads=None):
2339 2345 """Return the ancestors of heads that are not ancestors of common.
2340 2346
2341 2347 More specifically, return a list of nodes N such that every N
2342 2348 satisfies the following constraints:
2343 2349
2344 2350 1. N is an ancestor of some node in 'heads'
2345 2351 2. N is not an ancestor of any node in 'common'
2346 2352
2347 2353 The list is sorted by revision number, meaning it is
2348 2354 topologically sorted.
2349 2355
2350 2356 'heads' and 'common' are both lists of node IDs. If heads is
2351 2357 not supplied, uses all of the revlog's heads. If common is not
2352 2358 supplied, uses nullid."""
2353 2359 if common is None:
2354 2360 common = [self.nullid]
2355 2361 if heads is None:
2356 2362 heads = self.heads()
2357 2363
2358 2364 common = [self.rev(n) for n in common]
2359 2365 heads = [self.rev(n) for n in heads]
2360 2366
2361 2367 inc = self.incrementalmissingrevs(common=common)
2362 2368 return [self.node(r) for r in inc.missingancestors(heads)]
2363 2369
2364 2370 def nodesbetween(self, roots=None, heads=None):
2365 2371 """Return a topological path from 'roots' to 'heads'.
2366 2372
2367 2373 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2368 2374 topologically sorted list of all nodes N that satisfy both of
2369 2375 these constraints:
2370 2376
2371 2377 1. N is a descendant of some node in 'roots'
2372 2378 2. N is an ancestor of some node in 'heads'
2373 2379
2374 2380 Every node is considered to be both a descendant and an ancestor
2375 2381 of itself, so every reachable node in 'roots' and 'heads' will be
2376 2382 included in 'nodes'.
2377 2383
2378 2384 'outroots' is the list of reachable nodes in 'roots', i.e., the
2379 2385 subset of 'roots' that is returned in 'nodes'. Likewise,
2380 2386 'outheads' is the subset of 'heads' that is also in 'nodes'.
2381 2387
2382 2388 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2383 2389 unspecified, uses nullid as the only root. If 'heads' is
2384 2390 unspecified, uses list of all of the revlog's heads."""
2385 2391 nonodes = ([], [], [])
2386 2392 if roots is not None:
2387 2393 roots = list(roots)
2388 2394 if not roots:
2389 2395 return nonodes
2390 2396 lowestrev = min([self.rev(n) for n in roots])
2391 2397 else:
2392 2398 roots = [self.nullid] # Everybody's a descendant of nullid
2393 2399 lowestrev = nullrev
2394 2400 if (lowestrev == nullrev) and (heads is None):
2395 2401 # We want _all_ the nodes!
2396 2402 return (
2397 2403 [self.node(r) for r in self],
2398 2404 [self.nullid],
2399 2405 list(self.heads()),
2400 2406 )
2401 2407 if heads is None:
2402 2408 # All nodes are ancestors, so the latest ancestor is the last
2403 2409 # node.
2404 2410 highestrev = len(self) - 1
2405 2411 # Set ancestors to None to signal that every node is an ancestor.
2406 2412 ancestors = None
2407 2413 # Set heads to an empty dictionary for later discovery of heads
2408 2414 heads = {}
2409 2415 else:
2410 2416 heads = list(heads)
2411 2417 if not heads:
2412 2418 return nonodes
2413 2419 ancestors = set()
2414 2420 # Turn heads into a dictionary so we can remove 'fake' heads.
2415 2421 # Also, later we will be using it to filter out the heads we can't
2416 2422 # find from roots.
2417 2423 heads = dict.fromkeys(heads, False)
2418 2424 # Start at the top and keep marking parents until we're done.
2419 2425 nodestotag = set(heads)
2420 2426 # Remember where the top was so we can use it as a limit later.
2421 2427 highestrev = max([self.rev(n) for n in nodestotag])
2422 2428 while nodestotag:
2423 2429 # grab a node to tag
2424 2430 n = nodestotag.pop()
2425 2431 # Never tag nullid
2426 2432 if n == self.nullid:
2427 2433 continue
2428 2434 # A node's revision number represents its place in a
2429 2435 # topologically sorted list of nodes.
2430 2436 r = self.rev(n)
2431 2437 if r >= lowestrev:
2432 2438 if n not in ancestors:
2433 2439 # If we are possibly a descendant of one of the roots
2434 2440 # and we haven't already been marked as an ancestor
2435 2441 ancestors.add(n) # Mark as ancestor
2436 2442 # Add non-nullid parents to list of nodes to tag.
2437 2443 nodestotag.update(
2438 2444 [p for p in self.parents(n) if p != self.nullid]
2439 2445 )
2440 2446 elif n in heads: # We've seen it before, is it a fake head?
2441 2447 # So it is, real heads should not be the ancestors of
2442 2448 # any other heads.
2443 2449 heads.pop(n)
2444 2450 if not ancestors:
2445 2451 return nonodes
2446 2452 # Now that we have our set of ancestors, we want to remove any
2447 2453 # roots that are not ancestors.
2448 2454
2449 2455 # If one of the roots was nullid, everything is included anyway.
2450 2456 if lowestrev > nullrev:
2451 2457 # But, since we weren't, let's recompute the lowest rev to not
2452 2458 # include roots that aren't ancestors.
2453 2459
2454 2460 # Filter out roots that aren't ancestors of heads
2455 2461 roots = [root for root in roots if root in ancestors]
2456 2462 # Recompute the lowest revision
2457 2463 if roots:
2458 2464 lowestrev = min([self.rev(root) for root in roots])
2459 2465 else:
2460 2466 # No more roots? Return empty list
2461 2467 return nonodes
2462 2468 else:
2463 2469 # We are descending from nullid, and don't need to care about
2464 2470 # any other roots.
2465 2471 lowestrev = nullrev
2466 2472 roots = [self.nullid]
2467 2473 # Transform our roots list into a set.
2468 2474 descendants = set(roots)
2469 2475 # Also, keep the original roots so we can filter out roots that aren't
2470 2476 # 'real' roots (i.e. are descended from other roots).
2471 2477 roots = descendants.copy()
2472 2478 # Our topologically sorted list of output nodes.
2473 2479 orderedout = []
2474 2480 # Don't start at nullid since we don't want nullid in our output list,
2475 2481 # and if nullid shows up in descendants, empty parents will look like
2476 2482 # they're descendants.
2477 2483 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2478 2484 n = self.node(r)
2479 2485 isdescendant = False
2480 2486 if lowestrev == nullrev: # Everybody is a descendant of nullid
2481 2487 isdescendant = True
2482 2488 elif n in descendants:
2483 2489 # n is already a descendant
2484 2490 isdescendant = True
2485 2491 # This check only needs to be done here because all the roots
2486 2492 # will start being marked as descendants before the loop.
2487 2493 if n in roots:
2488 2494 # If n was a root, check if it's a 'real' root.
2489 2495 p = tuple(self.parents(n))
2490 2496 # If any of its parents are descendants, it's not a root.
2491 2497 if (p[0] in descendants) or (p[1] in descendants):
2492 2498 roots.remove(n)
2493 2499 else:
2494 2500 p = tuple(self.parents(n))
2495 2501 # A node is a descendant if either of its parents are
2496 2502 # descendants. (We seeded the descendants set with the roots
2497 2503 # up there, remember?)
2498 2504 if (p[0] in descendants) or (p[1] in descendants):
2499 2505 descendants.add(n)
2500 2506 isdescendant = True
2501 2507 if isdescendant and ((ancestors is None) or (n in ancestors)):
2502 2508 # Only include nodes that are both descendants and ancestors.
2503 2509 orderedout.append(n)
2504 2510 if (ancestors is not None) and (n in heads):
2505 2511 # We're trying to figure out which heads are reachable
2506 2512 # from roots.
2507 2513 # Mark this head as having been reached
2508 2514 heads[n] = True
2509 2515 elif ancestors is None:
2510 2516 # Otherwise, we're trying to discover the heads.
2511 2517 # Assume this is a head because if it isn't, the next step
2512 2518 # will eventually remove it.
2513 2519 heads[n] = True
2514 2520 # But, obviously its parents aren't.
2515 2521 for p in self.parents(n):
2516 2522 heads.pop(p, None)
2517 2523 heads = [head for head, flag in heads.items() if flag]
2518 2524 roots = list(roots)
2519 2525 assert orderedout
2520 2526 assert roots
2521 2527 assert heads
2522 2528 return (orderedout, roots, heads)
2523 2529
2524 2530 def headrevs(self, revs=None):
2525 2531 if revs is None:
2526 2532 try:
2527 2533 return self.index.headrevs()
2528 2534 except AttributeError:
2529 2535 return self._headrevs()
2530 2536 if rustdagop is not None and self.index.rust_ext_compat:
2531 2537 return rustdagop.headrevs(self.index, revs)
2532 2538 return dagop.headrevs(revs, self._uncheckedparentrevs)
2533 2539
2534 2540 def computephases(self, roots):
2535 2541 return self.index.computephasesmapsets(roots)
2536 2542
2537 2543 def _headrevs(self):
2538 2544 count = len(self)
2539 2545 if not count:
2540 2546 return [nullrev]
2541 2547 # we won't iterate over filtered revs, so nobody is a head at start
2542 2548 ishead = [0] * (count + 1)
2543 2549 index = self.index
2544 2550 for r in self:
2545 2551 ishead[r] = 1 # I may be a head
2546 2552 e = index[r]
2547 2553 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2548 2554 return [r for r, val in enumerate(ishead) if val]
2549 2555
2550 2556 def heads(self, start=None, stop=None):
2551 2557 """return the list of all nodes that have no children
2552 2558
2553 2559 if start is specified, only heads that are descendants of
2554 2560 start will be returned
2555 2561 if stop is specified, it will consider all the revs from stop
2556 2562 as if they had no children
2557 2563 """
2558 2564 if start is None and stop is None:
2559 2565 if not len(self):
2560 2566 return [self.nullid]
2561 2567 return [self.node(r) for r in self.headrevs()]
2562 2568
2563 2569 if start is None:
2564 2570 start = nullrev
2565 2571 else:
2566 2572 start = self.rev(start)
2567 2573
2568 2574 stoprevs = {self.rev(n) for n in stop or []}
2569 2575
2570 2576 revs = dagop.headrevssubset(
2571 2577 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2572 2578 )
2573 2579
2574 2580 return [self.node(rev) for rev in revs]
2575 2581
2576 2582 def children(self, node):
2577 2583 """find the children of a given node"""
2578 2584 c = []
2579 2585 p = self.rev(node)
2580 2586 for r in self.revs(start=p + 1):
2581 2587 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2582 2588 if prevs:
2583 2589 for pr in prevs:
2584 2590 if pr == p:
2585 2591 c.append(self.node(r))
2586 2592 elif p == nullrev:
2587 2593 c.append(self.node(r))
2588 2594 return c
2589 2595
2590 2596 def commonancestorsheads(self, a, b):
2591 2597 """calculate all the heads of the common ancestors of nodes a and b"""
2592 2598 a, b = self.rev(a), self.rev(b)
2593 2599 ancs = self._commonancestorsheads(a, b)
2594 2600 return pycompat.maplist(self.node, ancs)
2595 2601
2596 2602 def _commonancestorsheads(self, *revs):
2597 2603 """calculate all the heads of the common ancestors of revs"""
2598 2604 try:
2599 2605 ancs = self.index.commonancestorsheads(*revs)
2600 2606 except (AttributeError, OverflowError): # C implementation failed
2601 2607 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2602 2608 return ancs
2603 2609
2604 2610 def isancestor(self, a, b):
2605 2611 """return True if node a is an ancestor of node b
2606 2612
2607 2613 A revision is considered an ancestor of itself."""
2608 2614 a, b = self.rev(a), self.rev(b)
2609 2615 return self.isancestorrev(a, b)
2610 2616
2611 2617 def isancestorrev(self, a, b):
2612 2618 """return True if revision a is an ancestor of revision b
2613 2619
2614 2620 A revision is considered an ancestor of itself.
2615 2621
2616 2622 The implementation of this is trivial but the use of
2617 2623 reachableroots is not."""
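# Descriptive note (added): reachableroots(a, [b], [a]) returns a non-empty
# list exactly when walking ancestors from b can reach a, which is what the
# boolean below reports.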
2618 2624 if a == nullrev:
2619 2625 return True
2620 2626 elif a == b:
2621 2627 return True
2622 2628 elif a > b:
2623 2629 return False
2624 2630 return bool(self.reachableroots(a, [b], [a], includepath=False))
2625 2631
2626 2632 def reachableroots(self, minroot, heads, roots, includepath=False):
2627 2633 """return (heads(::(<roots> and <roots>::<heads>)))
2628 2634
2629 2635 If includepath is True, return (<roots>::<heads>)."""
2630 2636 try:
2631 2637 return self.index.reachableroots2(
2632 2638 minroot, heads, roots, includepath
2633 2639 )
2634 2640 except AttributeError:
2635 2641 return dagop._reachablerootspure(
2636 2642 self.parentrevs, minroot, roots, heads, includepath
2637 2643 )
2638 2644
2639 2645 def ancestor(self, a, b):
2640 2646 """calculate the "best" common ancestor of nodes a and b"""
2641 2647
2642 2648 a, b = self.rev(a), self.rev(b)
2643 2649 try:
2644 2650 ancs = self.index.ancestors(a, b)
2645 2651 except (AttributeError, OverflowError):
2646 2652 ancs = ancestor.ancestors(self.parentrevs, a, b)
2647 2653 if ancs:
2648 2654 # choose a consistent winner when there's a tie
2649 2655 return min(map(self.node, ancs))
2650 2656 return self.nullid
2651 2657
2652 2658 def _match(self, id):
2653 2659 if isinstance(id, int):
2654 2660 # rev
2655 2661 return self.node(id)
2656 2662 if len(id) == self.nodeconstants.nodelen:
2657 2663 # possibly a binary node
2658 2664 # odds of a binary node being all hex in ASCII are 1 in 10**25
2659 2665 try:
2660 2666 node = id
2661 2667 self.rev(node) # quick search the index
2662 2668 return node
2663 2669 except error.LookupError:
2664 2670 pass # may be partial hex id
2665 2671 try:
2666 2672 # str(rev)
2667 2673 rev = int(id)
2668 2674 if b"%d" % rev != id:
2669 2675 raise ValueError
2670 2676 if rev < 0:
2671 2677 rev = len(self) + rev
2672 2678 if rev < 0 or rev >= len(self):
2673 2679 raise ValueError
2674 2680 return self.node(rev)
2675 2681 except (ValueError, OverflowError):
2676 2682 pass
2677 2683 if len(id) == 2 * self.nodeconstants.nodelen:
2678 2684 try:
2679 2685 # a full hex nodeid?
2680 2686 node = bin(id)
2681 2687 self.rev(node)
2682 2688 return node
2683 2689 except (binascii.Error, error.LookupError):
2684 2690 pass
2685 2691
2686 2692 def _partialmatch(self, id):
2687 2693 # we don't care about wdirfilenodeids as they should always be full hashes
2688 2694 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2689 2695 ambiguous = False
2690 2696 try:
2691 2697 partial = self.index.partialmatch(id)
2692 2698 if partial and self.hasnode(partial):
2693 2699 if maybewdir:
2694 2700 # single 'ff...' match in radix tree, ambiguous with wdir
2695 2701 ambiguous = True
2696 2702 else:
2697 2703 return partial
2698 2704 elif maybewdir:
2699 2705 # no 'ff...' match in radix tree, wdir identified
2700 2706 raise error.WdirUnsupported
2701 2707 else:
2702 2708 return None
2703 2709 except error.RevlogError:
2704 2710 # parsers.c radix tree lookup gave multiple matches
2705 2711 # fast path: for unfiltered changelog, radix tree is accurate
2706 2712 if not getattr(self, 'filteredrevs', None):
2707 2713 ambiguous = True
2708 2714 # fall through to slow path that filters hidden revisions
2709 2715 except (AttributeError, ValueError):
2710 2716 # we are pure python, or key is not hex
2711 2717 pass
2712 2718 if ambiguous:
2713 2719 raise error.AmbiguousPrefixLookupError(
2714 2720 id, self.display_id, _(b'ambiguous identifier')
2715 2721 )
2716 2722
2717 2723 if id in self._pcache:
2718 2724 return self._pcache[id]
2719 2725
2720 2726 if len(id) <= 40:
2721 2727 # hex(node)[:...]
2722 2728 l = len(id) // 2 * 2 # grab an even number of digits
2723 2729 try:
2724 2730 # we're dropping the last digit, so let's check that it's hex,
2725 2731 # to avoid the expensive computation below if it's not
2726 2732 if len(id) % 2 > 0:
2727 2733 if not (id[-1] in hexdigits):
2728 2734 return None
2729 2735 prefix = bin(id[:l])
2730 2736 except binascii.Error:
2731 2737 pass
2732 2738 else:
2733 2739 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2734 2740 nl = [
2735 2741 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2736 2742 ]
2737 2743 if self.nodeconstants.nullhex.startswith(id):
2738 2744 nl.append(self.nullid)
2739 2745 if len(nl) > 0:
2740 2746 if len(nl) == 1 and not maybewdir:
2741 2747 self._pcache[id] = nl[0]
2742 2748 return nl[0]
2743 2749 raise error.AmbiguousPrefixLookupError(
2744 2750 id, self.display_id, _(b'ambiguous identifier')
2745 2751 )
2746 2752 if maybewdir:
2747 2753 raise error.WdirUnsupported
2748 2754 return None
2749 2755
2750 2756 def lookup(self, id):
2751 2757 """locate a node based on:
2752 2758 - revision number or str(revision number)
2753 2759 - nodeid or subset of hex nodeid
2754 2760 """
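# Hedged usage sketch (hypothetical names): both revlog.lookup(b'0') and
# revlog.lookup(short_hex_prefix) resolve to a full binary nodeid; a
# LookupError is raised below when neither form matches.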
2755 2761 n = self._match(id)
2756 2762 if n is not None:
2757 2763 return n
2758 2764 n = self._partialmatch(id)
2759 2765 if n:
2760 2766 return n
2761 2767
2762 2768 raise error.LookupError(id, self.display_id, _(b'no match found'))
2763 2769
2764 2770 def shortest(self, node, minlength=1):
2765 2771 """Find the shortest unambiguous prefix that matches node."""
2766 2772
2767 2773 def isvalid(prefix):
2768 2774 try:
2769 2775 matchednode = self._partialmatch(prefix)
2770 2776 except error.AmbiguousPrefixLookupError:
2771 2777 return False
2772 2778 except error.WdirUnsupported:
2773 2779 # single 'ff...' match
2774 2780 return True
2775 2781 if matchednode is None:
2776 2782 raise error.LookupError(node, self.display_id, _(b'no node'))
2777 2783 return True
2778 2784
2779 2785 def maybewdir(prefix):
2780 2786 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2781 2787
2782 2788 hexnode = hex(node)
2783 2789
2784 2790 def disambiguate(hexnode, minlength):
2785 2791 """Disambiguate against wdirid."""
2786 2792 for length in range(minlength, len(hexnode) + 1):
2787 2793 prefix = hexnode[:length]
2788 2794 if not maybewdir(prefix):
2789 2795 return prefix
2790 2796
2791 2797 if not getattr(self, 'filteredrevs', None):
2792 2798 try:
2793 2799 length = max(self.index.shortest(node), minlength)
2794 2800 return disambiguate(hexnode, length)
2795 2801 except error.RevlogError:
2796 2802 if node != self.nodeconstants.wdirid:
2797 2803 raise error.LookupError(
2798 2804 node, self.display_id, _(b'no node')
2799 2805 )
2800 2806 except AttributeError:
2801 2807 # Fall through to pure code
2802 2808 pass
2803 2809
2804 2810 if node == self.nodeconstants.wdirid:
2805 2811 for length in range(minlength, len(hexnode) + 1):
2806 2812 prefix = hexnode[:length]
2807 2813 if isvalid(prefix):
2808 2814 return prefix
2809 2815
2810 2816 for length in range(minlength, len(hexnode) + 1):
2811 2817 prefix = hexnode[:length]
2812 2818 if isvalid(prefix):
2813 2819 return disambiguate(hexnode, length)
2814 2820
2815 2821 def cmp(self, node, text):
2816 2822 """compare text with a given file revision
2817 2823
2818 2824 returns True if text is different than what is stored.
2819 2825 """
2820 2826 p1, p2 = self.parents(node)
2821 2827 return storageutil.hashrevisionsha1(text, p1, p2) != node
2822 2828
2823 2829 def deltaparent(self, rev):
2824 2830 """return deltaparent of the given revision"""
2825 2831 base = self.index[rev][3]
2826 2832 if base == rev:
2827 2833 return nullrev
2828 2834 elif self.delta_config.general_delta:
2829 2835 return base
2830 2836 else:
2831 2837 return rev - 1
2832 2838
2833 2839 def issnapshot(self, rev):
2834 2840 """tells whether rev is a snapshot"""
2835 2841 ret = self._inner.issnapshot(rev)
2836 2842 self.issnapshot = self._inner.issnapshot
2837 2843 return ret
2838 2844
2839 2845 def snapshotdepth(self, rev):
2840 2846 """number of snapshots in the chain before this one"""
2841 2847 if not self.issnapshot(rev):
2842 2848 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2843 2849 return len(self._inner._deltachain(rev)[0]) - 1
2844 2850
2845 2851 def revdiff(self, rev1, rev2):
2846 2852 """return or calculate a delta between two revisions
2847 2853
2848 2854 The delta calculated is in binary form and is intended to be written to
2849 2855 revlog data directly. So this function needs raw revision data.
2850 2856 """
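# Descriptive note (added for clarity): when rev1 is already the stored
# delta parent of rev2, the on-disk chunk is returned as-is; otherwise a
# fresh binary diff is computed from the two raw texts via mdiff.textdiff.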
2851 2857 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2852 2858 return bytes(self._inner._chunk(rev2))
2853 2859
2854 2860 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2855 2861
2856 2862 def revision(self, nodeorrev):
2857 2863 """return an uncompressed revision of a given node or revision
2858 2864 number.
2859 2865 """
2860 2866 return self._revisiondata(nodeorrev)
2861 2867
2862 2868 def sidedata(self, nodeorrev):
2863 2869 """a map of extra data related to the changeset but not part of the hash
2864 2870
2865 2871 This function currently returns a dictionary. However, a more advanced
2866 2872 mapping object will likely be used in the future for more
2867 2873 efficient/lazy code.
2868 2874 """
2869 2875 # deal with <nodeorrev> argument type
2870 2876 if isinstance(nodeorrev, int):
2871 2877 rev = nodeorrev
2872 2878 else:
2873 2879 rev = self.rev(nodeorrev)
2874 2880 return self._sidedata(rev)
2875 2881
2876 2882 def _rawtext(self, node, rev):
2877 2883 """return the possibly unvalidated rawtext for a revision
2878 2884
2879 2885 returns (rev, rawtext, validated)
2880 2886 """
2881 2887 # Check if we have the entry in cache
2882 2888 # The cache entry looks like (node, rev, rawtext)
2883 2889 if self._inner._revisioncache:
2884 2890 if self._inner._revisioncache[0] == node:
2885 2891 return (rev, self._inner._revisioncache[2], True)
2886 2892
2887 2893 if rev is None:
2888 2894 rev = self.rev(node)
2889 2895
2890 2896 return self._inner.raw_text(node, rev)
2891 2897
2892 2898 def _revisiondata(self, nodeorrev, raw=False):
2893 2899 # deal with <nodeorrev> argument type
2894 2900 if isinstance(nodeorrev, int):
2895 2901 rev = nodeorrev
2896 2902 node = self.node(rev)
2897 2903 else:
2898 2904 node = nodeorrev
2899 2905 rev = None
2900 2906
2901 2907 # fast path the special `nullid` rev
2902 2908 if node == self.nullid:
2903 2909 return b""
2904 2910
2905 2911 # ``rawtext`` is the text as stored inside the revlog. Might be the
2906 2912 # revision or might need to be processed to retrieve the revision.
2907 2913 rev, rawtext, validated = self._rawtext(node, rev)
2908 2914
2909 2915 if raw and validated:
2910 2916 # if we don't want to process the raw text and that raw
2911 2917 # text is cached, we can exit early.
2912 2918 return rawtext
2913 2919 if rev is None:
2914 2920 rev = self.rev(node)
2915 2921 # the revlog's flag for this revision
2916 2922 # (usually alter its state or content)
2917 2923 flags = self.flags(rev)
2918 2924
2919 2925 if validated and flags == REVIDX_DEFAULT_FLAGS:
2920 2926 # no extra flags set, no flag processor runs, text = rawtext
2921 2927 return rawtext
2922 2928
2923 2929 if raw:
2924 2930 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2925 2931 text = rawtext
2926 2932 else:
2927 2933 r = flagutil.processflagsread(self, rawtext, flags)
2928 2934 text, validatehash = r
2929 2935 if validatehash:
2930 2936 self.checkhash(text, node, rev=rev)
2931 2937 if not validated:
2932 2938 self._inner._revisioncache = (node, rev, rawtext)
2933 2939
2934 2940 return text
2935 2941
2936 2942 def _sidedata(self, rev):
2937 2943 """Return the sidedata for a given revision number."""
2938 2944 sidedata_end = None
2939 2945 if self._docket is not None:
2940 2946 sidedata_end = self._docket.sidedata_end
2941 2947 return self._inner.sidedata(rev, sidedata_end)
2942 2948
2943 2949 def rawdata(self, nodeorrev):
2944 2950 """return an uncompressed raw data of a given node or revision number."""
2945 2951 return self._revisiondata(nodeorrev, raw=True)
2946 2952
2947 2953 def hash(self, text, p1, p2):
2948 2954 """Compute a node hash.
2949 2955
2950 2956 Available as a function so that subclasses can replace the hash
2951 2957 as needed.
2952 2958 """
2953 2959 return storageutil.hashrevisionsha1(text, p1, p2)
2954 2960
2955 2961 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2956 2962 """Check node hash integrity.
2957 2963
2958 2964 Available as a function so that subclasses can extend hash mismatch
2959 2965 behaviors as needed.
2960 2966 """
2961 2967 try:
2962 2968 if p1 is None and p2 is None:
2963 2969 p1, p2 = self.parents(node)
2964 2970 if node != self.hash(text, p1, p2):
2965 2971 # Clear the revision cache on hash failure. The revision cache
2966 2972 # only stores the raw revision and clearing the cache does have
2967 2973 # the side-effect that we won't have a cache hit when the raw
2968 2974 # revision data is accessed. But this case should be rare and
2969 2975 # it is extra work to teach the cache about the hash
2970 2976 # verification state.
2971 2977 if (
2972 2978 self._inner._revisioncache
2973 2979 and self._inner._revisioncache[0] == node
2974 2980 ):
2975 2981 self._inner._revisioncache = None
2976 2982
2977 2983 revornode = rev
2978 2984 if revornode is None:
2979 2985 revornode = templatefilters.short(hex(node))
2980 2986 raise error.RevlogError(
2981 2987 _(b"integrity check failed on %s:%s")
2982 2988 % (self.display_id, pycompat.bytestr(revornode))
2983 2989 )
2984 2990 except error.RevlogError:
2985 2991 if self.feature_config.censorable and storageutil.iscensoredtext(
2986 2992 text
2987 2993 ):
2988 2994 raise error.CensoredNodeError(self.display_id, node, text)
2989 2995 raise
2990 2996
2991 2997 @property
2992 2998 def _split_index_file(self):
2993 2999 """the path at which to expect the index of an ongoing splitting operation
2994 3000
2995 3001 The file will only exist if a splitting operation is in progress, but
2996 3002 it is always expected at the same location."""
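# Illustrative examples (derived from the logic below): a radix of
# b'data/foo' yields b'data-s/foo.i', while a store-root revlog such as
# b'00changelog' yields b'00changelog.i.s'.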
2997 3003 parts = self.radix.split(b'/')
2998 3004 if len(parts) > 1:
2999 3005 # adds a '-s' suffix to the `data/` or `meta/` base directory
3000 3006 head = parts[0] + b'-s'
3001 3007 mids = parts[1:-1]
3002 3008 tail = parts[-1] + b'.i'
3003 3009 pieces = [head] + mids + [tail]
3004 3010 return b'/'.join(pieces)
3005 3011 else:
3006 3012 # the revlog is stored at the root of the store (changelog or
3007 3013 # manifest), no risk of collision.
3008 3014 return self.radix + b'.i.s'
3009 3015
3010 3016 def _enforceinlinesize(self, tr, side_write=True):
3011 3017 """Check if the revlog is too big for inline and convert if so.
3012 3018
3013 3019 This should be called after revisions are added to the revlog. If the
3014 3020 revlog has grown too large to be an inline revlog, it will convert it
3015 3021 to use multiple index and data files.
3016 3022 """
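# Hedged note: an inline revlog keeps its data interleaved with the index in
# the `.i` file; once the total size reaches _maxinline, split_inline()
# below rewrites it into a separate `.d` data file plus a plain index.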
3017 3023 tiprev = len(self) - 1
3018 3024 total_size = self.start(tiprev) + self.length(tiprev)
3019 3025 if not self._inline or total_size < _maxinline:
3020 3026 return
3021 3027
3022 3028 if self._docket is not None:
3023 3029 msg = b"inline revlog should not have a docket"
3024 3030 raise error.ProgrammingError(msg)
3025 3031
3026 3032 troffset = tr.findoffset(self._inner.canonical_index_file)
3027 3033 if troffset is None:
3028 3034 raise error.RevlogError(
3029 3035 _(b"%s not found in the transaction") % self._indexfile
3030 3036 )
3031 3037 if troffset:
3032 3038 tr.addbackup(self._inner.canonical_index_file, for_offset=True)
3033 3039 tr.add(self._datafile, 0)
3034 3040
3035 3041 new_index_file_path = None
3036 3042 if side_write:
3037 3043 old_index_file_path = self._indexfile
3038 3044 new_index_file_path = self._split_index_file
3039 3045 opener = self.opener
3040 3046 weak_self = weakref.ref(self)
3041 3047
3042 3048 # the "split" index replaces the real index when the transaction is
3043 3049 # finalized
3044 3050 def finalize_callback(tr):
3045 3051 opener.rename(
3046 3052 new_index_file_path,
3047 3053 old_index_file_path,
3048 3054 checkambig=True,
3049 3055 )
3050 3056 maybe_self = weak_self()
3051 3057 if maybe_self is not None:
3052 3058 maybe_self._indexfile = old_index_file_path
3053 3059 maybe_self._inner.index_file = maybe_self._indexfile
3054 3060
3055 3061 def abort_callback(tr):
3056 3062 maybe_self = weak_self()
3057 3063 if maybe_self is not None:
3058 3064 maybe_self._indexfile = old_index_file_path
3059 3065 maybe_self._inner.inline = True
3060 3066 maybe_self._inner.index_file = old_index_file_path
3061 3067
3062 3068 tr.registertmp(new_index_file_path)
3063 3069 if self.target[1] is not None:
3064 3070 callback_id = b'000-revlog-split-%d-%s' % self.target
3065 3071 else:
3066 3072 callback_id = b'000-revlog-split-%d' % self.target[0]
3067 3073 tr.addfinalize(callback_id, finalize_callback)
3068 3074 tr.addabort(callback_id, abort_callback)
3069 3075
3070 3076 self._format_flags &= ~FLAG_INLINE_DATA
3071 3077 self._inner.split_inline(
3072 3078 tr,
3073 3079 self._format_flags | self._format_version,
3074 3080 new_index_file_path=new_index_file_path,
3075 3081 )
3076 3082
3077 3083 self._inline = False
3078 3084 if new_index_file_path is not None:
3079 3085 self._indexfile = new_index_file_path
3080 3086
3081 3087 nodemaputil.setup_persistent_nodemap(tr, self)
3082 3088
3083 3089 def _nodeduplicatecallback(self, transaction, node):
3084 3090 """called when trying to add a node already stored."""
3085 3091
3086 3092 @contextlib.contextmanager
3087 3093 def reading(self):
3088 3094 with self._inner.reading():
3089 3095 yield
3090 3096
3091 3097 @contextlib.contextmanager
3092 3098 def _writing(self, transaction):
3093 3099 if self._trypending:
3094 3100 msg = b'try to write in a `trypending` revlog: %s'
3095 3101 msg %= self.display_id
3096 3102 raise error.ProgrammingError(msg)
3097 3103 if self._inner.is_writing:
3098 3104 yield
3099 3105 else:
3100 3106 data_end = None
3101 3107 sidedata_end = None
3102 3108 if self._docket is not None:
3103 3109 data_end = self._docket.data_end
3104 3110 sidedata_end = self._docket.sidedata_end
3105 3111 with self._inner.writing(
3106 3112 transaction,
3107 3113 data_end=data_end,
3108 3114 sidedata_end=sidedata_end,
3109 3115 ):
3110 3116 yield
3111 3117 if self._docket is not None:
3112 3118 self._write_docket(transaction)
3113 3119
3114 3120 @property
3115 3121 def is_delaying(self):
3116 3122 return self._inner.is_delaying
3117 3123
3118 3124 def _write_docket(self, transaction):
3119 3125 """write the current docket on disk
3120 3126
3121 3127 Exists as a method to help the changelog implement transaction logic
3122 3128
3123 3129 We could also imagine using the same transaction logic for all revlogs
3124 3130 since docket are cheap."""
3125 3131 self._docket.write(transaction)
3126 3132
3127 3133 def addrevision(
3128 3134 self,
3129 3135 text,
3130 3136 transaction,
3131 3137 link,
3132 3138 p1,
3133 3139 p2,
3134 3140 cachedelta=None,
3135 3141 node=None,
3136 3142 flags=REVIDX_DEFAULT_FLAGS,
3137 3143 deltacomputer=None,
3138 3144 sidedata=None,
3139 3145 ):
3140 3146 """add a revision to the log
3141 3147
3142 3148 text - the revision data to add
3143 3149 transaction - the transaction object used for rollback
3144 3150 link - the linkrev data to add
3145 3151 p1, p2 - the parent nodeids of the revision
3146 3152 cachedelta - an optional precomputed delta
3147 3153 node - nodeid of revision; typically node is not specified, and it is
3148 3154 computed by default as hash(text, p1, p2); however, subclasses might
3149 3155 use a different hashing method (and override checkhash() in that case)
3150 3156 flags - the known flags to set on the revision
3151 3157 deltacomputer - an optional deltacomputer instance shared between
3152 3158 multiple calls
3153 3159 """
3154 3160 if link == nullrev:
3155 3161 raise error.RevlogError(
3156 3162 _(b"attempted to add linkrev -1 to %s") % self.display_id
3157 3163 )
3158 3164
3159 3165 if sidedata is None:
3160 3166 sidedata = {}
3161 3167 elif sidedata and not self.feature_config.has_side_data:
3162 3168 raise error.ProgrammingError(
3163 3169 _(b"trying to add sidedata to a revlog who don't support them")
3164 3170 )
3165 3171
3166 3172 if flags:
3167 3173 node = node or self.hash(text, p1, p2)
3168 3174
3169 3175 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3170 3176
3171 3177 # If the flag processor modifies the revision data, ignore any provided
3172 3178 # cachedelta.
3173 3179 if rawtext != text:
3174 3180 cachedelta = None
3175 3181
3176 3182 if len(rawtext) > _maxentrysize:
3177 3183 raise error.RevlogError(
3178 3184 _(
3179 3185 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3180 3186 )
3181 3187 % (self.display_id, len(rawtext))
3182 3188 )
3183 3189
3184 3190 node = node or self.hash(rawtext, p1, p2)
3185 3191 rev = self.index.get_rev(node)
3186 3192 if rev is not None:
3187 3193 return rev
3188 3194
3189 3195 if validatehash:
3190 3196 self.checkhash(rawtext, node, p1=p1, p2=p2)
3191 3197
3192 3198 return self.addrawrevision(
3193 3199 rawtext,
3194 3200 transaction,
3195 3201 link,
3196 3202 p1,
3197 3203 p2,
3198 3204 node,
3199 3205 flags,
3200 3206 cachedelta=cachedelta,
3201 3207 deltacomputer=deltacomputer,
3202 3208 sidedata=sidedata,
3203 3209 )
3204 3210
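# A minimal usage sketch for addrevision(); `repo`, `rl`, `tr`, `linkrev`,
# `p1node` and `p2node` are hypothetical placeholder names assumed for
# this example only:
#
#   with repo.transaction(b'example') as tr:  # hypothetical transaction
#       rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
#
# If the node is already present, addrevision() simply returns the
# existing revision number, so re-submitting known data is harmless.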
3205 3211 def addrawrevision(
3206 3212 self,
3207 3213 rawtext,
3208 3214 transaction,
3209 3215 link,
3210 3216 p1,
3211 3217 p2,
3212 3218 node,
3213 3219 flags,
3214 3220 cachedelta=None,
3215 3221 deltacomputer=None,
3216 3222 sidedata=None,
3217 3223 ):
3218 3224 """add a raw revision with known flags, node and parents
3219 3225 useful when reusing a revision not stored in this revlog (e.g. received
3220 3226 over the wire, or read from an external bundle).
3221 3227 """
3222 3228 with self._writing(transaction):
3223 3229 return self._addrevision(
3224 3230 node,
3225 3231 rawtext,
3226 3232 transaction,
3227 3233 link,
3228 3234 p1,
3229 3235 p2,
3230 3236 flags,
3231 3237 cachedelta,
3232 3238 deltacomputer=deltacomputer,
3233 3239 sidedata=sidedata,
3234 3240 )
3235 3241
3236 3242 def compress(self, data):
3237 3243 return self._inner.compress(data)
3238 3244
3239 3245 def decompress(self, data):
3240 3246 return self._inner.decompress(data)
3241 3247
3242 3248 def _addrevision(
3243 3249 self,
3244 3250 node,
3245 3251 rawtext,
3246 3252 transaction,
3247 3253 link,
3248 3254 p1,
3249 3255 p2,
3250 3256 flags,
3251 3257 cachedelta,
3252 3258 alwayscache=False,
3253 3259 deltacomputer=None,
3254 3260 sidedata=None,
3255 3261 ):
3256 3262 """internal function to add revisions to the log
3257 3263
3258 3264 see addrevision for argument descriptions.
3259 3265
3260 3266 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3261 3267
3262 3268 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3263 3269 be used.
3264 3270
3265 3271 invariants:
3266 3272 - rawtext is optional (can be None); if not set, cachedelta must be set.
3267 3273 if both are set, they must correspond to each other.
3268 3274 """
3269 3275 if node == self.nullid:
3270 3276 raise error.RevlogError(
3271 3277 _(b"%s: attempt to add null revision") % self.display_id
3272 3278 )
3273 3279 if (
3274 3280 node == self.nodeconstants.wdirid
3275 3281 or node in self.nodeconstants.wdirfilenodeids
3276 3282 ):
3277 3283 raise error.RevlogError(
3278 3284 _(b"%s: attempt to add wdir revision") % self.display_id
3279 3285 )
3280 3286 if self._inner._writinghandles is None:
3281 3287 msg = b'adding revision outside `revlog._writing` context'
3282 3288 raise error.ProgrammingError(msg)
3283 3289
3284 3290 btext = [rawtext]
3285 3291
3286 3292 curr = len(self)
3287 3293 prev = curr - 1
3288 3294
3289 3295 offset = self._get_data_offset(prev)
3290 3296
3291 3297 if self._concurrencychecker:
3292 3298 ifh, dfh, sdfh = self._inner._writinghandles
3293 3299 # XXX no checking for the sidedata file
3294 3300 if self._inline:
3295 3301 # offset is "as if" it were in the .d file, so we need to add on
3296 3302 # the size of the entry metadata.
3297 3303 self._concurrencychecker(
3298 3304 ifh, self._indexfile, offset + curr * self.index.entry_size
3299 3305 )
3300 3306 else:
3301 3307 # Entries in the .i are a consistent size.
3302 3308 self._concurrencychecker(
3303 3309 ifh, self._indexfile, curr * self.index.entry_size
3304 3310 )
3305 3311 self._concurrencychecker(dfh, self._datafile, offset)
3306 3312
3307 3313 p1r, p2r = self.rev(p1), self.rev(p2)
3308 3314
3309 3315 # full versions are inserted when the needed deltas
3310 3316 # become comparable to the uncompressed text
3311 3317 if rawtext is None:
3312 3318 # need the rawtext size, before it is changed by flag processors, which is
3313 3319 # the non-raw size. use revlog explicitly to avoid filelog's extra
3314 3320 # logic that might remove metadata size.
3315 3321 textlen = mdiff.patchedsize(
3316 3322 revlog.size(self, cachedelta[0]), cachedelta[1]
3317 3323 )
3318 3324 else:
3319 3325 textlen = len(rawtext)
3320 3326
3321 3327 if deltacomputer is None:
3322 3328 write_debug = None
3323 3329 if self.delta_config.debug_delta:
3324 3330 write_debug = transaction._report
3325 3331 deltacomputer = deltautil.deltacomputer(
3326 3332 self, write_debug=write_debug
3327 3333 )
3328 3334
3329 3335 if cachedelta is not None and len(cachedelta) == 2:
3330 3336 # If the cached delta has no information about how it should be
3331 3337 # reused, add the default reuse instruction according to the
3332 3338 # revlog's configuration.
3333 3339 if (
3334 3340 self.delta_config.general_delta
3335 3341 and self.delta_config.lazy_delta_base
3336 3342 ):
3337 3343 delta_base_reuse = DELTA_BASE_REUSE_TRY
3338 3344 else:
3339 3345 delta_base_reuse = DELTA_BASE_REUSE_NO
3340 3346 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3341 3347
3342 3348 revinfo = revlogutils.revisioninfo(
3343 3349 node,
3344 3350 p1,
3345 3351 p2,
3346 3352 btext,
3347 3353 textlen,
3348 3354 cachedelta,
3349 3355 flags,
3350 3356 )
3351 3357
3352 3358 deltainfo = deltacomputer.finddeltainfo(revinfo)
3353 3359
3354 3360 compression_mode = COMP_MODE_INLINE
3355 3361 if self._docket is not None:
3356 3362 default_comp = self._docket.default_compression_header
3357 3363 r = deltautil.delta_compression(default_comp, deltainfo)
3358 3364 compression_mode, deltainfo = r
3359 3365
3360 3366 sidedata_compression_mode = COMP_MODE_INLINE
3361 3367 if sidedata and self.feature_config.has_side_data:
3362 3368 sidedata_compression_mode = COMP_MODE_PLAIN
3363 3369 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3364 3370 sidedata_offset = self._docket.sidedata_end
3365 3371 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3366 3372 if (
3367 3373 h != b'u'
3368 3374 and comp_sidedata[0:1] != b'\0'
3369 3375 and len(comp_sidedata) < len(serialized_sidedata)
3370 3376 ):
3371 3377 assert not h
3372 3378 if (
3373 3379 comp_sidedata[0:1]
3374 3380 == self._docket.default_compression_header
3375 3381 ):
3376 3382 sidedata_compression_mode = COMP_MODE_DEFAULT
3377 3383 serialized_sidedata = comp_sidedata
3378 3384 else:
3379 3385 sidedata_compression_mode = COMP_MODE_INLINE
3380 3386 serialized_sidedata = comp_sidedata
3381 3387 else:
3382 3388 serialized_sidedata = b""
3383 3389 # Don't store the offset if the sidedata is empty, that way
3384 3390 # we can easily detect empty sidedata, and it will be no different
3385 3391 # from sidedata we add manually.
3386 3392 sidedata_offset = 0
3387 3393
3388 3394 rank = RANK_UNKNOWN
3389 3395 if self.feature_config.compute_rank:
3390 3396 if (p1r, p2r) == (nullrev, nullrev):
3391 3397 rank = 1
3392 3398 elif p1r != nullrev and p2r == nullrev:
3393 3399 rank = 1 + self.fast_rank(p1r)
3394 3400 elif p1r == nullrev and p2r != nullrev:
3395 3401 rank = 1 + self.fast_rank(p2r)
3396 3402 else: # merge node
3397 3403 if rustdagop is not None and self.index.rust_ext_compat:
3398 3404 rank = rustdagop.rank(self.index, p1r, p2r)
3399 3405 else:
3400 3406 pmin, pmax = sorted((p1r, p2r))
3401 3407 rank = 1 + self.fast_rank(pmax)
3402 3408 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3403 3409
3404 3410 e = revlogutils.entry(
3405 3411 flags=flags,
3406 3412 data_offset=offset,
3407 3413 data_compressed_length=deltainfo.deltalen,
3408 3414 data_uncompressed_length=textlen,
3409 3415 data_compression_mode=compression_mode,
3410 3416 data_delta_base=deltainfo.base,
3411 3417 link_rev=link,
3412 3418 parent_rev_1=p1r,
3413 3419 parent_rev_2=p2r,
3414 3420 node_id=node,
3415 3421 sidedata_offset=sidedata_offset,
3416 3422 sidedata_compressed_length=len(serialized_sidedata),
3417 3423 sidedata_compression_mode=sidedata_compression_mode,
3418 3424 rank=rank,
3419 3425 )
3420 3426
3421 3427 self.index.append(e)
3422 3428 entry = self.index.entry_binary(curr)
3423 3429 if curr == 0 and self._docket is None:
3424 3430 header = self._format_flags | self._format_version
3425 3431 header = self.index.pack_header(header)
3426 3432 entry = header + entry
3427 3433 self._writeentry(
3428 3434 transaction,
3429 3435 entry,
3430 3436 deltainfo.data,
3431 3437 link,
3432 3438 offset,
3433 3439 serialized_sidedata,
3434 3440 sidedata_offset,
3435 3441 )
3436 3442
3437 3443 rawtext = btext[0]
3438 3444
3439 3445 if alwayscache and rawtext is None:
3440 3446 rawtext = deltacomputer.buildtext(revinfo)
3441 3447
3442 3448 if type(rawtext) == bytes: # only accept immutable objects
3443 3449 self._inner._revisioncache = (node, curr, rawtext)
3444 3450 self._chainbasecache[curr] = deltainfo.chainbase
3445 3451 return curr
3446 3452
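# Minimal sketch of the `cachedelta` shape handled above, assuming a
# hypothetical precomputed delta against revision `baserev`:
#
#   cachedelta = (
#       baserev,               # revision the delta applies against
#       delta_bytes,           # binary delta data
#       DELTA_BASE_REUSE_TRY,  # optional third item: reuse instruction
#   )
#
# When the third item is missing, _addrevision() fills it in from the
# revlog configuration (DELTA_BASE_REUSE_TRY or DELTA_BASE_REUSE_NO).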
3447 3453 def _get_data_offset(self, prev):
3448 3454 """Returns the current offset in the (in-transaction) data file.
3449 3455 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
3450 3456 file to store that information: since sidedata can be rewritten to the
3451 3457 end of the data file within a transaction, you can have cases where, for
3452 3458 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3453 3459 to `n - 1`'s sidedata being written after `n`'s data.
3454 3460
3455 3461 TODO cache this in a docket file before getting out of experimental."""
3456 3462 if self._docket is None:
3457 3463 return self.end(prev)
3458 3464 else:
3459 3465 return self._docket.data_end
3460 3466
3461 3467 def _writeentry(
3462 3468 self,
3463 3469 transaction,
3464 3470 entry,
3465 3471 data,
3466 3472 link,
3467 3473 offset,
3468 3474 sidedata,
3469 3475 sidedata_offset,
3470 3476 ):
3471 3477 # Files opened in a+ mode have inconsistent behavior on various
3472 3478 # platforms. Windows requires that a file positioning call be made
3473 3479 # when the file handle transitions between reads and writes. See
3474 3480 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3475 3481 # platforms, Python or the platform itself can be buggy. Some versions
3476 3482 # of Solaris have been observed to not append at the end of the file
3477 3483 # if the file was seeked to before the end. See issue4943 for more.
3478 3484 #
3479 3485 # We work around this issue by inserting a seek() before writing.
3480 3486 # Note: This is likely not necessary on Python 3. However, because
3481 3487 # the file handle is reused for reads and may be seeked there, we need
3482 3488 # to be careful before changing this.
3483 3489 index_end = data_end = sidedata_end = None
3484 3490 if self._docket is not None:
3485 3491 index_end = self._docket.index_end
3486 3492 data_end = self._docket.data_end
3487 3493 sidedata_end = self._docket.sidedata_end
3488 3494
3489 3495 files_end = self._inner.write_entry(
3490 3496 transaction,
3491 3497 entry,
3492 3498 data,
3493 3499 link,
3494 3500 offset,
3495 3501 sidedata,
3496 3502 sidedata_offset,
3497 3503 index_end,
3498 3504 data_end,
3499 3505 sidedata_end,
3500 3506 )
3501 3507 self._enforceinlinesize(transaction)
3502 3508 if self._docket is not None:
3503 3509 self._docket.index_end = files_end[0]
3504 3510 self._docket.data_end = files_end[1]
3505 3511 self._docket.sidedata_end = files_end[2]
3506 3512
3507 3513 nodemaputil.setup_persistent_nodemap(transaction, self)
3508 3514
3509 3515 def addgroup(
3510 3516 self,
3511 3517 deltas,
3512 3518 linkmapper,
3513 3519 transaction,
3514 3520 alwayscache=False,
3515 3521 addrevisioncb=None,
3516 3522 duplicaterevisioncb=None,
3517 3523 debug_info=None,
3518 3524 delta_base_reuse_policy=None,
3519 3525 ):
3520 3526 """
3521 3527 add a delta group
3522 3528
3523 3529 Given a set of deltas, add them to the revision log. The
3524 3530 first delta is against its parent, which should be in our
3525 3531 log; the rest are against the previous delta.
3526 3532
3527 3533 If ``addrevisioncb`` is defined, it will be called with arguments of
3528 3534 this revlog and the node that was added.
3529 3535 """
3530 3536
3531 3537 if self._adding_group:
3532 3538 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3533 3539
3534 3540 # read the default delta-base reuse policy from revlog config if the
3535 3541 # group did not specify one.
3536 3542 if delta_base_reuse_policy is None:
3537 3543 if (
3538 3544 self.delta_config.general_delta
3539 3545 and self.delta_config.lazy_delta_base
3540 3546 ):
3541 3547 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3542 3548 else:
3543 3549 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3544 3550
3545 3551 self._adding_group = True
3546 3552 empty = True
3547 3553 try:
3548 3554 with self._writing(transaction):
3549 3555 write_debug = None
3550 3556 if self.delta_config.debug_delta:
3551 3557 write_debug = transaction._report
3552 3558 deltacomputer = deltautil.deltacomputer(
3553 3559 self,
3554 3560 write_debug=write_debug,
3555 3561 debug_info=debug_info,
3556 3562 )
3557 3563 # loop through our set of deltas
3558 3564 for data in deltas:
3559 3565 (
3560 3566 node,
3561 3567 p1,
3562 3568 p2,
3563 3569 linknode,
3564 3570 deltabase,
3565 3571 delta,
3566 3572 flags,
3567 3573 sidedata,
3568 3574 ) = data
3569 3575 link = linkmapper(linknode)
3570 3576 flags = flags or REVIDX_DEFAULT_FLAGS
3571 3577
3572 3578 rev = self.index.get_rev(node)
3573 3579 if rev is not None:
3574 3580 # this can happen if two branches make the same change
3575 3581 self._nodeduplicatecallback(transaction, rev)
3576 3582 if duplicaterevisioncb:
3577 3583 duplicaterevisioncb(self, rev)
3578 3584 empty = False
3579 3585 continue
3580 3586
3581 3587 for p in (p1, p2):
3582 3588 if not self.index.has_node(p):
3583 3589 raise error.LookupError(
3584 3590 p, self.radix, _(b'unknown parent')
3585 3591 )
3586 3592
3587 3593 if not self.index.has_node(deltabase):
3588 3594 raise error.LookupError(
3589 3595 deltabase, self.display_id, _(b'unknown delta base')
3590 3596 )
3591 3597
3592 3598 baserev = self.rev(deltabase)
3593 3599
3594 3600 if baserev != nullrev and self.iscensored(baserev):
3595 3601 # if base is censored, delta must be full replacement in a
3596 3602 # single patch operation
3597 3603 hlen = struct.calcsize(b">lll")
3598 3604 oldlen = self.rawsize(baserev)
3599 3605 newlen = len(delta) - hlen
3600 3606 if delta[:hlen] != mdiff.replacediffheader(
3601 3607 oldlen, newlen
3602 3608 ):
3603 3609 raise error.CensoredBaseError(
3604 3610 self.display_id, self.node(baserev)
3605 3611 )
3606 3612
3607 3613 if not flags and self._peek_iscensored(baserev, delta):
3608 3614 flags |= REVIDX_ISCENSORED
3609 3615
3610 3616 # We assume consumers of addrevisioncb will want to retrieve
3611 3617 # the added revision, which will require a call to
3612 3618 # revision(). revision() will fast path if there is a cache
3613 3619 # hit. So, we tell _addrevision() to always cache in this case.
3614 3620 # We're only using addgroup() in the context of changegroup
3615 3621 # generation so the revision data can always be handled as raw
3616 3622 # by the flagprocessor.
3617 3623 rev = self._addrevision(
3618 3624 node,
3619 3625 None,
3620 3626 transaction,
3621 3627 link,
3622 3628 p1,
3623 3629 p2,
3624 3630 flags,
3625 3631 (baserev, delta, delta_base_reuse_policy),
3626 3632 alwayscache=alwayscache,
3627 3633 deltacomputer=deltacomputer,
3628 3634 sidedata=sidedata,
3629 3635 )
3630 3636
3631 3637 if addrevisioncb:
3632 3638 addrevisioncb(self, rev)
3633 3639 empty = False
3634 3640 finally:
3635 3641 self._adding_group = False
3636 3642 return not empty
3637 3643
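# Illustrative sketch of the items consumed by addgroup() above; all
# names are placeholders assumed for this example:
#
#   deltas = iter([
#       (node, p1node, p2node, linknode, deltabasenode, deltabytes,
#        flags, sidedata),
#   ])
#   added_any = rl.addgroup(deltas, linkmapper, tr)
#
# `linkmapper` turns a linknode into a link revision; addgroup() returns
# False when every incoming revision was already known (nothing added).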
3638 3644 def iscensored(self, rev):
3639 3645 """Check if a file revision is censored."""
3640 3646 if not self.feature_config.censorable:
3641 3647 return False
3642 3648
3643 3649 return self.flags(rev) & REVIDX_ISCENSORED
3644 3650
3645 3651 def _peek_iscensored(self, baserev, delta):
3646 3652 """Quickly check if a delta produces a censored revision."""
3647 3653 if not self.feature_config.censorable:
3648 3654 return False
3649 3655
3650 3656 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3651 3657
3652 3658 def getstrippoint(self, minlink):
3653 3659 """find the minimum rev that must be stripped to strip the linkrev
3654 3660
3655 3661 Returns a tuple containing the minimum rev and a set of all revs that
3656 3662 have linkrevs that will be broken by this strip.
3657 3663 """
3658 3664 return storageutil.resolvestripinfo(
3659 3665 minlink,
3660 3666 len(self) - 1,
3661 3667 self.headrevs(),
3662 3668 self.linkrev,
3663 3669 self.parentrevs,
3664 3670 )
3665 3671
3666 3672 def strip(self, minlink, transaction):
3667 3673 """truncate the revlog on the first revision with a linkrev >= minlink
3668 3674
3669 3675 This function is called when we're stripping revision minlink and
3670 3676 its descendants from the repository.
3671 3677
3672 3678 We have to remove all revisions with linkrev >= minlink, because
3673 3679 the equivalent changelog revisions will be renumbered after the
3674 3680 strip.
3675 3681
3676 3682 So we truncate the revlog on the first of these revisions, and
3677 3683 trust that the caller has saved the revisions that shouldn't be
3678 3684 removed and that it'll re-add them after this truncation.
3679 3685 """
3680 3686 if len(self) == 0:
3681 3687 return
3682 3688
3683 3689 rev, _ = self.getstrippoint(minlink)
3684 3690 if rev == len(self):
3685 3691 return
3686 3692
3687 3693 # first truncate the files on disk
3688 3694 data_end = self.start(rev)
3689 3695 if not self._inline:
3690 3696 transaction.add(self._datafile, data_end)
3691 3697 end = rev * self.index.entry_size
3692 3698 else:
3693 3699 end = data_end + (rev * self.index.entry_size)
3694 3700
3695 3701 if self._sidedatafile:
3696 3702 sidedata_end = self.sidedata_cut_off(rev)
3697 3703 transaction.add(self._sidedatafile, sidedata_end)
3698 3704
3699 3705 transaction.add(self._indexfile, end)
3700 3706 if self._docket is not None:
3701 3707 # XXX we could leverage the docket while stripping. However, it is
3702 3708 # not powerful enough at the time of this comment
3703 3709 self._docket.index_end = end
3704 3710 self._docket.data_end = data_end
3705 3711 self._docket.sidedata_end = sidedata_end
3706 3712 self._docket.write(transaction, stripping=True)
3707 3713
3708 3714 # then reset internal state in memory to forget those revisions
3709 3715 self._chaininfocache = util.lrucachedict(500)
3710 3716 self._inner.clear_cache()
3711 3717
3712 3718 del self.index[rev:-1]
3713 3719
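# Illustrative sketch of how getstrippoint() and strip() work together;
# `rl`, `tr` and `minlink` are assumed placeholder names:
#
#   rev, broken = rl.getstrippoint(minlink)
#   if rev < len(rl):
#       rl.strip(minlink, tr)  # truncate files and in-memory state
#
# `broken` holds the revisions whose linkrevs this strip invalidates;
# the caller is expected to have saved whatever it wants to re-add.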
3714 3720 def checksize(self):
3715 3721 """Check size of index and data files
3716 3722
3717 3723 return a (dd, di) tuple.
3718 3724 - dd: extra bytes for the "data" file
3719 3725 - di: extra bytes for the "index" file
3720 3726
3721 3727 A healthy revlog will return (0, 0).
3722 3728 """
3723 3729 expected = 0
3724 3730 if len(self):
3725 3731 expected = max(0, self.end(len(self) - 1))
3726 3732
3727 3733 try:
3728 3734 with self._datafp() as f:
3729 3735 f.seek(0, io.SEEK_END)
3730 3736 actual = f.tell()
3731 3737 dd = actual - expected
3732 3738 except FileNotFoundError:
3733 3739 dd = 0
3734 3740
3735 3741 try:
3736 3742 f = self.opener(self._indexfile)
3737 3743 f.seek(0, io.SEEK_END)
3738 3744 actual = f.tell()
3739 3745 f.close()
3740 3746 s = self.index.entry_size
3741 3747 i = max(0, actual // s)
3742 3748 di = actual - (i * s)
3743 3749 if self._inline:
3744 3750 databytes = 0
3745 3751 for r in self:
3746 3752 databytes += max(0, self.length(r))
3747 3753 dd = 0
3748 3754 di = actual - len(self) * s - databytes
3749 3755 except FileNotFoundError:
3750 3756 di = 0
3751 3757
3752 3758 return (dd, di)
3753 3759
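# Minimal sketch of interpreting the checksize() result (assumed names):
#
#   dd, di = rl.checksize()
#   if dd or di:
#       # dd: extra bytes in the data file, di: extra bytes in the index.
#       # A healthy revlog reports (0, 0).
#       report_problem(rl, dd, di)  # hypothetical reporting helper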
3754 3760 def files(self):
3755 3761 """return list of files that compose this revlog"""
3756 3762 res = [self._indexfile]
3757 3763 if self._docket_file is None:
3758 3764 if not self._inline:
3759 3765 res.append(self._datafile)
3760 3766 else:
3761 3767 res.append(self._docket_file)
3762 3768 res.extend(self._docket.old_index_filepaths(include_empty=False))
3763 3769 if self._docket.data_end:
3764 3770 res.append(self._datafile)
3765 3771 res.extend(self._docket.old_data_filepaths(include_empty=False))
3766 3772 if self._docket.sidedata_end:
3767 3773 res.append(self._sidedatafile)
3768 3774 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3769 3775 return res
3770 3776
3771 3777 def emitrevisions(
3772 3778 self,
3773 3779 nodes,
3774 3780 nodesorder=None,
3775 3781 revisiondata=False,
3776 3782 assumehaveparentrevisions=False,
3777 3783 deltamode=repository.CG_DELTAMODE_STD,
3778 3784 sidedata_helpers=None,
3779 3785 debug_info=None,
3780 3786 ):
3781 3787 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3782 3788 raise error.ProgrammingError(
3783 3789 b'unhandled value for nodesorder: %s' % nodesorder
3784 3790 )
3785 3791
3786 3792 if nodesorder is None and not self.delta_config.general_delta:
3787 3793 nodesorder = b'storage'
3788 3794
3789 3795 if (
3790 3796 not self._storedeltachains
3791 3797 and deltamode != repository.CG_DELTAMODE_PREV
3792 3798 ):
3793 3799 deltamode = repository.CG_DELTAMODE_FULL
3794 3800
3795 3801 return storageutil.emitrevisions(
3796 3802 self,
3797 3803 nodes,
3798 3804 nodesorder,
3799 3805 revlogrevisiondelta,
3800 3806 deltaparentfn=self.deltaparent,
3801 3807 candeltafn=self._candelta,
3802 3808 rawsizefn=self.rawsize,
3803 3809 revdifffn=self.revdiff,
3804 3810 flagsfn=self.flags,
3805 3811 deltamode=deltamode,
3806 3812 revisiondata=revisiondata,
3807 3813 assumehaveparentrevisions=assumehaveparentrevisions,
3808 3814 sidedata_helpers=sidedata_helpers,
3809 3815 debug_info=debug_info,
3810 3816 )
3811 3817
3812 3818 DELTAREUSEALWAYS = b'always'
3813 3819 DELTAREUSESAMEREVS = b'samerevs'
3814 3820 DELTAREUSENEVER = b'never'
3815 3821
3816 3822 DELTAREUSEFULLADD = b'fulladd'
3817 3823
3818 3824 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3819 3825
3820 3826 def clone(
3821 3827 self,
3822 3828 tr,
3823 3829 destrevlog,
3824 3830 addrevisioncb=None,
3825 3831 deltareuse=DELTAREUSESAMEREVS,
3826 3832 forcedeltabothparents=None,
3827 3833 sidedata_helpers=None,
3828 3834 ):
3829 3835 """Copy this revlog to another, possibly with format changes.
3830 3836
3831 3837 The destination revlog will contain the same revisions and nodes.
3832 3838 However, it may not be bit-for-bit identical due to e.g. delta encoding
3833 3839 differences.
3834 3840
3835 3841 The ``deltareuse`` argument controls how deltas from the existing revlog
3836 3842 are preserved in the destination revlog. The argument can have the
3837 3843 following values:
3838 3844
3839 3845 DELTAREUSEALWAYS
3840 3846 Deltas will always be reused (if possible), even if the destination
3841 3847 revlog would not select the same revisions for the delta. This is the
3842 3848 fastest mode of operation.
3843 3849 DELTAREUSESAMEREVS
3844 3850 Deltas will be reused if the destination revlog would pick the same
3845 3851 revisions for the delta. This mode strikes a balance between speed
3846 3852 and optimization.
3847 3853 DELTAREUSENEVER
3848 3854 Deltas will never be reused. This is the slowest mode of execution.
3849 3855 This mode can be used to recompute deltas (e.g. if the diff/delta
3850 3856 algorithm changes).
3851 3857 DELTAREUSEFULLADD
3852 3858 Revisions will be re-added as if they were new content. This is
3853 3859 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3854 3860 e.g. large file detection and handling.
3855 3861
3856 3862 Delta computation can be slow, so the choice of delta reuse policy can
3857 3863 significantly affect run time.
3858 3864
3859 3865 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3860 3866 two extremes. Deltas will be reused if they are appropriate. But if the
3861 3867 delta could choose a better revision, it will do so. This means if you
3862 3868 are converting a non-generaldelta revlog to a generaldelta revlog,
3863 3869 deltas will be recomputed if the delta's parent isn't a parent of the
3864 3870 revision.
3865 3871
3866 3872 In addition to the delta policy, the ``forcedeltabothparents``
3867 3873 argument controls whether to force computing deltas against both parents
3868 3874 for merges. If unset, the destination revlog's existing setting is used.
3869 3875
3870 3876 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3871 3877 `sidedata_helpers`.
3872 3878 """
3873 3879 if deltareuse not in self.DELTAREUSEALL:
3874 3880 raise ValueError(
3875 3881 _(b'value for deltareuse invalid: %s') % deltareuse
3876 3882 )
3877 3883
3878 3884 if len(destrevlog):
3879 3885 raise ValueError(_(b'destination revlog is not empty'))
3880 3886
3881 3887 if getattr(self, 'filteredrevs', None):
3882 3888 raise ValueError(_(b'source revlog has filtered revisions'))
3883 3889 if getattr(destrevlog, 'filteredrevs', None):
3884 3890 raise ValueError(_(b'destination revlog has filtered revisions'))
3885 3891
3886 3892 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3887 3893 # if possible.
3888 3894 old_delta_config = destrevlog.delta_config
3889 3895 destrevlog.delta_config = destrevlog.delta_config.copy()
3890 3896
3891 3897 try:
3892 3898 if deltareuse == self.DELTAREUSEALWAYS:
3893 3899 destrevlog.delta_config.lazy_delta_base = True
3894 3900 destrevlog.delta_config.lazy_delta = True
3895 3901 elif deltareuse == self.DELTAREUSESAMEREVS:
3896 3902 destrevlog.delta_config.lazy_delta_base = False
3897 3903 destrevlog.delta_config.lazy_delta = True
3898 3904 elif deltareuse == self.DELTAREUSENEVER:
3899 3905 destrevlog.delta_config.lazy_delta_base = False
3900 3906 destrevlog.delta_config.lazy_delta = False
3901 3907
3902 3908 delta_both_parents = (
3903 3909 forcedeltabothparents or old_delta_config.delta_both_parents
3904 3910 )
3905 3911 destrevlog.delta_config.delta_both_parents = delta_both_parents
3906 3912
3907 3913 with self.reading(), destrevlog._writing(tr):
3908 3914 self._clone(
3909 3915 tr,
3910 3916 destrevlog,
3911 3917 addrevisioncb,
3912 3918 deltareuse,
3913 3919 forcedeltabothparents,
3914 3920 sidedata_helpers,
3915 3921 )
3916 3922
3917 3923 finally:
3918 3924 destrevlog.delta_config = old_delta_config
3919 3925
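# Illustrative sketch of cloning one revlog into an empty destination,
# e.g. during a format upgrade; `src`, `dst` and `tr` are assumed names:
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
#
# The destination must be empty, and the DELTAREUSE* policy mostly trades
# clone speed against how aggressively deltas are recomputed.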
3920 3926 def _clone(
3921 3927 self,
3922 3928 tr,
3923 3929 destrevlog,
3924 3930 addrevisioncb,
3925 3931 deltareuse,
3926 3932 forcedeltabothparents,
3927 3933 sidedata_helpers,
3928 3934 ):
3929 3935 """perform the core duty of `revlog.clone` after parameter processing"""
3930 3936 write_debug = None
3931 3937 if self.delta_config.debug_delta:
3932 3938 write_debug = tr._report
3933 3939 deltacomputer = deltautil.deltacomputer(
3934 3940 destrevlog,
3935 3941 write_debug=write_debug,
3936 3942 )
3937 3943 index = self.index
3938 3944 for rev in self:
3939 3945 entry = index[rev]
3940 3946
3941 3947 # Some classes override linkrev to take filtered revs into
3942 3948 # account. Use raw entry from index.
3943 3949 flags = entry[0] & 0xFFFF
3944 3950 linkrev = entry[4]
3945 3951 p1 = index[entry[5]][7]
3946 3952 p2 = index[entry[6]][7]
3947 3953 node = entry[7]
3948 3954
3949 3955 # (Possibly) reuse the delta from the revlog if allowed and
3950 3956 # the revlog chunk is a delta.
3951 3957 cachedelta = None
3952 3958 rawtext = None
3953 3959 if deltareuse == self.DELTAREUSEFULLADD:
3954 3960 text = self._revisiondata(rev)
3955 3961 sidedata = self.sidedata(rev)
3956 3962
3957 3963 if sidedata_helpers is not None:
3958 3964 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3959 3965 self, sidedata_helpers, sidedata, rev
3960 3966 )
3961 3967 flags = flags | new_flags[0] & ~new_flags[1]
3962 3968
3963 3969 destrevlog.addrevision(
3964 3970 text,
3965 3971 tr,
3966 3972 linkrev,
3967 3973 p1,
3968 3974 p2,
3969 3975 cachedelta=cachedelta,
3970 3976 node=node,
3971 3977 flags=flags,
3972 3978 deltacomputer=deltacomputer,
3973 3979 sidedata=sidedata,
3974 3980 )
3975 3981 else:
3976 3982 if destrevlog.delta_config.lazy_delta:
3977 3983 dp = self.deltaparent(rev)
3978 3984 if dp != nullrev:
3979 3985 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3980 3986
3981 3987 sidedata = None
3982 3988 if not cachedelta:
3983 3989 try:
3984 3990 rawtext = self._revisiondata(rev)
3985 3991 except error.CensoredNodeError as censored:
3986 3992 assert flags & REVIDX_ISCENSORED
3987 3993 rawtext = censored.tombstone
3988 3994 sidedata = self.sidedata(rev)
3989 3995 if sidedata is None:
3990 3996 sidedata = self.sidedata(rev)
3991 3997
3992 3998 if sidedata_helpers is not None:
3993 3999 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3994 4000 self, sidedata_helpers, sidedata, rev
3995 4001 )
3996 4002 flags = flags | new_flags[0] & ~new_flags[1]
3997 4003
3998 4004 destrevlog._addrevision(
3999 4005 node,
4000 4006 rawtext,
4001 4007 tr,
4002 4008 linkrev,
4003 4009 p1,
4004 4010 p2,
4005 4011 flags,
4006 4012 cachedelta,
4007 4013 deltacomputer=deltacomputer,
4008 4014 sidedata=sidedata,
4009 4015 )
4010 4016
4011 4017 if addrevisioncb:
4012 4018 addrevisioncb(self, rev, node)
4013 4019
4014 4020 def censorrevision(self, tr, censornode, tombstone=b''):
4015 4021 if self._format_version == REVLOGV0:
4016 4022 raise error.RevlogError(
4017 4023 _(b'cannot censor with version %d revlogs')
4018 4024 % self._format_version
4019 4025 )
4020 4026 elif self._format_version == REVLOGV1:
4021 4027 rewrite.v1_censor(self, tr, censornode, tombstone)
4022 4028 else:
4023 4029 rewrite.v2_censor(self, tr, censornode, tombstone)
4024 4030
4025 4031 def verifyintegrity(self, state):
4026 4032 """Verifies the integrity of the revlog.
4027 4033
4028 4034 Yields ``revlogproblem`` instances describing problems that are
4029 4035 found.
4030 4036 """
4031 4037 dd, di = self.checksize()
4032 4038 if dd:
4033 4039 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
4034 4040 if di:
4035 4041 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
4036 4042
4037 4043 version = self._format_version
4038 4044
4039 4045 # The verifier tells us what version revlog we should be.
4040 4046 if version != state[b'expectedversion']:
4041 4047 yield revlogproblem(
4042 4048 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
4043 4049 % (self.display_id, version, state[b'expectedversion'])
4044 4050 )
4045 4051
4046 4052 state[b'skipread'] = set()
4047 4053 state[b'safe_renamed'] = set()
4048 4054
4049 4055 for rev in self:
4050 4056 node = self.node(rev)
4051 4057
4052 4058 # Verify contents. 4 cases to care about:
4053 4059 #
4054 4060 # common: the most common case
4055 4061 # rename: with a rename
4056 4062 # meta: file content starts with b'\1\n', the metadata
4057 4063 # header defined in filelog.py, but without a rename
4058 4064 # ext: content stored externally
4059 4065 #
4060 4066 # More formally, their differences are shown below:
4061 4067 #
4062 4068 # | common | rename | meta | ext
4063 4069 # -------------------------------------------------------
4064 4070 # flags() | 0 | 0 | 0 | not 0
4065 4071 # renamed() | False | True | False | ?
4066 4072 # rawtext[0:2]=='\1\n'| False | True | True | ?
4067 4073 #
4068 4074 # "rawtext" means the raw text stored in revlog data, which
4069 4075 # could be retrieved by "rawdata(rev)". "text"
4070 4076 # mentioned below is "revision(rev)".
4071 4077 #
4072 4078 # There are 3 different lengths stored physically:
4073 4079 # 1. L1: rawsize, stored in revlog index
4074 4080 # 2. L2: len(rawtext), stored in revlog data
4075 4081 # 3. L3: len(text), stored in revlog data if flags==0, or
4076 4082 # possibly somewhere else if flags!=0
4077 4083 #
4078 4084 # L1 should be equal to L2. L3 could be different from them.
4079 4085 # "text" may or may not affect commit hash depending on flag
4080 4086 # processors (see flagutil.addflagprocessor).
4081 4087 #
4082 4088 # | common | rename | meta | ext
4083 4089 # -------------------------------------------------
4084 4090 # rawsize() | L1 | L1 | L1 | L1
4085 4091 # size() | L1 | L2-LM | L1(*) | L1 (?)
4086 4092 # len(rawtext) | L2 | L2 | L2 | L2
4087 4093 # len(text) | L2 | L2 | L2 | L3
4088 4094 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
4089 4095 #
4090 4096 # LM: length of metadata, depending on rawtext
4091 4097 # (*): not ideal, see comment in filelog.size
4092 4098 # (?): could be "- len(meta)" if the resolved content has
4093 4099 # rename metadata
4094 4100 #
4095 4101 # Checks needed to be done:
4096 4102 # 1. length check: L1 == L2, in all cases.
4097 4103 # 2. hash check: depending on flag processor, we may need to
4098 4104 # use either "text" (external), or "rawtext" (in revlog).
4099 4105
4100 4106 try:
4101 4107 skipflags = state.get(b'skipflags', 0)
4102 4108 if skipflags:
4103 4109 skipflags &= self.flags(rev)
4104 4110
4105 4111 _verify_revision(self, skipflags, state, node)
4106 4112
4107 4113 l1 = self.rawsize(rev)
4108 4114 l2 = len(self.rawdata(node))
4109 4115
4110 4116 if l1 != l2:
4111 4117 yield revlogproblem(
4112 4118 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
4113 4119 node=node,
4114 4120 )
4115 4121
4116 4122 except error.CensoredNodeError:
4117 4123 if state[b'erroroncensored']:
4118 4124 yield revlogproblem(
4119 4125 error=_(b'censored file data'), node=node
4120 4126 )
4121 4127 state[b'skipread'].add(node)
4122 4128 except Exception as e:
4123 4129 yield revlogproblem(
4124 4130 error=_(b'unpacking %s: %s')
4125 4131 % (short(node), stringutil.forcebytestr(e)),
4126 4132 node=node,
4127 4133 )
4128 4134 state[b'skipread'].add(node)
4129 4135
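# Minimal sketch of consuming verifyintegrity(); `rl`, `ui` and the
# `state` contents are assumed for this example (the verifier normally
# provides b'expectedversion' and related keys):
#
#   for problem in rl.verifyintegrity(state):
#       if problem.error:
#           ui.warn(problem.error + b'\n')
#       elif problem.warning:
#           ui.warn(problem.warning + b'\n')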
4130 4136 def storageinfo(
4131 4137 self,
4132 4138 exclusivefiles=False,
4133 4139 sharedfiles=False,
4134 4140 revisionscount=False,
4135 4141 trackedsize=False,
4136 4142 storedsize=False,
4137 4143 ):
4138 4144 d = {}
4139 4145
4140 4146 if exclusivefiles:
4141 4147 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4142 4148 if not self._inline:
4143 4149 d[b'exclusivefiles'].append((self.opener, self._datafile))
4144 4150
4145 4151 if sharedfiles:
4146 4152 d[b'sharedfiles'] = []
4147 4153
4148 4154 if revisionscount:
4149 4155 d[b'revisionscount'] = len(self)
4150 4156
4151 4157 if trackedsize:
4152 4158 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4153 4159
4154 4160 if storedsize:
4155 4161 d[b'storedsize'] = sum(
4156 4162 self.opener.stat(path).st_size for path in self.files()
4157 4163 )
4158 4164
4159 4165 return d
4160 4166
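# Illustrative sketch of querying storage details (assumed names):
#
#   info = rl.storageinfo(revisionscount=True, storedsize=True)
#   nb_revs = info[b'revisionscount']
#   on_disk = info[b'storedsize']
#
# Only the keys that were explicitly requested end up in the dict.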
4161 4167 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4162 4168 if not self.feature_config.has_side_data:
4163 4169 return
4164 4170 # revlog formats with sidedata support do not support inline
4165 4171 assert not self._inline
4166 4172 if not helpers[1] and not helpers[2]:
4167 4173 # Nothing to generate or remove
4168 4174 return
4169 4175
4170 4176 new_entries = []
4171 4177 # append the new sidedata
4172 4178 with self._writing(transaction):
4173 4179 ifh, dfh, sdfh = self._inner._writinghandles
4174 4180 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4175 4181
4176 4182 current_offset = sdfh.tell()
4177 4183 for rev in range(startrev, endrev + 1):
4178 4184 entry = self.index[rev]
4179 4185 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4180 4186 store=self,
4181 4187 sidedata_helpers=helpers,
4182 4188 sidedata={},
4183 4189 rev=rev,
4184 4190 )
4185 4191
4186 4192 serialized_sidedata = sidedatautil.serialize_sidedata(
4187 4193 new_sidedata
4188 4194 )
4189 4195
4190 4196 sidedata_compression_mode = COMP_MODE_INLINE
4191 4197 if serialized_sidedata and self.feature_config.has_side_data:
4192 4198 sidedata_compression_mode = COMP_MODE_PLAIN
4193 4199 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4194 4200 if (
4195 4201 h != b'u'
4196 4202 and comp_sidedata[0] != b'\0'
4197 4203 and len(comp_sidedata) < len(serialized_sidedata)
4198 4204 ):
4199 4205 assert not h
4200 4206 if (
4201 4207 comp_sidedata[0]
4202 4208 == self._docket.default_compression_header
4203 4209 ):
4204 4210 sidedata_compression_mode = COMP_MODE_DEFAULT
4205 4211 serialized_sidedata = comp_sidedata
4206 4212 else:
4207 4213 sidedata_compression_mode = COMP_MODE_INLINE
4208 4214 serialized_sidedata = comp_sidedata
4209 4215 if entry[8] != 0 or entry[9] != 0:
4210 4216 # rewriting entries that already have sidedata is not
4211 4217 # supported yet, because it introduces garbage data in the
4212 4218 # revlog.
4213 4219 msg = b"rewriting existing sidedata is not supported yet"
4214 4220 raise error.Abort(msg)
4215 4221
4216 4222 # Apply (potential) flags to add and to remove after running
4217 4223 # the sidedata helpers
4218 4224 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4219 4225 entry_update = (
4220 4226 current_offset,
4221 4227 len(serialized_sidedata),
4222 4228 new_offset_flags,
4223 4229 sidedata_compression_mode,
4224 4230 )
4225 4231
4226 4232 # the sidedata computation might have moved the file cursors around
4227 4233 sdfh.seek(current_offset, os.SEEK_SET)
4228 4234 sdfh.write(serialized_sidedata)
4229 4235 new_entries.append(entry_update)
4230 4236 current_offset += len(serialized_sidedata)
4231 4237 self._docket.sidedata_end = sdfh.tell()
4232 4238
4233 4239 # rewrite the new index entries
4234 4240 ifh.seek(startrev * self.index.entry_size)
4235 4241 for i, e in enumerate(new_entries):
4236 4242 rev = startrev + i
4237 4243 self.index.replace_sidedata_info(rev, *e)
4238 4244 packed = self.index.entry_binary(rev)
4239 4245 if rev == 0 and self._docket is None:
4240 4246 header = self._format_flags | self._format_version
4241 4247 header = self.index.pack_header(header)
4242 4248 packed = header + packed
4243 4249 ifh.write(packed)