revlog: minor refactor in the chunk gather process...
marmoute - r52000:c2d2e5b6 default
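The change below reworks `_chunks` in `_InnerRevlog`: instead of appending each
decompressed chunk straight to a flat list, the gathering loop now collects
`(rev, chunk)` pairs and strips the revision numbers again on return, so callers
see the same result as before. A minimal sketch of the before/after shape
(simplified standalone Python; `fetch_chunk` is a hypothetical stand-in for the
per-mode decompression paths):

    def chunks_before(revs, fetch_chunk):
        # old shape: bare chunks, their order implicitly tied to revs
        l = []
        for rev in revs:
            l.append(fetch_chunk(rev))
        return l

    def chunks_after(revs, fetch_chunk):
        # new shape: keep the revision next to each chunk while gathering,
        # then drop it on return; the visible result is unchanged
        chunks = []
        for rev in revs:
            chunks.append((rev, fetch_chunk(rev)))
        return [x[1] for x in chunks]

    fetch = lambda rev: b'chunk-%d' % rev
    assert chunks_before([0, 1, 2], fetch) == chunks_after([0, 1, 2], fetch)

Carrying the revision alongside each chunk plausibly prepares for gathering
chunks out of order or from several sources without losing track of which
revision each one belongs to; the truncated commit message does not spell the
motivation out.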
@@ -1,4170 +1,4174 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes warnings
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commits be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 # used during diverted write.
373 373 self._orig_index_file = None
374 374
375 375 self._default_compression_header = default_compression_header
376 376
377 377 # index
378 378
379 379 # 3-tuple of file handles being used for active writing.
380 380 self._writinghandles = None
381 381
382 382 self._segmentfile = randomaccessfile.randomaccessfile(
383 383 self.opener,
384 384 (self.index_file if self.inline else self.data_file),
385 385 self.data_config.chunk_cache_size,
386 386 chunk_cache,
387 387 )
388 388 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
389 389 self.opener,
390 390 self.sidedata_file,
391 391 self.data_config.chunk_cache_size,
392 392 )
393 393
394 394 # revlog header -> revlog compressor
395 395 self._decompressors = {}
396 396 # 3-tuple of (node, rev, text) for a raw revision.
397 397 self._revisioncache = None
398 398
399 399 self._delay_buffer = None
400 400
401 401 @property
402 402 def index_file(self):
403 403 return self.__index_file
404 404
405 405 @index_file.setter
406 406 def index_file(self, new_index_file):
407 407 self.__index_file = new_index_file
408 408 if self.inline:
409 409 self._segmentfile.filename = new_index_file
410 410
411 411 def __len__(self):
412 412 return len(self.index)
413 413
414 414 def clear_cache(self):
415 415 assert not self.is_delaying
416 416 self._revisioncache = None
417 417 self._segmentfile.clear_cache()
418 418 self._segmentfile_sidedata.clear_cache()
419 419
420 420 @property
421 421 def canonical_index_file(self):
422 422 if self._orig_index_file is not None:
423 423 return self._orig_index_file
424 424 return self.index_file
425 425
426 426 @property
427 427 def is_delaying(self):
428 428 """is the revlog is currently delaying the visibility of written data?
429 429
430 430 The delaying mechanism can be either in-memory or written on disk in a
431 431 side-file."""
432 432 return (self._delay_buffer is not None) or (
433 433 self._orig_index_file is not None
434 434 )
435 435
436 436 # Derived from index values.
437 437
438 438 def start(self, rev):
439 439 """the offset of the data chunk for this revision"""
440 440 return int(self.index[rev][0] >> 16)
441 441
442 442 def length(self, rev):
443 443 """the length of the data chunk for this revision"""
444 444 return self.index[rev][1]
445 445
446 446 def end(self, rev):
447 447 """the end of the data chunk for this revision"""
448 448 return self.start(rev) + self.length(rev)
449 449
450 450 def deltaparent(self, rev):
451 451 """return deltaparent of the given revision"""
452 452 base = self.index[rev][3]
453 453 if base == rev:
454 454 return nullrev
455 455 elif self.delta_config.general_delta:
456 456 return base
457 457 else:
458 458 return rev - 1
459 459
460 460 def issnapshot(self, rev):
461 461 """tells whether rev is a snapshot"""
462 462 if not self.delta_config.sparse_revlog:
463 463 return self.deltaparent(rev) == nullrev
464 464 elif hasattr(self.index, 'issnapshot'):
465 465 # directly assign the method to cache the testing and access
466 466 self.issnapshot = self.index.issnapshot
467 467 return self.issnapshot(rev)
468 468 if rev == nullrev:
469 469 return True
470 470 entry = self.index[rev]
471 471 base = entry[3]
472 472 if base == rev:
473 473 return True
474 474 if base == nullrev:
475 475 return True
476 476 p1 = entry[5]
477 477 while self.length(p1) == 0:
478 478 b = self.deltaparent(p1)
479 479 if b == p1:
480 480 break
481 481 p1 = b
482 482 p2 = entry[6]
483 483 while self.length(p2) == 0:
484 484 b = self.deltaparent(p2)
485 485 if b == p2:
486 486 break
487 487 p2 = b
488 488 if base == p1 or base == p2:
489 489 return False
490 490 return self.issnapshot(base)
491 491
492 492 def _deltachain(self, rev, stoprev=None):
493 493 """Obtain the delta chain for a revision.
494 494
495 495 ``stoprev`` specifies a revision to stop at. If not specified, we
496 496 stop at the base of the chain.
497 497
498 498 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
499 499 revs in ascending order and ``stopped`` is a bool indicating whether
500 500 ``stoprev`` was hit.
501 501 """
502 502 generaldelta = self.delta_config.general_delta
503 503 # Try C implementation.
504 504 try:
505 505 return self.index.deltachain(rev, stoprev, generaldelta)
506 506 except AttributeError:
507 507 pass
508 508
509 509 chain = []
510 510
511 511 # Alias to prevent attribute lookup in tight loop.
512 512 index = self.index
513 513
514 514 iterrev = rev
515 515 e = index[iterrev]
516 516 while iterrev != e[3] and iterrev != stoprev:
517 517 chain.append(iterrev)
518 518 if generaldelta:
519 519 iterrev = e[3]
520 520 else:
521 521 iterrev -= 1
522 522 e = index[iterrev]
523 523
524 524 if iterrev == stoprev:
525 525 stopped = True
526 526 else:
527 527 chain.append(iterrev)
528 528 stopped = False
529 529
530 530 chain.reverse()
531 531 return chain, stopped
532 532
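For intuition, here is a toy model of the pure-python fallback above in
general-delta mode, where field 3 of an index entry is the delta base and a
revision that is its own base is a full snapshot (a simplified sketch, not the
real index layout):

    def toy_deltachain(index, rev):
        # walk delta bases until a revision is its own base (a snapshot)
        chain = []
        while index[rev][3] != rev:
            chain.append(rev)
            rev = index[rev][3]
        chain.append(rev)
        chain.reverse()
        return chain

    # toy entries: (offset_flags, length, rawsize, base)
    toy_index = [(0, 10, 10, 0), (0, 4, 12, 0), (0, 3, 13, 1)]
    assert toy_deltachain(toy_index, 2) == [0, 1, 2]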
533 533 @util.propertycache
534 534 def _compressor(self):
535 535 engine = util.compengines[self.feature_config.compression_engine]
536 536 return engine.revlogcompressor(
537 537 self.feature_config.compression_engine_options
538 538 )
539 539
540 540 @util.propertycache
541 541 def _decompressor(self):
542 542 """the default decompressor"""
543 543 if self._default_compression_header is None:
544 544 return None
545 545 t = self._default_compression_header
546 546 c = self._get_decompressor(t)
547 547 return c.decompress
548 548
549 549 def _get_decompressor(self, t):
550 550 try:
551 551 compressor = self._decompressors[t]
552 552 except KeyError:
553 553 try:
554 554 engine = util.compengines.forrevlogheader(t)
555 555 compressor = engine.revlogcompressor(
556 556 self.feature_config.compression_engine_options
557 557 )
558 558 self._decompressors[t] = compressor
559 559 except KeyError:
560 560 raise error.RevlogError(
561 561 _(b'unknown compression type %s') % binascii.hexlify(t)
562 562 )
563 563 return compressor
564 564
565 565 def compress(self, data):
566 566 """Generate a possibly-compressed representation of data."""
567 567 if not data:
568 568 return b'', data
569 569
570 570 compressed = self._compressor.compress(data)
571 571
572 572 if compressed:
573 573 # The revlog compressor added the header in the returned data.
574 574 return b'', compressed
575 575
576 576 if data[0:1] == b'\0':
577 577 return b'', data
578 578 return b'u', data
579 579
580 580 def decompress(self, data):
581 581 """Decompress a revlog chunk.
582 582
583 583 The chunk is expected to begin with a header identifying the
584 584 format type so it can be routed to an appropriate decompressor.
585 585 """
586 586 if not data:
587 587 return data
588 588
589 589 # Revlogs are read much more frequently than they are written and many
590 590 # chunks only take microseconds to decompress, so performance is
591 591 # important here.
592 592 #
593 593 # We can make a few assumptions about revlogs:
594 594 #
595 595 # 1) the majority of chunks will be compressed (as opposed to inline
596 596 # raw data).
597 597 # 2) decompressing *any* data will likely be at least 10x slower than
598 598 # returning raw inline data.
599 599 # 3) we want to prioritize common and officially supported compression
600 600 # engines
601 601 #
602 602 # It follows that we want to optimize for "decompress compressed data
603 603 # when encoded with common and officially supported compression engines"
604 604 # case over "raw data" and "data encoded by less common or non-official
605 605 # compression engines." That is why we have the inline lookup first
606 606 # followed by the compengines lookup.
607 607 #
608 608 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
609 609 # compressed chunks. And this matters for changelog and manifest reads.
610 610 t = data[0:1]
611 611
612 612 if t == b'x':
613 613 try:
614 614 return _zlibdecompress(data)
615 615 except zlib.error as e:
616 616 raise error.RevlogError(
617 617 _(b'revlog decompress error: %s')
618 618 % stringutil.forcebytestr(e)
619 619 )
620 620 # '\0' is more common than 'u' so it goes first.
621 621 elif t == b'\0':
622 622 return data
623 623 elif t == b'u':
624 624 return util.buffer(data, 1)
625 625
626 626 compressor = self._get_decompressor(t)
627 627
628 628 return compressor.decompress(data)
629 629
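The dispatch above keys off the first byte of each stored chunk: b'x' marks
zlib data (the zlib header starts with 0x78), b'\0' marks raw data that happens
to begin with a null byte, and b'u' marks raw data stored behind an explicit
uncompressed marker. A minimal sketch of that convention with plain zlib
(standalone illustration; `store` and `load` are hypothetical helpers, not
revlog APIs):

    import zlib

    def store(data):
        # mirror compress(): keep raw data when compression does not help
        comp = zlib.compress(data)
        if len(comp) < len(data):
            return comp                  # zlib output starts with b'x'
        if data[:1] == b'\0':
            return data                  # never collides with a header byte
        return b'u' + data               # explicit marker for raw data

    def load(chunk):
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression type %r' % t)

    assert load(store(b'hello' * 100)) == b'hello' * 100
    assert load(store(b'\0binary')) == b'\0binary'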
630 630 @contextlib.contextmanager
631 631 def reading(self):
632 632 """Context manager that keeps data and sidedata files open for reading"""
633 633 if len(self.index) == 0:
634 634 yield # nothing to be read
635 635 else:
636 636 with self._segmentfile.reading():
637 637 with self._segmentfile_sidedata.reading():
638 638 yield
639 639
640 640 @property
641 641 def is_writing(self):
642 642 """True is a writing context is open"""
643 643 return self._writinghandles is not None
644 644
645 645 @property
646 646 def is_open(self):
647 647 """True if any file handle is being held
648 648
649 649 Used for asserts and debugging in the python code"""
650 650 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
651 651
652 652 @contextlib.contextmanager
653 653 def writing(self, transaction, data_end=None, sidedata_end=None):
654 654 """Open the revlog files for writing
655 655
656 656 Adding content to a revlog should be done within such a context.
657 657 """
658 658 if self.is_writing:
659 659 yield
660 660 else:
661 661 ifh = dfh = sdfh = None
662 662 try:
663 663 r = len(self.index)
664 664 # opening the data file.
665 665 dsize = 0
666 666 if r:
667 667 dsize = self.end(r - 1)
668 668 dfh = None
669 669 if not self.inline:
670 670 try:
671 671 dfh = self.opener(self.data_file, mode=b"r+")
672 672 if data_end is None:
673 673 dfh.seek(0, os.SEEK_END)
674 674 else:
675 675 dfh.seek(data_end, os.SEEK_SET)
676 676 except FileNotFoundError:
677 677 dfh = self.opener(self.data_file, mode=b"w+")
678 678 transaction.add(self.data_file, dsize)
679 679 if self.sidedata_file is not None:
680 680 assert sidedata_end is not None
681 681 # revlog-v2 does not inline, help Pytype
682 682 assert dfh is not None
683 683 try:
684 684 sdfh = self.opener(self.sidedata_file, mode=b"r+")
685 685 dfh.seek(sidedata_end, os.SEEK_SET)
686 686 except FileNotFoundError:
687 687 sdfh = self.opener(self.sidedata_file, mode=b"w+")
688 688 transaction.add(self.sidedata_file, sidedata_end)
689 689
690 690 # opening the index file.
691 691 isize = r * self.index.entry_size
692 692 ifh = self.__index_write_fp()
693 693 if self.inline:
694 694 transaction.add(self.index_file, dsize + isize)
695 695 else:
696 696 transaction.add(self.index_file, isize)
697 697 # exposing all file handles for writing.
698 698 self._writinghandles = (ifh, dfh, sdfh)
699 699 self._segmentfile.writing_handle = ifh if self.inline else dfh
700 700 self._segmentfile_sidedata.writing_handle = sdfh
701 701 yield
702 702 finally:
703 703 self._writinghandles = None
704 704 self._segmentfile.writing_handle = None
705 705 self._segmentfile_sidedata.writing_handle = None
706 706 if dfh is not None:
707 707 dfh.close()
708 708 if sdfh is not None:
709 709 sdfh.close()
710 710 # closing the index file last to avoid exposing references to
711 711 # potentially unflushed data content.
712 712 if ifh is not None:
713 713 ifh.close()
714 714
715 715 def __index_write_fp(self, index_end=None):
716 716 """internal method to open the index file for writing
717 717
718 718 You should not use this directly; use `_writing` instead
719 719 """
720 720 try:
721 721 if self._delay_buffer is None:
722 722 f = self.opener(
723 723 self.index_file,
724 724 mode=b"r+",
725 725 checkambig=self.data_config.check_ambig,
726 726 )
727 727 else:
728 728 # check_ambig affects the way we open the file for writing; however,
729 729 # here we do not actually open a file for writing, as writes
730 730 # will be appended to a delay_buffer. So check_ambig is not
731 731 # meaningful and unneeded here.
732 732 f = randomaccessfile.appender(
733 733 self.opener, self.index_file, b"r+", self._delay_buffer
734 734 )
735 735 if index_end is None:
736 736 f.seek(0, os.SEEK_END)
737 737 else:
738 738 f.seek(index_end, os.SEEK_SET)
739 739 return f
740 740 except FileNotFoundError:
741 741 if self._delay_buffer is None:
742 742 return self.opener(
743 743 self.index_file,
744 744 mode=b"w+",
745 745 checkambig=self.data_config.check_ambig,
746 746 )
747 747 else:
748 748 return randomaccessfile.appender(
749 749 self.opener, self.index_file, b"w+", self._delay_buffer
750 750 )
751 751
752 752 def __index_new_fp(self):
753 753 """internal method to create a new index file for writing
754 754
755 755 You should not use this unless you are upgrading from inline revlog
756 756 """
757 757 return self.opener(
758 758 self.index_file,
759 759 mode=b"w",
760 760 checkambig=self.data_config.check_ambig,
761 761 atomictemp=True,
762 762 )
763 763
764 764 def split_inline(self, tr, header, new_index_file_path=None):
765 765 """split the data of an inline revlog into an index and a data file"""
766 766 existing_handles = False
767 767 if self._writinghandles is not None:
768 768 existing_handles = True
769 769 fp = self._writinghandles[0]
770 770 fp.flush()
771 771 fp.close()
772 772 # We can't use the cached file handle after close(). So prevent
773 773 # its usage.
774 774 self._writinghandles = None
775 775 self._segmentfile.writing_handle = None
776 776 # No need to deal with the sidedata writing handle as it is only
777 777 # relevant for revlog-v2, which is never inline and thus never
778 778 # reaches this code
779 779
780 780 new_dfh = self.opener(self.data_file, mode=b"w+")
781 781 new_dfh.truncate(0) # drop any potentially existing data
782 782 try:
783 783 with self.reading():
784 784 for r in range(len(self.index)):
785 785 new_dfh.write(self.get_segment_for_revs(r, r)[1])
786 786 new_dfh.flush()
787 787
788 788 if new_index_file_path is not None:
789 789 self.index_file = new_index_file_path
790 790 with self.__index_new_fp() as fp:
791 791 self.inline = False
792 792 for i in range(len(self.index)):
793 793 e = self.index.entry_binary(i)
794 794 if i == 0:
795 795 packed_header = self.index.pack_header(header)
796 796 e = packed_header + e
797 797 fp.write(e)
798 798
799 799 # If we don't use side-write, the temp file replaces the real
800 800 # index when we exit the context manager
801 801
802 802 self._segmentfile = randomaccessfile.randomaccessfile(
803 803 self.opener,
804 804 self.data_file,
805 805 self.data_config.chunk_cache_size,
806 806 )
807 807
808 808 if existing_handles:
809 809 # switched from inline to conventional; reopen the index
810 810 ifh = self.__index_write_fp()
811 811 self._writinghandles = (ifh, new_dfh, None)
812 812 self._segmentfile.writing_handle = new_dfh
813 813 new_dfh = None
814 814 # No need to deal with the sidedata writing handle as it is only
815 815 # relevant for revlog-v2, which is never inline and thus never
816 816 # reaches this code
817 817 finally:
818 818 if new_dfh is not None:
819 819 new_dfh.close()
820 820 return self.index_file
821 821
822 822 def get_segment_for_revs(self, startrev, endrev):
823 823 """Obtain a segment of raw data corresponding to a range of revisions.
824 824
825 825 Accepts the start and end revisions and an optional already-open
826 826 file handle to be used for reading. If the file handle is read, its
827 827 seek position will not be preserved.
828 828
829 829 Requests for data may be satisfied by a cache.
830 830
831 831 Returns a 2-tuple of (offset, data) for the requested range of
832 832 revisions. Offset is the integer offset from the beginning of the
833 833 revlog and data is a str or buffer of the raw byte data.
834 834
835 835 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
836 836 to determine where each revision's data begins and ends.
837 837
838 838 API: we should consider making this a private part of the InnerRevlog
839 839 at some point.
840 840 """
841 841 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
842 842 # (functions are expensive).
843 843 index = self.index
844 844 istart = index[startrev]
845 845 start = int(istart[0] >> 16)
846 846 if startrev == endrev:
847 847 end = start + istart[1]
848 848 else:
849 849 iend = index[endrev]
850 850 end = int(iend[0] >> 16) + iend[1]
851 851
852 852 if self.inline:
853 853 start += (startrev + 1) * self.index.entry_size
854 854 end += (endrev + 1) * self.index.entry_size
855 855 length = end - start
856 856
857 857 return start, self._segmentfile.read_chunk(start, length)
858 858
859 859 def _chunk(self, rev):
860 860 """Obtain a single decompressed chunk for a revision.
861 861
862 862 Accepts an integer revision and an optional already-open file handle
863 863 to be used for reading. If used, the seek position of the file will not
864 864 be preserved.
865 865
866 866 Returns a str holding uncompressed data for the requested revision.
867 867 """
868 868 compression_mode = self.index[rev][10]
869 869 data = self.get_segment_for_revs(rev, rev)[1]
870 870 if compression_mode == COMP_MODE_PLAIN:
871 871 return data
872 872 elif compression_mode == COMP_MODE_DEFAULT:
873 873 return self._decompressor(data)
874 874 elif compression_mode == COMP_MODE_INLINE:
875 875 return self.decompress(data)
876 876 else:
877 877 msg = b'unknown compression mode %d'
878 878 msg %= compression_mode
879 879 raise error.RevlogError(msg)
880 880
881 881 def _chunks(self, revs, targetsize=None):
882 882 """Obtain decompressed chunks for the specified revisions.
883 883
884 884 Accepts an iterable of numeric revisions that are assumed to be in
885 885 ascending order. Also accepts an optional already-open file handle
886 886 to be used for reading. If used, the seek position of the file will
887 887 not be preserved.
888 888
889 889 This function is similar to calling ``self._chunk()`` multiple times,
890 890 but is faster.
891 891
892 892 Returns a list with decompressed data for each requested revision.
893 893 """
894 894 if not revs:
895 895 return []
896 896 start = self.start
897 897 length = self.length
898 898 inline = self.inline
899 899 iosize = self.index.entry_size
900 900 buffer = util.buffer
901 901
902 902 l = []
903 903 ladd = l.append
904 chunks = []
905 ladd = chunks.append
904 906
905 907 if not self.data_config.with_sparse_read:
906 908 slicedchunks = (revs,)
907 909 else:
908 910 slicedchunks = deltautil.slicechunk(
909 911 self,
910 912 revs,
911 913 targetsize=targetsize,
912 914 )
913 915
914 916 for revschunk in slicedchunks:
915 917 firstrev = revschunk[0]
916 918 # Skip trailing revisions with empty diff
917 919 for lastrev in revschunk[::-1]:
918 920 if length(lastrev) != 0:
919 921 break
920 922
921 923 try:
922 924 offset, data = self.get_segment_for_revs(firstrev, lastrev)
923 925 except OverflowError:
924 926 # issue4215 - we can't cache a run of chunks greater than
925 927 # 2G on Windows
926 return [self._chunk(rev) for rev in revschunk]
928 for rev in revschunk:
929 ladd((rev, self._chunk(rev)))
927 930
928 931 decomp = self.decompress
929 932 # self._decompressor might be None, but will not be used in that case
930 933 def_decomp = self._decompressor
931 934 for rev in revschunk:
932 935 chunkstart = start(rev)
933 936 if inline:
934 937 chunkstart += (rev + 1) * iosize
935 938 chunklength = length(rev)
936 939 comp_mode = self.index[rev][10]
937 940 c = buffer(data, chunkstart - offset, chunklength)
938 941 if comp_mode == COMP_MODE_PLAIN:
939 ladd(c)
942 c = c
940 943 elif comp_mode == COMP_MODE_INLINE:
941 ladd(decomp(c))
944 c = decomp(c)
942 945 elif comp_mode == COMP_MODE_DEFAULT:
943 ladd(def_decomp(c))
946 c = def_decomp(c)
944 947 else:
945 948 msg = b'unknown compression mode %d'
946 949 msg %= comp_mode
947 950 raise error.RevlogError(msg)
948
949 return l
951 ladd((rev, c))
952
953 return [x[1] for x in chunks]
950 954
951 955 def raw_text(self, node, rev):
952 956 """return the possibly unvalidated rawtext for a revision
953 957
954 958 returns (rev, rawtext, validated)
955 959 """
956 960
957 961 # revision in the cache (could be useful to apply delta)
958 962 cachedrev = None
959 963 # An intermediate text to apply deltas to
960 964 basetext = None
961 965
962 966 # Check if we have the entry in cache
963 967 # The cache entry looks like (node, rev, rawtext)
964 968 if self._revisioncache:
965 969 cachedrev = self._revisioncache[1]
966 970
967 971 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
968 972 if stopped:
969 973 basetext = self._revisioncache[2]
970 974
971 975 # drop cache to save memory; the caller is expected to
972 976 # update self._inner._revisioncache after validating the text
973 977 self._revisioncache = None
974 978
975 979 targetsize = None
976 980 rawsize = self.index[rev][2]
977 981 if 0 <= rawsize:
978 982 targetsize = 4 * rawsize
979 983
980 984 bins = self._chunks(chain, targetsize=targetsize)
981 985 if basetext is None:
982 986 basetext = bytes(bins[0])
983 987 bins = bins[1:]
984 988
985 989 rawtext = mdiff.patches(basetext, bins)
986 990 del basetext # let us have a chance to free memory early
987 991 return (rev, rawtext, False)
988 992
989 993 def sidedata(self, rev, sidedata_end):
990 994 """Return the sidedata for a given revision number."""
991 995 index_entry = self.index[rev]
992 996 sidedata_offset = index_entry[8]
993 997 sidedata_size = index_entry[9]
994 998
995 999 if self.inline:
996 1000 sidedata_offset += self.index.entry_size * (1 + rev)
997 1001 if sidedata_size == 0:
998 1002 return {}
999 1003
1000 1004 if sidedata_end < sidedata_offset + sidedata_size:
1001 1005 filename = self.sidedata_file
1002 1006 end = sidedata_end
1003 1007 offset = sidedata_offset
1004 1008 length = sidedata_size
1005 1009 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1006 1010 raise error.RevlogError(m)
1007 1011
1008 1012 comp_segment = self._segmentfile_sidedata.read_chunk(
1009 1013 sidedata_offset, sidedata_size
1010 1014 )
1011 1015
1012 1016 comp = self.index[rev][11]
1013 1017 if comp == COMP_MODE_PLAIN:
1014 1018 segment = comp_segment
1015 1019 elif comp == COMP_MODE_DEFAULT:
1016 1020 segment = self._decompressor(comp_segment)
1017 1021 elif comp == COMP_MODE_INLINE:
1018 1022 segment = self.decompress(comp_segment)
1019 1023 else:
1020 1024 msg = b'unknown compression mode %d'
1021 1025 msg %= comp
1022 1026 raise error.RevlogError(msg)
1023 1027
1024 1028 sidedata = sidedatautil.deserialize_sidedata(segment)
1025 1029 return sidedata
1026 1030
1027 1031 def write_entry(
1028 1032 self,
1029 1033 transaction,
1030 1034 entry,
1031 1035 data,
1032 1036 link,
1033 1037 offset,
1034 1038 sidedata,
1035 1039 sidedata_offset,
1036 1040 index_end,
1037 1041 data_end,
1038 1042 sidedata_end,
1039 1043 ):
1040 1044 # Files opened in a+ mode have inconsistent behavior on various
1041 1045 # platforms. Windows requires that a file positioning call be made
1042 1046 # when the file handle transitions between reads and writes. See
1043 1047 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1044 1048 # platforms, Python or the platform itself can be buggy. Some versions
1045 1049 # of Solaris have been observed to not append at the end of the file
1046 1050 # if the file was seeked to before the end. See issue4943 for more.
1047 1051 #
1048 1052 # We work around this issue by inserting a seek() before writing.
1049 1053 # Note: This is likely not necessary on Python 3. However, because
1050 1054 # the file handle is reused for reads and may be seeked there, we need
1051 1055 # to be careful before changing this.
1052 1056 if self._writinghandles is None:
1053 1057 msg = b'adding revision outside `revlog._writing` context'
1054 1058 raise error.ProgrammingError(msg)
1055 1059 ifh, dfh, sdfh = self._writinghandles
1056 1060 if index_end is None:
1057 1061 ifh.seek(0, os.SEEK_END)
1058 1062 else:
1059 1063 ifh.seek(index_end, os.SEEK_SET)
1060 1064 if dfh:
1061 1065 if data_end is None:
1062 1066 dfh.seek(0, os.SEEK_END)
1063 1067 else:
1064 1068 dfh.seek(data_end, os.SEEK_SET)
1065 1069 if sdfh:
1066 1070 sdfh.seek(sidedata_end, os.SEEK_SET)
1067 1071
1068 1072 curr = len(self.index) - 1
1069 1073 if not self.inline:
1070 1074 transaction.add(self.data_file, offset)
1071 1075 if self.sidedata_file:
1072 1076 transaction.add(self.sidedata_file, sidedata_offset)
1073 1077 transaction.add(self.canonical_index_file, curr * len(entry))
1074 1078 if data[0]:
1075 1079 dfh.write(data[0])
1076 1080 dfh.write(data[1])
1077 1081 if sidedata:
1078 1082 sdfh.write(sidedata)
1079 1083 if self._delay_buffer is None:
1080 1084 ifh.write(entry)
1081 1085 else:
1082 1086 self._delay_buffer.append(entry)
1083 1087 else:
1084 1088 offset += curr * self.index.entry_size
1085 1089 transaction.add(self.canonical_index_file, offset)
1086 1090 assert not sidedata
1087 1091 if self._delay_buffer is None:
1088 1092 ifh.write(entry)
1089 1093 ifh.write(data[0])
1090 1094 ifh.write(data[1])
1091 1095 else:
1092 1096 self._delay_buffer.append(entry)
1093 1097 self._delay_buffer.append(data[0])
1094 1098 self._delay_buffer.append(data[1])
1095 1099 return (
1096 1100 ifh.tell(),
1097 1101 dfh.tell() if dfh else None,
1098 1102 sdfh.tell() if sdfh else None,
1099 1103 )
1100 1104
1101 1105 def _divert_index(self):
1102 1106 return self.index_file + b'.a'
1103 1107
1104 1108 def delay(self):
1105 1109 assert not self.is_open
1106 1110 if self._delay_buffer is not None or self._orig_index_file is not None:
1107 1111 # delay or divert already in place
1108 1112 return None
1109 1113 elif len(self.index) == 0:
1110 1114 self._orig_index_file = self.index_file
1111 1115 self.index_file = self._divert_index()
1112 1116 self._segmentfile.filename = self.index_file
1113 1117 assert self._orig_index_file is not None
1114 1118 assert self.index_file is not None
1115 1119 if self.opener.exists(self.index_file):
1116 1120 self.opener.unlink(self.index_file)
1117 1121 return self.index_file
1118 1122 else:
1119 1123 self._segmentfile._delay_buffer = self._delay_buffer = []
1120 1124 return None
1121 1125
1122 1126 def write_pending(self):
1123 1127 assert not self.is_open
1124 1128 if self._orig_index_file is not None:
1125 1129 return None, True
1126 1130 any_pending = False
1127 1131 pending_index_file = self._divert_index()
1128 1132 if self.opener.exists(pending_index_file):
1129 1133 self.opener.unlink(pending_index_file)
1130 1134 util.copyfile(
1131 1135 self.opener.join(self.index_file),
1132 1136 self.opener.join(pending_index_file),
1133 1137 )
1134 1138 if self._delay_buffer:
1135 1139 with self.opener(pending_index_file, b'r+') as ifh:
1136 1140 ifh.seek(0, os.SEEK_END)
1137 1141 ifh.write(b"".join(self._delay_buffer))
1138 1142 any_pending = True
1139 1143 self._segmentfile._delay_buffer = self._delay_buffer = None
1140 1144 self._orig_index_file = self.index_file
1141 1145 self.index_file = pending_index_file
1142 1146 self._segmentfile.filename = self.index_file
1143 1147 return self.index_file, any_pending
1144 1148
1145 1149 def finalize_pending(self):
1146 1150 assert not self.is_open
1147 1151
1148 1152 delay = self._delay_buffer is not None
1149 1153 divert = self._orig_index_file is not None
1150 1154
1151 1155 if delay and divert:
1152 1156 assert False, "unreachable"
1153 1157 elif delay:
1154 1158 if self._delay_buffer:
1155 1159 with self.opener(self.index_file, b'r+') as ifh:
1156 1160 ifh.seek(0, os.SEEK_END)
1157 1161 ifh.write(b"".join(self._delay_buffer))
1158 1162 self._segmentfile._delay_buffer = self._delay_buffer = None
1159 1163 elif divert:
1160 1164 if self.opener.exists(self.index_file):
1161 1165 self.opener.rename(
1162 1166 self.index_file,
1163 1167 self._orig_index_file,
1164 1168 checkambig=True,
1165 1169 )
1166 1170 self.index_file = self._orig_index_file
1167 1171 self._orig_index_file = None
1168 1172 self._segmentfile.filename = self.index_file
1169 1173 else:
1170 1174 msg = b"not delay or divert found on this revlog"
1171 1175 raise error.ProgrammingError(msg)
1172 1176 return self.canonical_index_file
1173 1177
1174 1178
1175 1179 class revlog:
1176 1180 """
1177 1181 the underlying revision storage object
1178 1182
1179 1183 A revlog consists of two parts, an index and the revision data.
1180 1184
1181 1185 The index is a file with a fixed record size containing
1182 1186 information on each revision, including its nodeid (hash), the
1183 1187 nodeids of its parents, the position and offset of its data within
1184 1188 the data file, and the revision it's based on. Finally, each entry
1185 1189 contains a linkrev entry that can serve as a pointer to external
1186 1190 data.
1187 1191
1188 1192 The revision data itself is a linear collection of data chunks.
1189 1193 Each chunk represents a revision and is usually represented as a
1190 1194 delta against the previous chunk. To bound lookup time, runs of
1191 1195 deltas are limited to about 2 times the length of the original
1192 1196 version data. This makes retrieval of a version proportional to
1193 1197 its size, or O(1) relative to the number of revisions.
1194 1198
1195 1199 Both pieces of the revlog are written to in an append-only
1196 1200 fashion, which means we never need to rewrite a file to insert or
1197 1201 remove data, and can use some simple techniques to avoid the need
1198 1202 for locking while reading.
1199 1203
1200 1204 If checkambig, indexfile is opened with checkambig=True at
1201 1205 writing, to avoid file stat ambiguity.
1202 1206
1203 1207 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1204 1208 index will be mmapped rather than read if it is larger than the
1205 1209 configured threshold.
1206 1210
1207 1211 If censorable is True, the revlog can have censored revisions.
1208 1212
1209 1213 If `upperboundcomp` is not None, this is the expected maximal gain from
1210 1214 compression for the data content.
1211 1215
1212 1216 `concurrencychecker` is an optional function that receives 3 arguments: a
1213 1217 file handle, a filename, and an expected position. It should check whether
1214 1218 the current position in the file handle is valid, and log/warn/fail (by
1215 1219 raising).
1216 1220
1217 1221 See mercurial/revlogutils/constants.py for details about the content of an
1218 1222 index entry.
1219 1223 """
1220 1224
1221 1225 _flagserrorclass = error.RevlogError
1222 1226
1223 1227 @staticmethod
1224 1228 def is_inline_index(header_bytes):
1225 1229 """Determine if a revlog is inline from the initial bytes of the index"""
1226 1230 header = INDEX_HEADER.unpack(header_bytes)[0]
1227 1231
1228 1232 _format_flags = header & ~0xFFFF
1229 1233 _format_version = header & 0xFFFF
1230 1234
1231 1235 features = FEATURES_BY_VERSION[_format_version]
1232 1236 return features[b'inline'](_format_flags)
1233 1237
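The first four bytes of the index pack the format flags into the high 16 bits
and the version into the low 16 bits, which is what the two masks above pull
apart. For example, a v1 inline header round-trips like this (illustrative
sketch; the numeric values mirror the revlog constants imported at the top of
the file):

    import struct

    REVLOGV1 = 1                 # version, low 16 bits
    FLAG_INLINE_DATA = 1 << 16   # format flag, high 16 bits

    header_bytes = struct.pack('>I', REVLOGV1 | FLAG_INLINE_DATA)
    header = struct.unpack('>I', header_bytes)[0]
    assert header & 0xFFFF == REVLOGV1
    assert (header & ~0xFFFF) & FLAG_INLINE_DATA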
1234 1238 def __init__(
1235 1239 self,
1236 1240 opener,
1237 1241 target,
1238 1242 radix,
1239 1243 postfix=None, # only exists for `tmpcensored` now
1240 1244 checkambig=False,
1241 1245 mmaplargeindex=False,
1242 1246 censorable=False,
1243 1247 upperboundcomp=None,
1244 1248 persistentnodemap=False,
1245 1249 concurrencychecker=None,
1246 1250 trypending=False,
1247 1251 try_split=False,
1248 1252 canonical_parent_order=True,
1249 1253 ):
1250 1254 """
1251 1255 create a revlog object
1252 1256
1253 1257 opener is a function that abstracts the file opening operation
1254 1258 and can be used to implement COW semantics or the like.
1255 1259
1256 1260 `target`: a (KIND, ID) tuple that identifies the content stored in
1257 1261 this revlog. It helps the rest of the code to understand what the revlog
1258 1262 is about without having to resort to heuristics and index filename
1259 1263 analysis. Note that this must reliably be set by normal code, but
1260 1264 that test, debug, or performance measurement code might not set it to an
1261 1265 accurate value.
1262 1266 """
1263 1267
1264 1268 self.radix = radix
1265 1269
1266 1270 self._docket_file = None
1267 1271 self._indexfile = None
1268 1272 self._datafile = None
1269 1273 self._sidedatafile = None
1270 1274 self._nodemap_file = None
1271 1275 self.postfix = postfix
1272 1276 self._trypending = trypending
1273 1277 self._try_split = try_split
1274 1278 self.opener = opener
1275 1279 if persistentnodemap:
1276 1280 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1277 1281
1278 1282 assert target[0] in ALL_KINDS
1279 1283 assert len(target) == 2
1280 1284 self.target = target
1281 1285 if b'feature-config' in self.opener.options:
1282 1286 self.feature_config = self.opener.options[b'feature-config'].copy()
1283 1287 else:
1284 1288 self.feature_config = FeatureConfig()
1285 1289 self.feature_config.censorable = censorable
1286 1290 self.feature_config.canonical_parent_order = canonical_parent_order
1287 1291 if b'data-config' in self.opener.options:
1288 1292 self.data_config = self.opener.options[b'data-config'].copy()
1289 1293 else:
1290 1294 self.data_config = DataConfig()
1291 1295 self.data_config.check_ambig = checkambig
1292 1296 self.data_config.mmap_large_index = mmaplargeindex
1293 1297 if b'delta-config' in self.opener.options:
1294 1298 self.delta_config = self.opener.options[b'delta-config'].copy()
1295 1299 else:
1296 1300 self.delta_config = DeltaConfig()
1297 1301 self.delta_config.upper_bound_comp = upperboundcomp
1298 1302
1299 1303 # Maps rev to chain base rev.
1300 1304 self._chainbasecache = util.lrucachedict(100)
1301 1305
1302 1306 self.index = None
1303 1307 self._docket = None
1304 1308 self._nodemap_docket = None
1305 1309 # Mapping of partial identifiers to full nodes.
1306 1310 self._pcache = {}
1307 1311
1308 1312 # other optional features
1309 1313
1310 1314 # Make copy of flag processors so each revlog instance can support
1311 1315 # custom flags.
1312 1316 self._flagprocessors = dict(flagutil.flagprocessors)
1313 1317 # prevent nesting of addgroup
1314 1318 self._adding_group = None
1315 1319
1316 1320 chunk_cache = self._loadindex()
1317 1321 self._load_inner(chunk_cache)
1318 1322 self._concurrencychecker = concurrencychecker
1319 1323
1320 1324 @property
1321 1325 def _generaldelta(self):
1322 1326 """temporary compatibility proxy"""
1323 1327 util.nouideprecwarn(
1324 1328 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1325 1329 )
1326 1330 return self.delta_config.general_delta
1327 1331
1328 1332 @property
1329 1333 def _checkambig(self):
1330 1334 """temporary compatibility proxy"""
1331 1335 util.nouideprecwarn(
1332 1336 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1333 1337 )
1334 1338 return self.data_config.check_ambig
1335 1339
1336 1340 @property
1337 1341 def _mmaplargeindex(self):
1338 1342 """temporary compatibility proxy"""
1339 1343 util.nouideprecwarn(
1340 1344 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1341 1345 )
1342 1346 return self.data_config.mmap_large_index
1343 1347
1344 1348 @property
1345 1349 def _censorable(self):
1346 1350 """temporary compatibility proxy"""
1347 1351 util.nouideprecwarn(
1348 1352 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1349 1353 )
1350 1354 return self.feature_config.censorable
1351 1355
1352 1356 @property
1353 1357 def _chunkcachesize(self):
1354 1358 """temporary compatibility proxy"""
1355 1359 util.nouideprecwarn(
1356 1360 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1357 1361 )
1358 1362 return self.data_config.chunk_cache_size
1359 1363
1360 1364 @property
1361 1365 def _maxchainlen(self):
1362 1366 """temporary compatibility proxy"""
1363 1367 util.nouideprecwarn(
1364 1368 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1365 1369 )
1366 1370 return self.delta_config.max_chain_len
1367 1371
1368 1372 @property
1369 1373 def _deltabothparents(self):
1370 1374 """temporary compatibility proxy"""
1371 1375 util.nouideprecwarn(
1372 1376 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1373 1377 )
1374 1378 return self.delta_config.delta_both_parents
1375 1379
1376 1380 @property
1377 1381 def _candidate_group_chunk_size(self):
1378 1382 """temporary compatibility proxy"""
1379 1383 util.nouideprecwarn(
1380 1384 b"use revlog.delta_config.candidate_group_chunk_size",
1381 1385 b"6.6",
1382 1386 stacklevel=2,
1383 1387 )
1384 1388 return self.delta_config.candidate_group_chunk_size
1385 1389
1386 1390 @property
1387 1391 def _debug_delta(self):
1388 1392 """temporary compatibility proxy"""
1389 1393 util.nouideprecwarn(
1390 1394 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1391 1395 )
1392 1396 return self.delta_config.debug_delta
1393 1397
1394 1398 @property
1395 1399 def _compengine(self):
1396 1400 """temporary compatibility proxy"""
1397 1401 util.nouideprecwarn(
1398 1402 b"use revlog.feature_config.compression_engine",
1399 1403 b"6.6",
1400 1404 stacklevel=2,
1401 1405 )
1402 1406 return self.feature_config.compression_engine
1403 1407
1404 1408 @property
1405 1409 def upperboundcomp(self):
1406 1410 """temporary compatibility proxy"""
1407 1411 util.nouideprecwarn(
1408 1412 b"use revlog.delta_config.upper_bound_comp",
1409 1413 b"6.6",
1410 1414 stacklevel=2,
1411 1415 )
1412 1416 return self.delta_config.upper_bound_comp
1413 1417
1414 1418 @property
1415 1419 def _compengineopts(self):
1416 1420 """temporary compatibility proxy"""
1417 1421 util.nouideprecwarn(
1418 1422 b"use revlog.feature_config.compression_engine_options",
1419 1423 b"6.6",
1420 1424 stacklevel=2,
1421 1425 )
1422 1426 return self.feature_config.compression_engine_options
1423 1427
1424 1428 @property
1425 1429 def _maxdeltachainspan(self):
1426 1430 """temporary compatibility proxy"""
1427 1431 util.nouideprecwarn(
1428 1432 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1429 1433 )
1430 1434 return self.delta_config.max_deltachain_span
1431 1435
1432 1436 @property
1433 1437 def _withsparseread(self):
1434 1438 """temporary compatibility proxy"""
1435 1439 util.nouideprecwarn(
1436 1440 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1437 1441 )
1438 1442 return self.data_config.with_sparse_read
1439 1443
1440 1444 @property
1441 1445 def _sparserevlog(self):
1442 1446 """temporary compatibility proxy"""
1443 1447 util.nouideprecwarn(
1444 1448 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1445 1449 )
1446 1450 return self.delta_config.sparse_revlog
1447 1451
1448 1452 @property
1449 1453 def hassidedata(self):
1450 1454 """temporary compatibility proxy"""
1451 1455 util.nouideprecwarn(
1452 1456 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1453 1457 )
1454 1458 return self.feature_config.has_side_data
1455 1459
1456 1460 @property
1457 1461 def _srdensitythreshold(self):
1458 1462 """temporary compatibility proxy"""
1459 1463 util.nouideprecwarn(
1460 1464 b"use revlog.data_config.sr_density_threshold",
1461 1465 b"6.6",
1462 1466 stacklevel=2,
1463 1467 )
1464 1468 return self.data_config.sr_density_threshold
1465 1469
1466 1470 @property
1467 1471 def _srmingapsize(self):
1468 1472 """temporary compatibility proxy"""
1469 1473 util.nouideprecwarn(
1470 1474 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1471 1475 )
1472 1476 return self.data_config.sr_min_gap_size
1473 1477
1474 1478 @property
1475 1479 def _compute_rank(self):
1476 1480 """temporary compatibility proxy"""
1477 1481 util.nouideprecwarn(
1478 1482 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1479 1483 )
1480 1484 return self.feature_config.compute_rank
1481 1485
1482 1486 @property
1483 1487 def canonical_parent_order(self):
1484 1488 """temporary compatibility proxy"""
1485 1489 util.nouideprecwarn(
1486 1490 b"use revlog.feature_config.canonical_parent_order",
1487 1491 b"6.6",
1488 1492 stacklevel=2,
1489 1493 )
1490 1494 return self.feature_config.canonical_parent_order
1491 1495
1492 1496 @property
1493 1497 def _lazydelta(self):
1494 1498 """temporary compatibility proxy"""
1495 1499 util.nouideprecwarn(
1496 1500 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1497 1501 )
1498 1502 return self.delta_config.lazy_delta
1499 1503
1500 1504 @property
1501 1505 def _lazydeltabase(self):
1502 1506 """temporary compatibility proxy"""
1503 1507 util.nouideprecwarn(
1504 1508 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1505 1509 )
1506 1510 return self.delta_config.lazy_delta_base
1507 1511
1508 1512 def _init_opts(self):
1509 1513 """process options (from above/config) to setup associated default revlog mode
1510 1514
1511 1515 These values might be affected when actually reading on disk information.
1512 1516
1513 1517 The relevant values are returned for use in _loadindex().
1514 1518
1515 1519 * newversionflags:
1516 1520 version header to use if we need to create a new revlog
1517 1521
1518 1522 * mmapindexthreshold:
1519 1523 minimal index size at which to start using mmap
1520 1524
1521 1525 * force_nodemap:
1522 1526 force the usage of a "development" version of the nodemap code
1523 1527 """
1524 1528 opts = self.opener.options
1525 1529
1526 1530 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1527 1531 new_header = CHANGELOGV2
1528 1532 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1529 1533 self.feature_config.compute_rank = compute_rank
1530 1534 elif b'revlogv2' in opts:
1531 1535 new_header = REVLOGV2
1532 1536 elif b'revlogv1' in opts:
1533 1537 new_header = REVLOGV1 | FLAG_INLINE_DATA
1534 1538 if b'generaldelta' in opts:
1535 1539 new_header |= FLAG_GENERALDELTA
1536 1540 elif b'revlogv0' in self.opener.options:
1537 1541 new_header = REVLOGV0
1538 1542 else:
1539 1543 new_header = REVLOG_DEFAULT_VERSION
1540 1544
1541 1545 mmapindexthreshold = None
1542 1546 if self.data_config.mmap_large_index:
1543 1547 mmapindexthreshold = self.data_config.mmap_index_threshold
1544 1548 if self.feature_config.enable_ellipsis:
1545 1549 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1546 1550
1547 1551 # revlog v0 doesn't have flag processors
1548 1552 for flag, processor in opts.get(b'flagprocessors', {}).items():
1549 1553 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1550 1554
1551 1555 chunk_cache_size = self.data_config.chunk_cache_size
1552 1556 if chunk_cache_size <= 0:
1553 1557 raise error.RevlogError(
1554 1558 _(b'revlog chunk cache size %r is not greater than 0')
1555 1559 % chunk_cache_size
1556 1560 )
1557 1561 elif chunk_cache_size & (chunk_cache_size - 1):
1558 1562 raise error.RevlogError(
1559 1563 _(b'revlog chunk cache size %r is not a power of 2')
1560 1564 % chunk_cache_size
1561 1565 )
1562 1566 force_nodemap = opts.get(b'devel-force-nodemap', False)
1563 1567 return new_header, mmapindexthreshold, force_nodemap
1564 1568
1565 1569 def _get_data(self, filepath, mmap_threshold, size=None):
1566 1570 """return a file content with or without mmap
1567 1571
1568 1572 If the file is missing return the empty string"""
1569 1573 try:
1570 1574 with self.opener(filepath) as fp:
1571 1575 if mmap_threshold is not None:
1572 1576 file_size = self.opener.fstat(fp).st_size
1573 1577 if file_size >= mmap_threshold:
1574 1578 if size is not None:
1575 1579 # avoid potential mmap crash
1576 1580 size = min(file_size, size)
1577 1581 # TODO: should .close() to release resources without
1578 1582 # relying on Python GC
1579 1583 if size is None:
1580 1584 return util.buffer(util.mmapread(fp))
1581 1585 else:
1582 1586 return util.buffer(util.mmapread(fp, size))
1583 1587 if size is None:
1584 1588 return fp.read()
1585 1589 else:
1586 1590 return fp.read(size)
1587 1591 except FileNotFoundError:
1588 1592 return b''
1589 1593
1590 1594 def get_streams(self, max_linkrev, force_inline=False):
1591 1595 """return a list of streams that represent this revlog
1592 1596
1593 1597 This is used by stream-clone to do bytes to bytes copies of a repository.
1594 1598
1595 1599 This streams data for all revisions that refer to a changelog revision up
1596 1600 to `max_linkrev`.
1597 1601
1598 1602 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1599 1603
1600 1604 It returns a list of three-tuples:
1601 1605
1602 1606 [
1603 1607 (filename, bytes_stream, stream_size),
1604 1608 …
1605 1609 ]
1606 1610 """
1607 1611 n = len(self)
1608 1612 index = self.index
1609 1613 while n > 0:
1610 1614 linkrev = index[n - 1][4]
1611 1615 if linkrev < max_linkrev:
1612 1616 break
1613 1617 # note: this loop will rarely go through multiple iterations, since
1614 1618 # it only traverses commits created during the current streaming
1615 1619 # pull operation.
1616 1620 #
1617 1621 # If this becomes a problem, using a binary search should cap the
1618 1622 # runtime of this.
1619 1623 n = n - 1
1620 1624 if n == 0:
1621 1625 # no data to send
1622 1626 return []
1623 1627 index_size = n * index.entry_size
1624 1628 data_size = self.end(n - 1)
1625 1629
1626 1630 # XXX we might have been split (or stripped) since the object was
1627 1631 # initialized. We need to close this race too, e.g. by having a way to
1628 1632 # pre-open the files we feed to the revlog and never closing them before
1629 1633 # we are done streaming.
1630 1634
1631 1635 if self._inline:
1632 1636
1633 1637 def get_stream():
1634 1638 with self.opener(self._indexfile, mode=b"r") as fp:
1635 1639 yield None
1636 1640 size = index_size + data_size
1637 1641 if size <= 65536:
1638 1642 yield fp.read(size)
1639 1643 else:
1640 1644 yield from util.filechunkiter(fp, limit=size)
1641 1645
1642 1646 inline_stream = get_stream()
1643 1647 next(inline_stream)
1644 1648 return [
1645 1649 (self._indexfile, inline_stream, index_size + data_size),
1646 1650 ]
1647 1651 elif force_inline:
1648 1652
1649 1653 def get_stream():
1650 1654 with self.reading():
1651 1655 yield None
1652 1656
1653 1657 for rev in range(n):
1654 1658 idx = self.index.entry_binary(rev)
1655 1659 if rev == 0 and self._docket is None:
1656 1660 # re-inject the inline flag
1657 1661 header = self._format_flags
1658 1662 header |= self._format_version
1659 1663 header |= FLAG_INLINE_DATA
1660 1664 header = self.index.pack_header(header)
1661 1665 idx = header + idx
1662 1666 yield idx
1663 1667 yield self._inner.get_segment_for_revs(rev, rev)[1]
1664 1668
1665 1669 inline_stream = get_stream()
1666 1670 next(inline_stream)
1667 1671 return [
1668 1672 (self._indexfile, inline_stream, index_size + data_size),
1669 1673 ]
1670 1674 else:
1671 1675
1672 1676 def get_index_stream():
1673 1677 with self.opener(self._indexfile, mode=b"r") as fp:
1674 1678 yield None
1675 1679 if index_size <= 65536:
1676 1680 yield fp.read(index_size)
1677 1681 else:
1678 1682 yield from util.filechunkiter(fp, limit=index_size)
1679 1683
1680 1684 def get_data_stream():
1681 1685 with self._datafp() as fp:
1682 1686 yield None
1683 1687 if data_size <= 65536:
1684 1688 yield fp.read(data_size)
1685 1689 else:
1686 1690 yield from util.filechunkiter(fp, limit=data_size)
1687 1691
1688 1692 index_stream = get_index_stream()
1689 1693 next(index_stream)
1690 1694 data_stream = get_data_stream()
1691 1695 next(data_stream)
1692 1696 return [
1693 1697 (self._datafile, data_stream, data_size),
1694 1698 (self._indexfile, index_stream, index_size),
1695 1699 ]
1696 1700
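# Hedged usage sketch for ``get_streams`` (hypothetical caller):
# stream-clone code is expected to consume the returned triples by
# copying ``size`` bytes from each generator:
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       for chunk in stream:
#           out.write(chunk)  # exactly ``size`` bytes in total per file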
1697 1701 def _loadindex(self, docket=None):
1698 1702
1699 1703 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1700 1704
1701 1705 if self.postfix is not None:
1702 1706 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1703 1707 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1704 1708 entry_point = b'%s.i.a' % self.radix
1705 1709 elif self._try_split and self.opener.exists(self._split_index_file):
1706 1710 entry_point = self._split_index_file
1707 1711 else:
1708 1712 entry_point = b'%s.i' % self.radix
1709 1713
1710 1714 if docket is not None:
1711 1715 self._docket = docket
1712 1716 self._docket_file = entry_point
1713 1717 else:
1714 1718 self._initempty = True
1715 1719 entry_data = self._get_data(entry_point, mmapindexthreshold)
1716 1720 if len(entry_data) > 0:
1717 1721 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1718 1722 self._initempty = False
1719 1723 else:
1720 1724 header = new_header
1721 1725
1722 1726 self._format_flags = header & ~0xFFFF
1723 1727 self._format_version = header & 0xFFFF
1724 1728
1725 1729 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1726 1730 if supported_flags is None:
1727 1731 msg = _(b'unknown version (%d) in revlog %s')
1728 1732 msg %= (self._format_version, self.display_id)
1729 1733 raise error.RevlogError(msg)
1730 1734 elif self._format_flags & ~supported_flags:
1731 1735 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1732 1736 display_flag = self._format_flags >> 16
1733 1737 msg %= (display_flag, self._format_version, self.display_id)
1734 1738 raise error.RevlogError(msg)
1735 1739
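# Illustration (restating the two masks above): the on-disk header packs
# the format flags into the high 16 bits and the revlog version into the
# low 16 bits:
#
#   header = REVLOGV1 | FLAG_INLINE_DATA
#   assert header & 0xFFFF == REVLOGV1  # format version
#   assert header & ~0xFFFF == FLAG_INLINE_DATA  # format flags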
1736 1740 features = FEATURES_BY_VERSION[self._format_version]
1737 1741 self._inline = features[b'inline'](self._format_flags)
1738 1742 self.delta_config.general_delta = features[b'generaldelta'](
1739 1743 self._format_flags
1740 1744 )
1741 1745 self.feature_config.has_side_data = features[b'sidedata']
1742 1746
1743 1747 if not features[b'docket']:
1744 1748 self._indexfile = entry_point
1745 1749 index_data = entry_data
1746 1750 else:
1747 1751 self._docket_file = entry_point
1748 1752 if self._initempty:
1749 1753 self._docket = docketutil.default_docket(self, header)
1750 1754 else:
1751 1755 self._docket = docketutil.parse_docket(
1752 1756 self, entry_data, use_pending=self._trypending
1753 1757 )
1754 1758
1755 1759 if self._docket is not None:
1756 1760 self._indexfile = self._docket.index_filepath()
1757 1761 index_data = b''
1758 1762 index_size = self._docket.index_end
1759 1763 if index_size > 0:
1760 1764 index_data = self._get_data(
1761 1765 self._indexfile, mmapindexthreshold, size=index_size
1762 1766 )
1763 1767 if len(index_data) < index_size:
1764 1768 msg = _(b'not enough index data for %s: got %d, expected %d')
1765 1769 msg %= (self.display_id, len(index_data), index_size)
1766 1770 raise error.RevlogError(msg)
1767 1771
1768 1772 self._inline = False
1769 1773 # generaldelta implied by version 2 revlogs.
1770 1774 self.delta_config.general_delta = True
1771 1775 # the logic for persistent nodemap will be dealt with within the
1772 1776 # main docket, so disable it for now.
1773 1777 self._nodemap_file = None
1774 1778
1775 1779 if self._docket is not None:
1776 1780 self._datafile = self._docket.data_filepath()
1777 1781 self._sidedatafile = self._docket.sidedata_filepath()
1778 1782 elif self.postfix is None:
1779 1783 self._datafile = b'%s.d' % self.radix
1780 1784 else:
1781 1785 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1782 1786
1783 1787 self.nodeconstants = sha1nodeconstants
1784 1788 self.nullid = self.nodeconstants.nullid
1785 1789
1786 1790 # sparse-revlog can't be on without general-delta (issue6056)
1787 1791 if not self.delta_config.general_delta:
1788 1792 self.delta_config.sparse_revlog = False
1789 1793
1790 1794 self._storedeltachains = True
1791 1795
1792 1796 devel_nodemap = (
1793 1797 self._nodemap_file
1794 1798 and force_nodemap
1795 1799 and parse_index_v1_nodemap is not None
1796 1800 )
1797 1801
1798 1802 use_rust_index = False
1799 1803 if rustrevlog is not None:
1800 1804 if self._nodemap_file is not None:
1801 1805 use_rust_index = True
1802 1806 else:
1803 1807 use_rust_index = self.opener.options.get(b'rust.index')
1804 1808
1805 1809 self._parse_index = parse_index_v1
1806 1810 if self._format_version == REVLOGV0:
1807 1811 self._parse_index = revlogv0.parse_index_v0
1808 1812 elif self._format_version == REVLOGV2:
1809 1813 self._parse_index = parse_index_v2
1810 1814 elif self._format_version == CHANGELOGV2:
1811 1815 self._parse_index = parse_index_cl_v2
1812 1816 elif devel_nodemap:
1813 1817 self._parse_index = parse_index_v1_nodemap
1814 1818 elif use_rust_index:
1815 1819 self._parse_index = parse_index_v1_mixed
1816 1820 try:
1817 1821 d = self._parse_index(index_data, self._inline)
1818 1822 index, chunkcache = d
1819 1823 use_nodemap = (
1820 1824 not self._inline
1821 1825 and self._nodemap_file is not None
1822 1826 and hasattr(index, 'update_nodemap_data')
1823 1827 )
1824 1828 if use_nodemap:
1825 1829 nodemap_data = nodemaputil.persisted_data(self)
1826 1830 if nodemap_data is not None:
1827 1831 docket = nodemap_data[0]
1828 1832 if (
1829 1833 len(d[0]) > docket.tip_rev
1830 1834 and d[0][docket.tip_rev][7] == docket.tip_node
1831 1835 ):
1832 1836 # no changelog tampering
1833 1837 self._nodemap_docket = docket
1834 1838 index.update_nodemap_data(*nodemap_data)
1835 1839 except (ValueError, IndexError):
1836 1840 raise error.RevlogError(
1837 1841 _(b"index %s is corrupted") % self.display_id
1838 1842 )
1839 1843 self.index = index
1840 1844 # revnum -> (chain-length, sum-delta-length)
1841 1845 self._chaininfocache = util.lrucachedict(500)
1842 1846
1843 1847 return chunkcache
1844 1848
1845 1849 def _load_inner(self, chunk_cache):
1846 1850 if self._docket is None:
1847 1851 default_compression_header = None
1848 1852 else:
1849 1853 default_compression_header = self._docket.default_compression_header
1850 1854
1851 1855 self._inner = _InnerRevlog(
1852 1856 opener=self.opener,
1853 1857 index=self.index,
1854 1858 index_file=self._indexfile,
1855 1859 data_file=self._datafile,
1856 1860 sidedata_file=self._sidedatafile,
1857 1861 inline=self._inline,
1858 1862 data_config=self.data_config,
1859 1863 delta_config=self.delta_config,
1860 1864 feature_config=self.feature_config,
1861 1865 chunk_cache=chunk_cache,
1862 1866 default_compression_header=default_compression_header,
1863 1867 )
1864 1868
1865 1869 def get_revlog(self):
1866 1870 """simple function to mirror the API of other not-really-revlog objects"""
1867 1871 return self
1868 1872
1869 1873 @util.propertycache
1870 1874 def revlog_kind(self):
1871 1875 return self.target[0]
1872 1876
1873 1877 @util.propertycache
1874 1878 def display_id(self):
1875 1879 """The public-facing "ID" of the revlog that we use in messages"""
1876 1880 if self.revlog_kind == KIND_FILELOG:
1877 1881 # Reference the file without the "data/" prefix, so it is familiar
1878 1882 # to the user.
1879 1883 return self.target[1]
1880 1884 else:
1881 1885 return self.radix
1882 1886
1883 1887 def _datafp(self, mode=b'r'):
1884 1888 """file object for the revlog's data file"""
1885 1889 return self.opener(self._datafile, mode=mode)
1886 1890
1887 1891 def tiprev(self):
1888 1892 return len(self.index) - 1
1889 1893
1890 1894 def tip(self):
1891 1895 return self.node(self.tiprev())
1892 1896
1893 1897 def __contains__(self, rev):
1894 1898 return 0 <= rev < len(self)
1895 1899
1896 1900 def __len__(self):
1897 1901 return len(self.index)
1898 1902
1899 1903 def __iter__(self):
1900 1904 return iter(range(len(self)))
1901 1905
1902 1906 def revs(self, start=0, stop=None):
1903 1907 """iterate over all rev in this revlog (from start to stop)"""
1904 1908 return storageutil.iterrevs(len(self), start=start, stop=stop)
1905 1909
1906 1910 def hasnode(self, node):
1907 1911 try:
1908 1912 self.rev(node)
1909 1913 return True
1910 1914 except KeyError:
1911 1915 return False
1912 1916
1913 1917 def _candelta(self, baserev, rev):
1914 1918 """whether two revisions (baserev, rev) can be delta-ed or not"""
1915 1919 # Disable delta if either rev requires a content-changing flag
1916 1920 # processor (ex. LFS). This is because such flag processor can alter
1917 1921 # the rawtext content that the delta will be based on, and two clients
1918 1922 # could have a same revlog node with different flags (i.e. different
1919 1923 # rawtext contents) and the delta could be incompatible.
1920 1924 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1921 1925 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1922 1926 ):
1923 1927 return False
1924 1928 return True
1925 1929
1926 1930 def update_caches(self, transaction):
1927 1931 """update on disk cache
1928 1932
1929 1933 If a transaction is passed, the update may be delayed to transaction
1930 1934 commit."""
1931 1935 if self._nodemap_file is not None:
1932 1936 if transaction is None:
1933 1937 nodemaputil.update_persistent_nodemap(self)
1934 1938 else:
1935 1939 nodemaputil.setup_persistent_nodemap(transaction, self)
1936 1940
1937 1941 def clearcaches(self):
1938 1942 """Clear in-memory caches"""
1939 1943 self._chainbasecache.clear()
1940 1944 self._inner.clear_cache()
1941 1945 self._pcache = {}
1942 1946 self._nodemap_docket = None
1943 1947 self.index.clearcaches()
1944 1948 # The Python code is the one responsible for validating the docket, so
1945 1949 # we end up having to refresh it here.
1946 1950 use_nodemap = (
1947 1951 not self._inline
1948 1952 and self._nodemap_file is not None
1949 1953 and hasattr(self.index, 'update_nodemap_data')
1950 1954 )
1951 1955 if use_nodemap:
1952 1956 nodemap_data = nodemaputil.persisted_data(self)
1953 1957 if nodemap_data is not None:
1954 1958 self._nodemap_docket = nodemap_data[0]
1955 1959 self.index.update_nodemap_data(*nodemap_data)
1956 1960
1957 1961 def rev(self, node):
1958 1962 """return the revision number associated with a <nodeid>"""
1959 1963 try:
1960 1964 return self.index.rev(node)
1961 1965 except TypeError:
1962 1966 raise
1963 1967 except error.RevlogError:
1964 1968 # parsers.c radix tree lookup failed
1965 1969 if (
1966 1970 node == self.nodeconstants.wdirid
1967 1971 or node in self.nodeconstants.wdirfilenodeids
1968 1972 ):
1969 1973 raise error.WdirUnsupported
1970 1974 raise error.LookupError(node, self.display_id, _(b'no node'))
1971 1975
1972 1976 # Accessors for index entries.
1973 1977
1974 1978 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1975 1979 # are flags.
1976 1980 def start(self, rev):
1977 1981 return int(self.index[rev][0] >> 16)
1978 1982
1979 1983 def sidedata_cut_off(self, rev):
1980 1984 sd_cut_off = self.index[rev][8]
1981 1985 if sd_cut_off != 0:
1982 1986 return sd_cut_off
1983 1987 # This is an annoying dance, because entries without sidedata
1984 1988 # currently use 0 as their offset (instead of previous-offset +
1985 1989 # previous-size).
1986 1990 #
1987 1991 # We should reconsider this sidedata → 0 sidedata_offset policy.
1988 1992 # In the meantime, we need this.
1989 1993 while 0 <= rev:
1990 1994 e = self.index[rev]
1991 1995 if e[9] != 0:
1992 1996 return e[8] + e[9]
1993 1997 rev -= 1
1994 1998 return 0
1995 1999
1996 2000 def flags(self, rev):
1997 2001 return self.index[rev][0] & 0xFFFF
1998 2002
1999 2003 def length(self, rev):
2000 2004 return self.index[rev][1]
2001 2005
2002 2006 def sidedata_length(self, rev):
2003 2007 if not self.feature_config.has_side_data:
2004 2008 return 0
2005 2009 return self.index[rev][9]
2006 2010
2007 2011 def rawsize(self, rev):
2008 2012 """return the length of the uncompressed text for a given revision"""
2009 2013 l = self.index[rev][2]
2010 2014 if l >= 0:
2011 2015 return l
2012 2016
2013 2017 t = self.rawdata(rev)
2014 2018 return len(t)
2015 2019
2016 2020 def size(self, rev):
2017 2021 """length of non-raw text (processed by a "read" flag processor)"""
2018 2022 # fast path: if no "read" flag processor could change the content,
2019 2023 # size is rawsize. note: ELLIPSIS is known to not change the content.
2020 2024 flags = self.flags(rev)
2021 2025 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
2022 2026 return self.rawsize(rev)
2023 2027
2024 2028 return len(self.revision(rev))
2025 2029
2026 2030 def fast_rank(self, rev):
2027 2031 """Return the rank of a revision if already known, or None otherwise.
2028 2032
2029 2033 The rank of a revision is the size of the sub-graph it defines as a
2030 2034 head. Equivalently, the rank of a revision `r` is the size of the set
2031 2035 `ancestors(r)`, `r` included.
2032 2036
2033 2037 This method returns the rank retrieved from the revlog in constant
2034 2038 time. It makes no attempt at computing unknown values for versions of
2035 2039 the revlog which do not persist the rank.
2036 2040 """
2037 2041 rank = self.index[rev][ENTRY_RANK]
2038 2042 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
2039 2043 return None
2040 2044 if rev == nullrev:
2041 2045 return 0 # convention
2042 2046 return rank
2043 2047
2044 2048 def chainbase(self, rev):
2045 2049 base = self._chainbasecache.get(rev)
2046 2050 if base is not None:
2047 2051 return base
2048 2052
2049 2053 index = self.index
2050 2054 iterrev = rev
2051 2055 base = index[iterrev][3]
2052 2056 while base != iterrev:
2053 2057 iterrev = base
2054 2058 base = index[iterrev][3]
2055 2059
2056 2060 self._chainbasecache[rev] = base
2057 2061 return base
2058 2062
2059 2063 def linkrev(self, rev):
2060 2064 return self.index[rev][4]
2061 2065
2062 2066 def parentrevs(self, rev):
2063 2067 try:
2064 2068 entry = self.index[rev]
2065 2069 except IndexError:
2066 2070 if rev == wdirrev:
2067 2071 raise error.WdirUnsupported
2068 2072 raise
2069 2073
2070 2074 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2071 2075 return entry[6], entry[5]
2072 2076 else:
2073 2077 return entry[5], entry[6]
2074 2078
2075 2079 # fast parentrevs(rev) where rev isn't filtered
2076 2080 _uncheckedparentrevs = parentrevs
2077 2081
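# Illustration of ``canonical_parent_order`` (informal, derived from the
# branch above): a revision stored with a null first parent is reported
# with its parents swapped, so the null parent always comes second:
#
#   entry[5] == nullrev and entry[6] == 7  ->  parentrevs(rev) == (7, nullrev)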
2078 2082 def node(self, rev):
2079 2083 try:
2080 2084 return self.index[rev][7]
2081 2085 except IndexError:
2082 2086 if rev == wdirrev:
2083 2087 raise error.WdirUnsupported
2084 2088 raise
2085 2089
2086 2090 # Derived from index values.
2087 2091
2088 2092 def end(self, rev):
2089 2093 return self.start(rev) + self.length(rev)
2090 2094
2091 2095 def parents(self, node):
2092 2096 i = self.index
2093 2097 d = i[self.rev(node)]
2094 2098 # inline node() to avoid function call overhead
2095 2099 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2096 2100 return i[d[6]][7], i[d[5]][7]
2097 2101 else:
2098 2102 return i[d[5]][7], i[d[6]][7]
2099 2103
2100 2104 def chainlen(self, rev):
2101 2105 return self._chaininfo(rev)[0]
2102 2106
2103 2107 def _chaininfo(self, rev):
2104 2108 chaininfocache = self._chaininfocache
2105 2109 if rev in chaininfocache:
2106 2110 return chaininfocache[rev]
2107 2111 index = self.index
2108 2112 generaldelta = self.delta_config.general_delta
2109 2113 iterrev = rev
2110 2114 e = index[iterrev]
2111 2115 clen = 0
2112 2116 compresseddeltalen = 0
2113 2117 while iterrev != e[3]:
2114 2118 clen += 1
2115 2119 compresseddeltalen += e[1]
2116 2120 if generaldelta:
2117 2121 iterrev = e[3]
2118 2122 else:
2119 2123 iterrev -= 1
2120 2124 if iterrev in chaininfocache:
2121 2125 t = chaininfocache[iterrev]
2122 2126 clen += t[0]
2123 2127 compresseddeltalen += t[1]
2124 2128 break
2125 2129 e = index[iterrev]
2126 2130 else:
2127 2131 # Add text length of base since decompressing that also takes
2128 2132 # work. For cache hits the length is already included.
2129 2133 compresseddeltalen += e[1]
2130 2134 r = (clen, compresseddeltalen)
2131 2135 chaininfocache[rev] = r
2132 2136 return r
2133 2137
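# Worked example for ``_chaininfo`` (hypothetical revlog): with a
# general-delta chain 0 <- 2 <- 5 (5 stores a delta against 2, 2 against
# 0, and 0 is its own base), the loop follows e[3] links until a
# revision is its own base:
#
#   _chaininfo(5) -> (2, length(5) + length(2) + length(0))
#
# i.e. two delta hops, plus the compressed length of every chain member
# including the base text.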
2134 2138 def _deltachain(self, rev, stoprev=None):
2135 2139 return self._inner._deltachain(rev, stoprev=stoprev)
2136 2140
2137 2141 def ancestors(self, revs, stoprev=0, inclusive=False):
2138 2142 """Generate the ancestors of 'revs' in reverse revision order.
2139 2143 Does not generate revs lower than stoprev.
2140 2144
2141 2145 See the documentation for ancestor.lazyancestors for more details."""
2142 2146
2143 2147 # first, make sure start revisions aren't filtered
2144 2148 revs = list(revs)
2145 2149 checkrev = self.node
2146 2150 for r in revs:
2147 2151 checkrev(r)
2148 2152 # and we're sure ancestors aren't filtered as well
2149 2153
2150 2154 if rustancestor is not None and self.index.rust_ext_compat:
2151 2155 lazyancestors = rustancestor.LazyAncestors
2152 2156 arg = self.index
2153 2157 else:
2154 2158 lazyancestors = ancestor.lazyancestors
2155 2159 arg = self._uncheckedparentrevs
2156 2160 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2157 2161
2158 2162 def descendants(self, revs):
2159 2163 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2160 2164
2161 2165 def findcommonmissing(self, common=None, heads=None):
2162 2166 """Return a tuple of the ancestors of common and the ancestors of heads
2163 2167 that are not ancestors of common. In revset terminology, we return the
2164 2168 tuple:
2165 2169
2166 2170 ::common, (::heads) - (::common)
2167 2171
2168 2172 The list is sorted by revision number, meaning it is
2169 2173 topologically sorted.
2170 2174
2171 2175 'heads' and 'common' are both lists of node IDs. If heads is
2172 2176 not supplied, uses all of the revlog's heads. If common is not
2173 2177 supplied, uses nullid."""
2174 2178 if common is None:
2175 2179 common = [self.nullid]
2176 2180 if heads is None:
2177 2181 heads = self.heads()
2178 2182
2179 2183 common = [self.rev(n) for n in common]
2180 2184 heads = [self.rev(n) for n in heads]
2181 2185
2182 2186 # we want the ancestors, but inclusive
2183 2187 class lazyset:
2184 2188 def __init__(self, lazyvalues):
2185 2189 self.addedvalues = set()
2186 2190 self.lazyvalues = lazyvalues
2187 2191
2188 2192 def __contains__(self, value):
2189 2193 return value in self.addedvalues or value in self.lazyvalues
2190 2194
2191 2195 def __iter__(self):
2192 2196 added = self.addedvalues
2193 2197 for r in added:
2194 2198 yield r
2195 2199 for r in self.lazyvalues:
2196 2200 if not r in added:
2197 2201 yield r
2198 2202
2199 2203 def add(self, value):
2200 2204 self.addedvalues.add(value)
2201 2205
2202 2206 def update(self, values):
2203 2207 self.addedvalues.update(values)
2204 2208
2205 2209 has = lazyset(self.ancestors(common))
2206 2210 has.add(nullrev)
2207 2211 has.update(common)
2208 2212
2209 2213 # take all ancestors from heads that aren't in has
2210 2214 missing = set()
2211 2215 visit = collections.deque(r for r in heads if r not in has)
2212 2216 while visit:
2213 2217 r = visit.popleft()
2214 2218 if r in missing:
2215 2219 continue
2216 2220 else:
2217 2221 missing.add(r)
2218 2222 for p in self.parentrevs(r):
2219 2223 if p not in has:
2220 2224 visit.append(p)
2221 2225 missing = list(missing)
2222 2226 missing.sort()
2223 2227 return has, [self.node(miss) for miss in missing]
2224 2228
2225 2229 def incrementalmissingrevs(self, common=None):
2226 2230 """Return an object that can be used to incrementally compute the
2227 2231 revision numbers of the ancestors of arbitrary sets that are not
2228 2232 ancestors of common. This is an ancestor.incrementalmissingancestors
2229 2233 object.
2230 2234
2231 2235 'common' is a list of revision numbers. If common is not supplied, uses
2232 2236 nullrev.
2233 2237 """
2234 2238 if common is None:
2235 2239 common = [nullrev]
2236 2240
2237 2241 if rustancestor is not None and self.index.rust_ext_compat:
2238 2242 return rustancestor.MissingAncestors(self.index, common)
2239 2243 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2240 2244
2241 2245 def findmissingrevs(self, common=None, heads=None):
2242 2246 """Return the revision numbers of the ancestors of heads that
2243 2247 are not ancestors of common.
2244 2248
2245 2249 More specifically, return a list of revision numbers corresponding to
2246 2250 nodes N such that every N satisfies the following constraints:
2247 2251
2248 2252 1. N is an ancestor of some node in 'heads'
2249 2253 2. N is not an ancestor of any node in 'common'
2250 2254
2251 2255 The list is sorted by revision number, meaning it is
2252 2256 topologically sorted.
2253 2257
2254 2258 'heads' and 'common' are both lists of revision numbers. If heads is
2255 2259 not supplied, uses all of the revlog's heads. If common is not
2256 2260 supplied, uses nullid."""
2257 2261 if common is None:
2258 2262 common = [nullrev]
2259 2263 if heads is None:
2260 2264 heads = self.headrevs()
2261 2265
2262 2266 inc = self.incrementalmissingrevs(common=common)
2263 2267 return inc.missingancestors(heads)
2264 2268
2265 2269 def findmissing(self, common=None, heads=None):
2266 2270 """Return the ancestors of heads that are not ancestors of common.
2267 2271
2268 2272 More specifically, return a list of nodes N such that every N
2269 2273 satisfies the following constraints:
2270 2274
2271 2275 1. N is an ancestor of some node in 'heads'
2272 2276 2. N is not an ancestor of any node in 'common'
2273 2277
2274 2278 The list is sorted by revision number, meaning it is
2275 2279 topologically sorted.
2276 2280
2277 2281 'heads' and 'common' are both lists of node IDs. If heads is
2278 2282 not supplied, uses all of the revlog's heads. If common is not
2279 2283 supplied, uses nullid."""
2280 2284 if common is None:
2281 2285 common = [self.nullid]
2282 2286 if heads is None:
2283 2287 heads = self.heads()
2284 2288
2285 2289 common = [self.rev(n) for n in common]
2286 2290 heads = [self.rev(n) for n in heads]
2287 2291
2288 2292 inc = self.incrementalmissingrevs(common=common)
2289 2293 return [self.node(r) for r in inc.missingancestors(heads)]
2290 2294
2291 2295 def nodesbetween(self, roots=None, heads=None):
2292 2296 """Return a topological path from 'roots' to 'heads'.
2293 2297
2294 2298 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2295 2299 topologically sorted list of all nodes N that satisfy both of
2296 2300 these constraints:
2297 2301
2298 2302 1. N is a descendant of some node in 'roots'
2299 2303 2. N is an ancestor of some node in 'heads'
2300 2304
2301 2305 Every node is considered to be both a descendant and an ancestor
2302 2306 of itself, so every reachable node in 'roots' and 'heads' will be
2303 2307 included in 'nodes'.
2304 2308
2305 2309 'outroots' is the list of reachable nodes in 'roots', i.e., the
2306 2310 subset of 'roots' that is returned in 'nodes'. Likewise,
2307 2311 'outheads' is the subset of 'heads' that is also in 'nodes'.
2308 2312
2309 2313 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2310 2314 unspecified, uses nullid as the only root. If 'heads' is
2311 2315 unspecified, uses list of all of the revlog's heads."""
2312 2316 nonodes = ([], [], [])
2313 2317 if roots is not None:
2314 2318 roots = list(roots)
2315 2319 if not roots:
2316 2320 return nonodes
2317 2321 lowestrev = min([self.rev(n) for n in roots])
2318 2322 else:
2319 2323 roots = [self.nullid] # Everybody's a descendant of nullid
2320 2324 lowestrev = nullrev
2321 2325 if (lowestrev == nullrev) and (heads is None):
2322 2326 # We want _all_ the nodes!
2323 2327 return (
2324 2328 [self.node(r) for r in self],
2325 2329 [self.nullid],
2326 2330 list(self.heads()),
2327 2331 )
2328 2332 if heads is None:
2329 2333 # All nodes are ancestors, so the latest ancestor is the last
2330 2334 # node.
2331 2335 highestrev = len(self) - 1
2332 2336 # Set ancestors to None to signal that every node is an ancestor.
2333 2337 ancestors = None
2334 2338 # Set heads to an empty dictionary for later discovery of heads
2335 2339 heads = {}
2336 2340 else:
2337 2341 heads = list(heads)
2338 2342 if not heads:
2339 2343 return nonodes
2340 2344 ancestors = set()
2341 2345 # Turn heads into a dictionary so we can remove 'fake' heads.
2342 2346 # Also, later we will be using it to filter out the heads we can't
2343 2347 # find from roots.
2344 2348 heads = dict.fromkeys(heads, False)
2345 2349 # Start at the top and keep marking parents until we're done.
2346 2350 nodestotag = set(heads)
2347 2351 # Remember where the top was so we can use it as a limit later.
2348 2352 highestrev = max([self.rev(n) for n in nodestotag])
2349 2353 while nodestotag:
2350 2354 # grab a node to tag
2351 2355 n = nodestotag.pop()
2352 2356 # Never tag nullid
2353 2357 if n == self.nullid:
2354 2358 continue
2355 2359 # A node's revision number represents its place in a
2356 2360 # topologically sorted list of nodes.
2357 2361 r = self.rev(n)
2358 2362 if r >= lowestrev:
2359 2363 if n not in ancestors:
2360 2364 # If we are possibly a descendant of one of the roots
2361 2365 # and we haven't already been marked as an ancestor
2362 2366 ancestors.add(n) # Mark as ancestor
2363 2367 # Add non-nullid parents to list of nodes to tag.
2364 2368 nodestotag.update(
2365 2369 [p for p in self.parents(n) if p != self.nullid]
2366 2370 )
2367 2371 elif n in heads: # We've seen it before, is it a fake head?
2368 2372 # So it is, real heads should not be the ancestors of
2369 2373 # any other heads.
2370 2374 heads.pop(n)
2371 2375 if not ancestors:
2372 2376 return nonodes
2373 2377 # Now that we have our set of ancestors, we want to remove any
2374 2378 # roots that are not ancestors.
2375 2379
2376 2380 # If one of the roots was nullid, everything is included anyway.
2377 2381 if lowestrev > nullrev:
2378 2382 # But, since we weren't, let's recompute the lowest rev to not
2379 2383 # include roots that aren't ancestors.
2380 2384
2381 2385 # Filter out roots that aren't ancestors of heads
2382 2386 roots = [root for root in roots if root in ancestors]
2383 2387 # Recompute the lowest revision
2384 2388 if roots:
2385 2389 lowestrev = min([self.rev(root) for root in roots])
2386 2390 else:
2387 2391 # No more roots? Return empty list
2388 2392 return nonodes
2389 2393 else:
2390 2394 # We are descending from nullid, and don't need to care about
2391 2395 # any other roots.
2392 2396 lowestrev = nullrev
2393 2397 roots = [self.nullid]
2394 2398 # Transform our roots list into a set.
2395 2399 descendants = set(roots)
2396 2400 # Also, keep the original roots so we can filter out roots that aren't
2397 2401 # 'real' roots (i.e. are descended from other roots).
2398 2402 roots = descendants.copy()
2399 2403 # Our topologically sorted list of output nodes.
2400 2404 orderedout = []
2401 2405 # Don't start at nullid since we don't want nullid in our output list,
2402 2406 # and if nullid shows up in descendants, empty parents will look like
2403 2407 # they're descendants.
2404 2408 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2405 2409 n = self.node(r)
2406 2410 isdescendant = False
2407 2411 if lowestrev == nullrev: # Everybody is a descendant of nullid
2408 2412 isdescendant = True
2409 2413 elif n in descendants:
2410 2414 # n is already a descendant
2411 2415 isdescendant = True
2412 2416 # This check only needs to be done here because all the roots
2413 2417 # will start being marked as descendants before the loop.
2414 2418 if n in roots:
2415 2419 # If n was a root, check if it's a 'real' root.
2416 2420 p = tuple(self.parents(n))
2417 2421 # If any of its parents are descendants, it's not a root.
2418 2422 if (p[0] in descendants) or (p[1] in descendants):
2419 2423 roots.remove(n)
2420 2424 else:
2421 2425 p = tuple(self.parents(n))
2422 2426 # A node is a descendant if either of its parents are
2423 2427 # descendants. (We seeded the descendants set with the roots
2424 2428 # up there, remember?)
2425 2429 if (p[0] in descendants) or (p[1] in descendants):
2426 2430 descendants.add(n)
2427 2431 isdescendant = True
2428 2432 if isdescendant and ((ancestors is None) or (n in ancestors)):
2429 2433 # Only include nodes that are both descendants and ancestors.
2430 2434 orderedout.append(n)
2431 2435 if (ancestors is not None) and (n in heads):
2432 2436 # We're trying to figure out which heads are reachable
2433 2437 # from roots.
2434 2438 # Mark this head as having been reached
2435 2439 heads[n] = True
2436 2440 elif ancestors is None:
2437 2441 # Otherwise, we're trying to discover the heads.
2438 2442 # Assume this is a head because if it isn't, the next step
2439 2443 # will eventually remove it.
2440 2444 heads[n] = True
2441 2445 # But, obviously its parents aren't.
2442 2446 for p in self.parents(n):
2443 2447 heads.pop(p, None)
2444 2448 heads = [head for head, flag in heads.items() if flag]
2445 2449 roots = list(roots)
2446 2450 assert orderedout
2447 2451 assert roots
2448 2452 assert heads
2449 2453 return (orderedout, roots, heads)
2450 2454
2451 2455 def headrevs(self, revs=None):
2452 2456 if revs is None:
2453 2457 try:
2454 2458 return self.index.headrevs()
2455 2459 except AttributeError:
2456 2460 return self._headrevs()
2457 2461 if rustdagop is not None and self.index.rust_ext_compat:
2458 2462 return rustdagop.headrevs(self.index, revs)
2459 2463 return dagop.headrevs(revs, self._uncheckedparentrevs)
2460 2464
2461 2465 def computephases(self, roots):
2462 2466 return self.index.computephasesmapsets(roots)
2463 2467
2464 2468 def _headrevs(self):
2465 2469 count = len(self)
2466 2470 if not count:
2467 2471 return [nullrev]
2468 2472 # we won't iterate over filtered revs, so nobody is a head at start
2469 2473 ishead = [0] * (count + 1)
2470 2474 index = self.index
2471 2475 for r in self:
2472 2476 ishead[r] = 1 # I may be a head
2473 2477 e = index[r]
2474 2478 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2475 2479 return [r for r, val in enumerate(ishead) if val]
2476 2480
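# Worked example for ``_headrevs`` (hypothetical graph): with revisions
# 0..2 where both 1 and 2 have parent 0, the sweep first marks every
# revision as a potential head, then clears each revision's parents:
#
#   ishead starts as [1, 1, 1, ...]; visiting 1 and 2 clears ishead[0]
#   -> _headrevs() == [1, 2]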
2477 2481 def heads(self, start=None, stop=None):
2478 2482 """return the list of all nodes that have no children
2479 2483
2480 2484 if start is specified, only heads that are descendants of
2481 2485 start will be returned
2482 2486 if stop is specified, it will consider all the revs from stop
2483 2487 as if they had no children
2484 2488 """
2485 2489 if start is None and stop is None:
2486 2490 if not len(self):
2487 2491 return [self.nullid]
2488 2492 return [self.node(r) for r in self.headrevs()]
2489 2493
2490 2494 if start is None:
2491 2495 start = nullrev
2492 2496 else:
2493 2497 start = self.rev(start)
2494 2498
2495 2499 stoprevs = {self.rev(n) for n in stop or []}
2496 2500
2497 2501 revs = dagop.headrevssubset(
2498 2502 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2499 2503 )
2500 2504
2501 2505 return [self.node(rev) for rev in revs]
2502 2506
2503 2507 def children(self, node):
2504 2508 """find the children of a given node"""
2505 2509 c = []
2506 2510 p = self.rev(node)
2507 2511 for r in self.revs(start=p + 1):
2508 2512 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2509 2513 if prevs:
2510 2514 for pr in prevs:
2511 2515 if pr == p:
2512 2516 c.append(self.node(r))
2513 2517 elif p == nullrev:
2514 2518 c.append(self.node(r))
2515 2519 return c
2516 2520
2517 2521 def commonancestorsheads(self, a, b):
2518 2522 """calculate all the heads of the common ancestors of nodes a and b"""
2519 2523 a, b = self.rev(a), self.rev(b)
2520 2524 ancs = self._commonancestorsheads(a, b)
2521 2525 return pycompat.maplist(self.node, ancs)
2522 2526
2523 2527 def _commonancestorsheads(self, *revs):
2524 2528 """calculate all the heads of the common ancestors of revs"""
2525 2529 try:
2526 2530 ancs = self.index.commonancestorsheads(*revs)
2527 2531 except (AttributeError, OverflowError): # C implementation failed
2528 2532 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2529 2533 return ancs
2530 2534
2531 2535 def isancestor(self, a, b):
2532 2536 """return True if node a is an ancestor of node b
2533 2537
2534 2538 A revision is considered an ancestor of itself."""
2535 2539 a, b = self.rev(a), self.rev(b)
2536 2540 return self.isancestorrev(a, b)
2537 2541
2538 2542 def isancestorrev(self, a, b):
2539 2543 """return True if revision a is an ancestor of revision b
2540 2544
2541 2545 A revision is considered an ancestor of itself.
2542 2546
2543 2547 The implementation of this is trivial but the use of
2544 2548 reachableroots is not."""
2545 2549 if a == nullrev:
2546 2550 return True
2547 2551 elif a == b:
2548 2552 return True
2549 2553 elif a > b:
2550 2554 return False
2551 2555 return bool(self.reachableroots(a, [b], [a], includepath=False))
2552 2556
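# Reader's note (derived from the checks above): revision numbers are
# topologically sorted, so an ancestor never has a larger number than
# its descendant; ``a > b`` can therefore return False without touching
# the graph:
#
#   rl.isancestorrev(5, 3)  # -> False, decided by the cheap a > b test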
2553 2557 def reachableroots(self, minroot, heads, roots, includepath=False):
2554 2558 """return (heads(::(<roots> and <roots>::<heads>)))
2555 2559
2556 2560 If includepath is True, return (<roots>::<heads>)."""
2557 2561 try:
2558 2562 return self.index.reachableroots2(
2559 2563 minroot, heads, roots, includepath
2560 2564 )
2561 2565 except AttributeError:
2562 2566 return dagop._reachablerootspure(
2563 2567 self.parentrevs, minroot, roots, heads, includepath
2564 2568 )
2565 2569
2566 2570 def ancestor(self, a, b):
2567 2571 """calculate the "best" common ancestor of nodes a and b"""
2568 2572
2569 2573 a, b = self.rev(a), self.rev(b)
2570 2574 try:
2571 2575 ancs = self.index.ancestors(a, b)
2572 2576 except (AttributeError, OverflowError):
2573 2577 ancs = ancestor.ancestors(self.parentrevs, a, b)
2574 2578 if ancs:
2575 2579 # choose a consistent winner when there's a tie
2576 2580 return min(map(self.node, ancs))
2577 2581 return self.nullid
2578 2582
2579 2583 def _match(self, id):
2580 2584 if isinstance(id, int):
2581 2585 # rev
2582 2586 return self.node(id)
2583 2587 if len(id) == self.nodeconstants.nodelen:
2584 2588 # possibly a binary node
2585 2589 # odds of a binary node being all hex in ASCII are 1 in 10**25
2586 2590 try:
2587 2591 node = id
2588 2592 self.rev(node) # quick search the index
2589 2593 return node
2590 2594 except error.LookupError:
2591 2595 pass # may be partial hex id
2592 2596 try:
2593 2597 # str(rev)
2594 2598 rev = int(id)
2595 2599 if b"%d" % rev != id:
2596 2600 raise ValueError
2597 2601 if rev < 0:
2598 2602 rev = len(self) + rev
2599 2603 if rev < 0 or rev >= len(self):
2600 2604 raise ValueError
2601 2605 return self.node(rev)
2602 2606 except (ValueError, OverflowError):
2603 2607 pass
2604 2608 if len(id) == 2 * self.nodeconstants.nodelen:
2605 2609 try:
2606 2610 # a full hex nodeid?
2607 2611 node = bin(id)
2608 2612 self.rev(node)
2609 2613 return node
2610 2614 except (binascii.Error, error.LookupError):
2611 2615 pass
2612 2616
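# Summary of the resolution order above (informal): ``_match`` tries, in
# turn, (1) an integer revision number, (2) a 20-byte binary node,
# (3) a stringified revision number such as b'-1' or b'42', and (4) a
# full 40-character hex node; it falls through to None when none match.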
2613 2617 def _partialmatch(self, id):
2614 2618 # we don't care about wdirfilenodeids as they should always be full hashes
2615 2619 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2616 2620 ambiguous = False
2617 2621 try:
2618 2622 partial = self.index.partialmatch(id)
2619 2623 if partial and self.hasnode(partial):
2620 2624 if maybewdir:
2621 2625 # single 'ff...' match in radix tree, ambiguous with wdir
2622 2626 ambiguous = True
2623 2627 else:
2624 2628 return partial
2625 2629 elif maybewdir:
2626 2630 # no 'ff...' match in radix tree, wdir identified
2627 2631 raise error.WdirUnsupported
2628 2632 else:
2629 2633 return None
2630 2634 except error.RevlogError:
2631 2635 # parsers.c radix tree lookup gave multiple matches
2632 2636 # fast path: for unfiltered changelog, radix tree is accurate
2633 2637 if not getattr(self, 'filteredrevs', None):
2634 2638 ambiguous = True
2635 2639 # fall through to slow path that filters hidden revisions
2636 2640 except (AttributeError, ValueError):
2637 2641 # we are pure python, or key is not hex
2638 2642 pass
2639 2643 if ambiguous:
2640 2644 raise error.AmbiguousPrefixLookupError(
2641 2645 id, self.display_id, _(b'ambiguous identifier')
2642 2646 )
2643 2647
2644 2648 if id in self._pcache:
2645 2649 return self._pcache[id]
2646 2650
2647 2651 if len(id) <= 40:
2648 2652 # hex(node)[:...]
2649 2653 l = len(id) // 2 * 2 # grab an even number of digits
2650 2654 try:
2651 2655 # we're dropping the last digit, so let's check that it's hex,
2652 2656 # to avoid the expensive computation below if it's not
2653 2657 if len(id) % 2 > 0:
2654 2658 if not (id[-1] in hexdigits):
2655 2659 return None
2656 2660 prefix = bin(id[:l])
2657 2661 except binascii.Error:
2658 2662 pass
2659 2663 else:
2660 2664 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2661 2665 nl = [
2662 2666 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2663 2667 ]
2664 2668 if self.nodeconstants.nullhex.startswith(id):
2665 2669 nl.append(self.nullid)
2666 2670 if len(nl) > 0:
2667 2671 if len(nl) == 1 and not maybewdir:
2668 2672 self._pcache[id] = nl[0]
2669 2673 return nl[0]
2670 2674 raise error.AmbiguousPrefixLookupError(
2671 2675 id, self.display_id, _(b'ambiguous identifier')
2672 2676 )
2673 2677 if maybewdir:
2674 2678 raise error.WdirUnsupported
2675 2679 return None
2676 2680
2677 2681 def lookup(self, id):
2678 2682 """locate a node based on:
2679 2683 - revision number or str(revision number)
2680 2684 - nodeid or subset of hex nodeid
2681 2685 """
2682 2686 n = self._match(id)
2683 2687 if n is not None:
2684 2688 return n
2685 2689 n = self._partialmatch(id)
2686 2690 if n:
2687 2691 return n
2688 2692
2689 2693 raise error.LookupError(id, self.display_id, _(b'no match found'))
2690 2694
2691 2695 def shortest(self, node, minlength=1):
2692 2696 """Find the shortest unambiguous prefix that matches node."""
2693 2697
2694 2698 def isvalid(prefix):
2695 2699 try:
2696 2700 matchednode = self._partialmatch(prefix)
2697 2701 except error.AmbiguousPrefixLookupError:
2698 2702 return False
2699 2703 except error.WdirUnsupported:
2700 2704 # single 'ff...' match
2701 2705 return True
2702 2706 if matchednode is None:
2703 2707 raise error.LookupError(node, self.display_id, _(b'no node'))
2704 2708 return True
2705 2709
2706 2710 def maybewdir(prefix):
2707 2711 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2708 2712
2709 2713 hexnode = hex(node)
2710 2714
2711 2715 def disambiguate(hexnode, minlength):
2712 2716 """Disambiguate against wdirid."""
2713 2717 for length in range(minlength, len(hexnode) + 1):
2714 2718 prefix = hexnode[:length]
2715 2719 if not maybewdir(prefix):
2716 2720 return prefix
2717 2721
2718 2722 if not getattr(self, 'filteredrevs', None):
2719 2723 try:
2720 2724 length = max(self.index.shortest(node), minlength)
2721 2725 return disambiguate(hexnode, length)
2722 2726 except error.RevlogError:
2723 2727 if node != self.nodeconstants.wdirid:
2724 2728 raise error.LookupError(
2725 2729 node, self.display_id, _(b'no node')
2726 2730 )
2727 2731 except AttributeError:
2728 2732 # Fall through to pure code
2729 2733 pass
2730 2734
2731 2735 if node == self.nodeconstants.wdirid:
2732 2736 for length in range(minlength, len(hexnode) + 1):
2733 2737 prefix = hexnode[:length]
2734 2738 if isvalid(prefix):
2735 2739 return prefix
2736 2740
2737 2741 for length in range(minlength, len(hexnode) + 1):
2738 2742 prefix = hexnode[:length]
2739 2743 if isvalid(prefix):
2740 2744 return disambiguate(hexnode, length)
2741 2745
2742 2746 def cmp(self, node, text):
2743 2747 """compare text with a given file revision
2744 2748
2745 2749 returns True if text is different than what is stored.
2746 2750 """
2747 2751 p1, p2 = self.parents(node)
2748 2752 return storageutil.hashrevisionsha1(text, p1, p2) != node
2749 2753
2750 2754 def deltaparent(self, rev):
2751 2755 """return deltaparent of the given revision"""
2752 2756 base = self.index[rev][3]
2753 2757 if base == rev:
2754 2758 return nullrev
2755 2759 elif self.delta_config.general_delta:
2756 2760 return base
2757 2761 else:
2758 2762 return rev - 1
2759 2763
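# Illustration (follows directly from the branches above):
#
#   base == rev            -> nullrev  (full snapshot, no delta parent)
#   general-delta revlog   -> base     (the stored delta base)
#   legacy revlog          -> rev - 1  (always deltas against the
#                                       previous revision)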
2760 2764 def issnapshot(self, rev):
2761 2765 """tells whether rev is a snapshot"""
2762 2766 ret = self._inner.issnapshot(rev)
2763 2767 self.issnapshot = self._inner.issnapshot
2764 2768 return ret
2765 2769
2766 2770 def snapshotdepth(self, rev):
2767 2771 """number of snapshots in the chain before this one"""
2768 2772 if not self.issnapshot(rev):
2769 2773 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2770 2774 return len(self._inner._deltachain(rev)[0]) - 1
2771 2775
2772 2776 def revdiff(self, rev1, rev2):
2773 2777 """return or calculate a delta between two revisions
2774 2778
2775 2779 The delta calculated is in binary form and is intended to be written to
2776 2780 revlog data directly. So this function needs raw revision data.
2777 2781 """
2778 2782 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2779 2783 return bytes(self._inner._chunk(rev2))
2780 2784
2781 2785 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2782 2786
2783 2787 def revision(self, nodeorrev):
2784 2788 """return an uncompressed revision of a given node or revision
2785 2789 number.
2786 2790 """
2787 2791 return self._revisiondata(nodeorrev)
2788 2792
2789 2793 def sidedata(self, nodeorrev):
2790 2794 """a map of extra data related to the changeset but not part of the hash
2791 2795
2792 2796 This function currently returns a dictionary. However, a more advanced
2793 2797 mapping object will likely be used in the future for more
2794 2798 efficient/lazy code.
2795 2799 """
2796 2800 # deal with <nodeorrev> argument type
2797 2801 if isinstance(nodeorrev, int):
2798 2802 rev = nodeorrev
2799 2803 else:
2800 2804 rev = self.rev(nodeorrev)
2801 2805 return self._sidedata(rev)
2802 2806
2803 2807 def _rawtext(self, node, rev):
2804 2808 """return the possibly unvalidated rawtext for a revision
2805 2809
2806 2810 returns (rev, rawtext, validated)
2807 2811 """
2808 2812 # Check if we have the entry in cache
2809 2813 # The cache entry looks like (node, rev, rawtext)
2810 2814 if self._inner._revisioncache:
2811 2815 if self._inner._revisioncache[0] == node:
2812 2816 return (rev, self._inner._revisioncache[2], True)
2813 2817
2814 2818 if rev is None:
2815 2819 rev = self.rev(node)
2816 2820
2817 2821 return self._inner.raw_text(node, rev)
2818 2822
2819 2823 def _revisiondata(self, nodeorrev, raw=False):
2820 2824 # deal with <nodeorrev> argument type
2821 2825 if isinstance(nodeorrev, int):
2822 2826 rev = nodeorrev
2823 2827 node = self.node(rev)
2824 2828 else:
2825 2829 node = nodeorrev
2826 2830 rev = None
2827 2831
2828 2832 # fast path the special `nullid` rev
2829 2833 if node == self.nullid:
2830 2834 return b""
2831 2835
2832 2836 # ``rawtext`` is the text as stored inside the revlog. Might be the
2833 2837 # revision or might need to be processed to retrieve the revision.
2834 2838 rev, rawtext, validated = self._rawtext(node, rev)
2835 2839
2836 2840 if raw and validated:
2837 2841 # if we don't want to process the raw text and the raw
2838 2842 # text is already cached, we can exit early.
2839 2843 return rawtext
2840 2844 if rev is None:
2841 2845 rev = self.rev(node)
2842 2846 # the revlog's flags for this revision
2843 2847 # (they usually alter its state or content)
2844 2848 flags = self.flags(rev)
2845 2849
2846 2850 if validated and flags == REVIDX_DEFAULT_FLAGS:
2847 2851 # no extra flags set, no flag processor runs, text = rawtext
2848 2852 return rawtext
2849 2853
2850 2854 if raw:
2851 2855 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2852 2856 text = rawtext
2853 2857 else:
2854 2858 r = flagutil.processflagsread(self, rawtext, flags)
2855 2859 text, validatehash = r
2856 2860 if validatehash:
2857 2861 self.checkhash(text, node, rev=rev)
2858 2862 if not validated:
2859 2863 self._inner._revisioncache = (node, rev, rawtext)
2860 2864
2861 2865 return text
2862 2866
2863 2867 def _sidedata(self, rev):
2864 2868 """Return the sidedata for a given revision number."""
2865 2869 sidedata_end = None
2866 2870 if self._docket is not None:
2867 2871 sidedata_end = self._docket.sidedata_end
2868 2872 return self._inner.sidedata(rev, sidedata_end)
2869 2873
2870 2874 def rawdata(self, nodeorrev):
2871 2875 """return the uncompressed raw data of a given node or revision number."""
2872 2876 return self._revisiondata(nodeorrev, raw=True)
2873 2877
2874 2878 def hash(self, text, p1, p2):
2875 2879 """Compute a node hash.
2876 2880
2877 2881 Available as a function so that subclasses can replace the hash
2878 2882 as needed.
2879 2883 """
2880 2884 return storageutil.hashrevisionsha1(text, p1, p2)
2881 2885
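# Background note for readers: ``storageutil.hashrevisionsha1`` computes
# sha1(min(p1, p2) + max(p1, p2) + text), i.e. the parent nodes are
# sorted before hashing, so the resulting node id does not depend on
# parent order.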
2882 2886 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2883 2887 """Check node hash integrity.
2884 2888
2885 2889 Available as a function so that subclasses can extend hash mismatch
2886 2890 behaviors as needed.
2887 2891 """
2888 2892 try:
2889 2893 if p1 is None and p2 is None:
2890 2894 p1, p2 = self.parents(node)
2891 2895 if node != self.hash(text, p1, p2):
2892 2896 # Clear the revision cache on hash failure. The revision cache
2893 2897 # only stores the raw revision and clearing the cache does have
2894 2898 # the side-effect that we won't have a cache hit when the raw
2895 2899 # revision data is accessed. But this case should be rare and
2896 2900 # it is extra work to teach the cache about the hash
2897 2901 # verification state.
2898 2902 if (
2899 2903 self._inner._revisioncache
2900 2904 and self._inner._revisioncache[0] == node
2901 2905 ):
2902 2906 self._inner._revisioncache = None
2903 2907
2904 2908 revornode = rev
2905 2909 if revornode is None:
2906 2910 revornode = templatefilters.short(hex(node))
2907 2911 raise error.RevlogError(
2908 2912 _(b"integrity check failed on %s:%s")
2909 2913 % (self.display_id, pycompat.bytestr(revornode))
2910 2914 )
2911 2915 except error.RevlogError:
2912 2916 if self.feature_config.censorable and storageutil.iscensoredtext(
2913 2917 text
2914 2918 ):
2915 2919 raise error.CensoredNodeError(self.display_id, node, text)
2916 2920 raise
2917 2921
2918 2922 @property
2919 2923 def _split_index_file(self):
2920 2924 """the path where the index of an ongoing splitting operation is expected
2921 2925
2922 2926 The file will only exist if a splitting operation is in progress, but
2923 2927 it is always expected at the same location."""
2924 2928 parts = self.radix.split(b'/')
2925 2929 if len(parts) > 1:
2926 2930 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2927 2931 head = parts[0] + b'-s'
2928 2932 mids = parts[1:-1]
2929 2933 tail = parts[-1] + b'.i'
2930 2934 pieces = [head] + mids + [tail]
2931 2935 return b'/'.join(pieces)
2932 2936 else:
2933 2937 # the revlog is stored at the root of the store (changelog or
2934 2938 # manifest), no risk of collision.
2935 2939 return self.radix + b'.i.s'
2936 2940
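# Worked example (follows from the code above): a filelog with radix
# b'data/some/dir/file' expects its split index at
# b'data-s/some/dir/file.i', while a root-level revlog with radix
# b'00manifest' uses b'00manifest.i.s'.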
2937 2941 def _enforceinlinesize(self, tr, side_write=True):
2938 2942 """Check if the revlog is too big for inline and convert if so.
2939 2943
2940 2944 This should be called after revisions are added to the revlog. If the
2941 2945 revlog has grown too large to be an inline revlog, it will convert it
2942 2946 to use multiple index and data files.
2943 2947 """
2944 2948 tiprev = len(self) - 1
2945 2949 total_size = self.start(tiprev) + self.length(tiprev)
2946 2950 if not self._inline or total_size < _maxinline:
2947 2951 return
2948 2952
2949 2953 if self._docket is not None:
2950 2954 msg = b"inline revlog should not have a docket"
2951 2955 raise error.ProgrammingError(msg)
2952 2956
2953 2957 troffset = tr.findoffset(self._inner.canonical_index_file)
2954 2958 if troffset is None:
2955 2959 raise error.RevlogError(
2956 2960 _(b"%s not found in the transaction") % self._indexfile
2957 2961 )
2958 2962 if troffset:
2959 2963 tr.addbackup(self._inner.canonical_index_file, for_offset=True)
2960 2964 tr.add(self._datafile, 0)
2961 2965
2962 2966 new_index_file_path = None
2963 2967 if side_write:
2964 2968 old_index_file_path = self._indexfile
2965 2969 new_index_file_path = self._split_index_file
2966 2970 opener = self.opener
2967 2971 weak_self = weakref.ref(self)
2968 2972
2969 2973 # the "split" index replaces the real index when the transaction is
2970 2974 # finalized
2971 2975 def finalize_callback(tr):
2972 2976 opener.rename(
2973 2977 new_index_file_path,
2974 2978 old_index_file_path,
2975 2979 checkambig=True,
2976 2980 )
2977 2981 maybe_self = weak_self()
2978 2982 if maybe_self is not None:
2979 2983 maybe_self._indexfile = old_index_file_path
2980 2984 maybe_self._inner.index_file = maybe_self._indexfile
2981 2985
2982 2986 def abort_callback(tr):
2983 2987 maybe_self = weak_self()
2984 2988 if maybe_self is not None:
2985 2989 maybe_self._indexfile = old_index_file_path
2986 2990 maybe_self._inner.inline = True
2987 2991 maybe_self._inner.index_file = old_index_file_path
2988 2992
2989 2993 tr.registertmp(new_index_file_path)
2990 2994 if self.target[1] is not None:
2991 2995 callback_id = b'000-revlog-split-%d-%s' % self.target
2992 2996 else:
2993 2997 callback_id = b'000-revlog-split-%d' % self.target[0]
2994 2998 tr.addfinalize(callback_id, finalize_callback)
2995 2999 tr.addabort(callback_id, abort_callback)
2996 3000
2997 3001 self._format_flags &= ~FLAG_INLINE_DATA
2998 3002 self._inner.split_inline(
2999 3003 tr,
3000 3004 self._format_flags | self._format_version,
3001 3005 new_index_file_path=new_index_file_path,
3002 3006 )
3003 3007
3004 3008 self._inline = False
3005 3009 if new_index_file_path is not None:
3006 3010 self._indexfile = new_index_file_path
3007 3011
3008 3012 nodemaputil.setup_persistent_nodemap(tr, self)
3009 3013
3010 3014 def _nodeduplicatecallback(self, transaction, node):
3011 3015 """called when trying to add a node already stored."""
3012 3016
3013 3017 @contextlib.contextmanager
3014 3018 def reading(self):
3015 3019 with self._inner.reading():
3016 3020 yield
3017 3021
3018 3022 @contextlib.contextmanager
3019 3023 def _writing(self, transaction):
3020 3024 if self._trypending:
3021 3025 msg = b'try to write in a `trypending` revlog: %s'
3022 3026 msg %= self.display_id
3023 3027 raise error.ProgrammingError(msg)
3024 3028 if self._inner.is_writing:
3025 3029 yield
3026 3030 else:
3027 3031 data_end = None
3028 3032 sidedata_end = None
3029 3033 if self._docket is not None:
3030 3034 data_end = self._docket.data_end
3031 3035 sidedata_end = self._docket.sidedata_end
3032 3036 with self._inner.writing(
3033 3037 transaction,
3034 3038 data_end=data_end,
3035 3039 sidedata_end=sidedata_end,
3036 3040 ):
3037 3041 yield
3038 3042 if self._docket is not None:
3039 3043 self._write_docket(transaction)
3040 3044
3041 3045 @property
3042 3046 def is_delaying(self):
3043 3047 return self._inner.is_delaying
3044 3048
3045 3049 def _write_docket(self, transaction):
3046 3050 """write the current docket on disk
3047 3051
3048 3052 Exists as a method to help the changelog implement its transaction logic
3049 3053
3050 3054 We could also imagine using the same transaction logic for all revlogs
3051 3055 since dockets are cheap."""
3052 3056 self._docket.write(transaction)
3053 3057
3054 3058 def addrevision(
3055 3059 self,
3056 3060 text,
3057 3061 transaction,
3058 3062 link,
3059 3063 p1,
3060 3064 p2,
3061 3065 cachedelta=None,
3062 3066 node=None,
3063 3067 flags=REVIDX_DEFAULT_FLAGS,
3064 3068 deltacomputer=None,
3065 3069 sidedata=None,
3066 3070 ):
3067 3071 """add a revision to the log
3068 3072
3069 3073 text - the revision data to add
3070 3074 transaction - the transaction object used for rollback
3071 3075 link - the linkrev data to add
3072 3076 p1, p2 - the parent nodeids of the revision
3073 3077 cachedelta - an optional precomputed delta
3074 3078 node - nodeid of revision; typically node is not specified, and it is
3075 3079 computed by default as hash(text, p1, p2); however, subclasses might
3076 3080 use a different hashing method (and override checkhash() in that case)
3077 3081 flags - the known flags to set on the revision
3078 3082 deltacomputer - an optional deltacomputer instance shared between
3079 3083 multiple calls
3080 3084 """
3081 3085 if link == nullrev:
3082 3086 raise error.RevlogError(
3083 3087 _(b"attempted to add linkrev -1 to %s") % self.display_id
3084 3088 )
3085 3089
3086 3090 if sidedata is None:
3087 3091 sidedata = {}
3088 3092 elif sidedata and not self.feature_config.has_side_data:
3089 3093 raise error.ProgrammingError(
3090 3094 _(b"trying to add sidedata to a revlog that does not support them")
3091 3095 )
3092 3096
3093 3097 if flags:
3094 3098 node = node or self.hash(text, p1, p2)
3095 3099
3096 3100 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3097 3101
3098 3102 # If the flag processor modifies the revision data, ignore any provided
3099 3103 # cachedelta.
3100 3104 if rawtext != text:
3101 3105 cachedelta = None
3102 3106
3103 3107 if len(rawtext) > _maxentrysize:
3104 3108 raise error.RevlogError(
3105 3109 _(
3106 3110 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3107 3111 )
3108 3112 % (self.display_id, len(rawtext))
3109 3113 )
3110 3114
3111 3115 node = node or self.hash(rawtext, p1, p2)
3112 3116 rev = self.index.get_rev(node)
3113 3117 if rev is not None:
3114 3118 return rev
3115 3119
3116 3120 if validatehash:
3117 3121 self.checkhash(rawtext, node, p1=p1, p2=p2)
3118 3122
3119 3123 return self.addrawrevision(
3120 3124 rawtext,
3121 3125 transaction,
3122 3126 link,
3123 3127 p1,
3124 3128 p2,
3125 3129 node,
3126 3130 flags,
3127 3131 cachedelta=cachedelta,
3128 3132 deltacomputer=deltacomputer,
3129 3133 sidedata=sidedata,
3130 3134 )
3131 3135
3132 3136 def addrawrevision(
3133 3137 self,
3134 3138 rawtext,
3135 3139 transaction,
3136 3140 link,
3137 3141 p1,
3138 3142 p2,
3139 3143 node,
3140 3144 flags,
3141 3145 cachedelta=None,
3142 3146 deltacomputer=None,
3143 3147 sidedata=None,
3144 3148 ):
3145 3149 """add a raw revision with known flags, node and parents
3146 3150 useful when reusing a revision not stored in this revlog (ex: received
3147 3151 over wire, or read from an external bundle).
3148 3152 """
3149 3153 with self._writing(transaction):
3150 3154 return self._addrevision(
3151 3155 node,
3152 3156 rawtext,
3153 3157 transaction,
3154 3158 link,
3155 3159 p1,
3156 3160 p2,
3157 3161 flags,
3158 3162 cachedelta,
3159 3163 deltacomputer=deltacomputer,
3160 3164 sidedata=sidedata,
3161 3165 )
3162 3166
3163 3167 def compress(self, data):
3164 3168 return self._inner.compress(data)
3165 3169
3166 3170 def decompress(self, data):
3167 3171 return self._inner.decompress(data)
3168 3172
3169 3173 def _addrevision(
3170 3174 self,
3171 3175 node,
3172 3176 rawtext,
3173 3177 transaction,
3174 3178 link,
3175 3179 p1,
3176 3180 p2,
3177 3181 flags,
3178 3182 cachedelta,
3179 3183 alwayscache=False,
3180 3184 deltacomputer=None,
3181 3185 sidedata=None,
3182 3186 ):
3183 3187 """internal function to add revisions to the log
3184 3188
3185 3189 see addrevision for argument descriptions.
3186 3190
3187 3191 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3188 3192
3189 3193 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3190 3194 be used.
3191 3195
3192 3196 invariants:
3193 3197 - rawtext is optional (can be None); if not set, cachedelta must be set.
3194 3198 If both are set, they must correspond to each other.
3195 3199 """
3196 3200 if node == self.nullid:
3197 3201 raise error.RevlogError(
3198 3202 _(b"%s: attempt to add null revision") % self.display_id
3199 3203 )
3200 3204 if (
3201 3205 node == self.nodeconstants.wdirid
3202 3206 or node in self.nodeconstants.wdirfilenodeids
3203 3207 ):
3204 3208 raise error.RevlogError(
3205 3209 _(b"%s: attempt to add wdir revision") % self.display_id
3206 3210 )
3207 3211 if self._inner._writinghandles is None:
3208 3212 msg = b'adding revision outside `revlog._writing` context'
3209 3213 raise error.ProgrammingError(msg)
3210 3214
3211 3215 btext = [rawtext]
3212 3216
3213 3217 curr = len(self)
3214 3218 prev = curr - 1
3215 3219
3216 3220 offset = self._get_data_offset(prev)
3217 3221
3218 3222 if self._concurrencychecker:
3219 3223 ifh, dfh, sdfh = self._inner._writinghandles
3220 3224 # XXX no checking for the sidedata file
3221 3225 if self._inline:
3222 3226 # offset is "as if" it were in the .d file, so we need to add on
3223 3227 # the size of the entry metadata.
3224 3228 self._concurrencychecker(
3225 3229 ifh, self._indexfile, offset + curr * self.index.entry_size
3226 3230 )
3227 3231 else:
3228 3232 # Entries in the .i are a consistent size.
3229 3233 self._concurrencychecker(
3230 3234 ifh, self._indexfile, curr * self.index.entry_size
3231 3235 )
3232 3236 self._concurrencychecker(dfh, self._datafile, offset)
3233 3237
3234 3238 p1r, p2r = self.rev(p1), self.rev(p2)
3235 3239
3236 3240 # full versions are inserted when the needed deltas
3237 3241 # become comparable to the uncompressed text
3238 3242 if rawtext is None:
3239 3243 # we need the rawtext size before it was changed by flag processors,
3240 3244 # i.e. the non-raw size. Use revlog explicitly to avoid filelog's
3241 3245 # extra logic that might remove metadata size.
3242 3246 textlen = mdiff.patchedsize(
3243 3247 revlog.size(self, cachedelta[0]), cachedelta[1]
3244 3248 )
3245 3249 else:
3246 3250 textlen = len(rawtext)
3247 3251
3248 3252 if deltacomputer is None:
3249 3253 write_debug = None
3250 3254 if self.delta_config.debug_delta:
3251 3255 write_debug = transaction._report
3252 3256 deltacomputer = deltautil.deltacomputer(
3253 3257 self, write_debug=write_debug
3254 3258 )
3255 3259
3256 3260 if cachedelta is not None and len(cachedelta) == 2:
3257 3261 # If the cached delta has no information about how it should be
3258 3262 # reused, add the default reuse instruction according to the
3259 3263 # revlog's configuration.
3260 3264 if (
3261 3265 self.delta_config.general_delta
3262 3266 and self.delta_config.lazy_delta_base
3263 3267 ):
3264 3268 delta_base_reuse = DELTA_BASE_REUSE_TRY
3265 3269 else:
3266 3270 delta_base_reuse = DELTA_BASE_REUSE_NO
3267 3271 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3268 3272
3269 3273 revinfo = revlogutils.revisioninfo(
3270 3274 node,
3271 3275 p1,
3272 3276 p2,
3273 3277 btext,
3274 3278 textlen,
3275 3279 cachedelta,
3276 3280 flags,
3277 3281 )
3278 3282
3279 3283 deltainfo = deltacomputer.finddeltainfo(revinfo)
3280 3284
3281 3285 compression_mode = COMP_MODE_INLINE
3282 3286 if self._docket is not None:
3283 3287 default_comp = self._docket.default_compression_header
3284 3288 r = deltautil.delta_compression(default_comp, deltainfo)
3285 3289 compression_mode, deltainfo = r
3286 3290
3287 3291 sidedata_compression_mode = COMP_MODE_INLINE
3288 3292 if sidedata and self.feature_config.has_side_data:
3289 3293 sidedata_compression_mode = COMP_MODE_PLAIN
3290 3294 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3291 3295 sidedata_offset = self._docket.sidedata_end
3292 3296 h, comp_sidedata = self._inner.compress(serialized_sidedata)
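# compress() returns a (header, data) pair: b'u' marks data kept
# uncompressed, while an empty header means the revlog's default engine
# was used. As checked below, the compressed form is stored only when it
# is actually smaller and unambiguous.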
3293 3297 if (
3294 3298 h != b'u'
3295 3299 and comp_sidedata[0:1] != b'\0'
3296 3300 and len(comp_sidedata) < len(serialized_sidedata)
3297 3301 ):
3298 3302 assert not h
3299 3303 if (
3300 3304 comp_sidedata[0:1]
3301 3305 == self._docket.default_compression_header
3302 3306 ):
3303 3307 sidedata_compression_mode = COMP_MODE_DEFAULT
3304 3308 serialized_sidedata = comp_sidedata
3305 3309 else:
3306 3310 sidedata_compression_mode = COMP_MODE_INLINE
3307 3311 serialized_sidedata = comp_sidedata
3308 3312 else:
3309 3313 serialized_sidedata = b""
3310 3314 # Don't store the offset if the sidedata is empty; that way
3311 3315 # we can easily detect empty sidedata, and it will be no different
3312 3316 # from sidedata we add manually.
3313 3317 sidedata_offset = 0
3314 3318
3315 3319 rank = RANK_UNKNOWN
3316 3320 if self.feature_config.compute_rank:
3317 3321 if (p1r, p2r) == (nullrev, nullrev):
3318 3322 rank = 1
3319 3323 elif p1r != nullrev and p2r == nullrev:
3320 3324 rank = 1 + self.fast_rank(p1r)
3321 3325 elif p1r == nullrev and p2r != nullrev:
3322 3326 rank = 1 + self.fast_rank(p2r)
3323 3327 else: # merge node
3324 3328 if rustdagop is not None and self.index.rust_ext_compat:
3325 3329 rank = rustdagop.rank(self.index, p1r, p2r)
3326 3330 else:
3327 3331 pmin, pmax = sorted((p1r, p2r))
3328 3332 rank = 1 + self.fast_rank(pmax)
3329 3333 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
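# (the rank of a revision is the size of its ancestor set, the revision
# itself included: a root has rank 1, and a linear chain of n revisions
# ends with rank n)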
3330 3334
3331 3335 e = revlogutils.entry(
3332 3336 flags=flags,
3333 3337 data_offset=offset,
3334 3338 data_compressed_length=deltainfo.deltalen,
3335 3339 data_uncompressed_length=textlen,
3336 3340 data_compression_mode=compression_mode,
3337 3341 data_delta_base=deltainfo.base,
3338 3342 link_rev=link,
3339 3343 parent_rev_1=p1r,
3340 3344 parent_rev_2=p2r,
3341 3345 node_id=node,
3342 3346 sidedata_offset=sidedata_offset,
3343 3347 sidedata_compressed_length=len(serialized_sidedata),
3344 3348 sidedata_compression_mode=sidedata_compression_mode,
3345 3349 rank=rank,
3346 3350 )
3347 3351
3348 3352 self.index.append(e)
3349 3353 entry = self.index.entry_binary(curr)
3350 3354 if curr == 0 and self._docket is None:
3351 3355 header = self._format_flags | self._format_version
3352 3356 header = self.index.pack_header(header)
3353 3357 entry = header + entry
3354 3358 self._writeentry(
3355 3359 transaction,
3356 3360 entry,
3357 3361 deltainfo.data,
3358 3362 link,
3359 3363 offset,
3360 3364 serialized_sidedata,
3361 3365 sidedata_offset,
3362 3366 )
3363 3367
3364 3368 rawtext = btext[0]
3365 3369
3366 3370 if alwayscache and rawtext is None:
3367 3371 rawtext = deltacomputer.buildtext(revinfo)
3368 3372
3369 3373 if type(rawtext) == bytes: # only accept immutable objects
3370 3374 self._inner._revisioncache = (node, curr, rawtext)
3371 3375 self._chainbasecache[curr] = deltainfo.chainbase
3372 3376 return curr
3373 3377
3374 3378 def _get_data_offset(self, prev):
3375 3379 """Returns the current offset in the (in-transaction) data file.
3376 3380 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3377 3381 file to store that information: since sidedata can be rewritten to the
3378 3382 end of the data file within a transaction, you can have cases where, for
3379 3383 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3380 3384 to `n - 1`'s sidedata being written after `n`'s data.
3381 3385
3382 3386 TODO cache this in a docket file before getting out of experimental."""
3383 3387 if self._docket is None:
3384 3388 return self.end(prev)
3385 3389 else:
3386 3390 return self._docket.data_end
3387 3391
3388 3392 def _writeentry(
3389 3393 self,
3390 3394 transaction,
3391 3395 entry,
3392 3396 data,
3393 3397 link,
3394 3398 offset,
3395 3399 sidedata,
3396 3400 sidedata_offset,
3397 3401 ):
3398 3402 # Files opened in a+ mode have inconsistent behavior on various
3399 3403 # platforms. Windows requires that a file positioning call be made
3400 3404 # when the file handle transitions between reads and writes. See
3401 3405 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3402 3406 # platforms, Python or the platform itself can be buggy. Some versions
3403 3407 # of Solaris have been observed to not append at the end of the file
3404 3408 # if the file was seeked to before the end. See issue4943 for more.
3405 3409 #
3406 3410 # We work around this issue by inserting a seek() before writing.
3407 3411 # Note: This is likely not necessary on Python 3. However, because
3408 3412 # the file handle is reused for reads and may be seeked there, we need
3409 3413 # to be careful before changing this.
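#
# In sketch form, every append therefore behaves like:
#
#   fh.seek(0, os.SEEK_END)  # explicit positioning before the write
#   fh.write(data)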
3410 3414 index_end = data_end = sidedata_end = None
3411 3415 if self._docket is not None:
3412 3416 index_end = self._docket.index_end
3413 3417 data_end = self._docket.data_end
3414 3418 sidedata_end = self._docket.sidedata_end
3415 3419
3416 3420 files_end = self._inner.write_entry(
3417 3421 transaction,
3418 3422 entry,
3419 3423 data,
3420 3424 link,
3421 3425 offset,
3422 3426 sidedata,
3423 3427 sidedata_offset,
3424 3428 index_end,
3425 3429 data_end,
3426 3430 sidedata_end,
3427 3431 )
3428 3432 self._enforceinlinesize(transaction)
3429 3433 if self._docket is not None:
3430 3434 self._docket.index_end = files_end[0]
3431 3435 self._docket.data_end = files_end[1]
3432 3436 self._docket.sidedata_end = files_end[2]
3433 3437
3434 3438 nodemaputil.setup_persistent_nodemap(transaction, self)
3435 3439
3436 3440 def addgroup(
3437 3441 self,
3438 3442 deltas,
3439 3443 linkmapper,
3440 3444 transaction,
3441 3445 alwayscache=False,
3442 3446 addrevisioncb=None,
3443 3447 duplicaterevisioncb=None,
3444 3448 debug_info=None,
3445 3449 delta_base_reuse_policy=None,
3446 3450 ):
3447 3451 """
3448 3452 add a delta group
3449 3453
3450 3454 Given a set of deltas, add them to the revision log. The
3451 3455 first delta is against its parent, which should be in our
3452 3456 log; the rest are against the previous delta.
3453 3457
3454 3458 If ``addrevisioncb`` is defined, it will be called with arguments of
3455 3459 this revlog and the node that was added.
3456 3460 """
3457 3461
3458 3462 if self._adding_group:
3459 3463 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3460 3464
3461 3465 # read the default delta-base reuse policy from revlog config if the
3462 3466 # group did not specify one.
3463 3467 if delta_base_reuse_policy is None:
3464 3468 if (
3465 3469 self.delta_config.general_delta
3466 3470 and self.delta_config.lazy_delta_base
3467 3471 ):
3468 3472 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3469 3473 else:
3470 3474 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3471 3475
3472 3476 self._adding_group = True
3473 3477 empty = True
3474 3478 try:
3475 3479 with self._writing(transaction):
3476 3480 write_debug = None
3477 3481 if self.delta_config.debug_delta:
3478 3482 write_debug = transaction._report
3479 3483 deltacomputer = deltautil.deltacomputer(
3480 3484 self,
3481 3485 write_debug=write_debug,
3482 3486 debug_info=debug_info,
3483 3487 )
3484 3488 # loop through our set of deltas
3485 3489 for data in deltas:
3486 3490 (
3487 3491 node,
3488 3492 p1,
3489 3493 p2,
3490 3494 linknode,
3491 3495 deltabase,
3492 3496 delta,
3493 3497 flags,
3494 3498 sidedata,
3495 3499 ) = data
3496 3500 link = linkmapper(linknode)
3497 3501 flags = flags or REVIDX_DEFAULT_FLAGS
3498 3502
3499 3503 rev = self.index.get_rev(node)
3500 3504 if rev is not None:
3501 3505 # this can happen if two branches make the same change
3502 3506 self._nodeduplicatecallback(transaction, rev)
3503 3507 if duplicaterevisioncb:
3504 3508 duplicaterevisioncb(self, rev)
3505 3509 empty = False
3506 3510 continue
3507 3511
3508 3512 for p in (p1, p2):
3509 3513 if not self.index.has_node(p):
3510 3514 raise error.LookupError(
3511 3515 p, self.radix, _(b'unknown parent')
3512 3516 )
3513 3517
3514 3518 if not self.index.has_node(deltabase):
3515 3519 raise error.LookupError(
3516 3520 deltabase, self.display_id, _(b'unknown delta base')
3517 3521 )
3518 3522
3519 3523 baserev = self.rev(deltabase)
3520 3524
3521 3525 if baserev != nullrev and self.iscensored(baserev):
3522 3526 # if base is censored, delta must be full replacement in a
3523 3527 # single patch operation
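# (such a replacement delta starts with a ">lll" header packing
# (start=0, end=oldlen, newlen), which is what
# mdiff.replacediffheader() produces, hence the comparison below)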
3524 3528 hlen = struct.calcsize(b">lll")
3525 3529 oldlen = self.rawsize(baserev)
3526 3530 newlen = len(delta) - hlen
3527 3531 if delta[:hlen] != mdiff.replacediffheader(
3528 3532 oldlen, newlen
3529 3533 ):
3530 3534 raise error.CensoredBaseError(
3531 3535 self.display_id, self.node(baserev)
3532 3536 )
3533 3537
3534 3538 if not flags and self._peek_iscensored(baserev, delta):
3535 3539 flags |= REVIDX_ISCENSORED
3536 3540
3537 3541 # We assume consumers of addrevisioncb will want to retrieve
3538 3542 # the added revision, which will require a call to
3539 3543 # revision(). revision() will fast path if there is a cache
3540 3544 # hit. So, we tell _addrevision() to always cache in this case.
3541 3545 # We're only using addgroup() in the context of changegroup
3542 3546 # generation so the revision data can always be handled as raw
3543 3547 # by the flagprocessor.
3544 3548 rev = self._addrevision(
3545 3549 node,
3546 3550 None,
3547 3551 transaction,
3548 3552 link,
3549 3553 p1,
3550 3554 p2,
3551 3555 flags,
3552 3556 (baserev, delta, delta_base_reuse_policy),
3553 3557 alwayscache=alwayscache,
3554 3558 deltacomputer=deltacomputer,
3555 3559 sidedata=sidedata,
3556 3560 )
3557 3561
3558 3562 if addrevisioncb:
3559 3563 addrevisioncb(self, rev)
3560 3564 empty = False
3561 3565 finally:
3562 3566 self._adding_group = False
3563 3567 return not empty
3564 3568
3565 3569 def iscensored(self, rev):
3566 3570 """Check if a file revision is censored."""
3567 3571 if not self.feature_config.censorable:
3568 3572 return False
3569 3573
3570 3574 return self.flags(rev) & REVIDX_ISCENSORED
3571 3575
3572 3576 def _peek_iscensored(self, baserev, delta):
3573 3577 """Quickly check if a delta produces a censored revision."""
3574 3578 if not self.feature_config.censorable:
3575 3579 return False
3576 3580
3577 3581 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3578 3582
3579 3583 def getstrippoint(self, minlink):
3580 3584 """find the minimum rev that must be stripped to strip the linkrev
3581 3585
3582 3586 Returns a tuple containing the minimum rev and a set of all revs that
3583 3587 have linkrevs that will be broken by this strip.
3584 3588 """
3585 3589 return storageutil.resolvestripinfo(
3586 3590 minlink,
3587 3591 len(self) - 1,
3588 3592 self.headrevs(),
3589 3593 self.linkrev,
3590 3594 self.parentrevs,
3591 3595 )
3592 3596
3593 3597 def strip(self, minlink, transaction):
3594 3598 """truncate the revlog on the first revision with a linkrev >= minlink
3595 3599
3596 3600 This function is called when we're stripping revision minlink and
3597 3601 its descendants from the repository.
3598 3602
3599 3603 We have to remove all revisions with linkrev >= minlink, because
3600 3604 the equivalent changelog revisions will be renumbered after the
3601 3605 strip.
3602 3606
3603 3607 So we truncate the revlog on the first of these revisions, and
3604 3608 trust that the caller has saved the revisions that shouldn't be
3605 3609 removed and that it'll re-add them after this truncation.
3606 3610 """
3607 3611 if len(self) == 0:
3608 3612 return
3609 3613
3610 3614 rev, _ = self.getstrippoint(minlink)
3611 3615 if rev == len(self):
3612 3616 return
3613 3617
3614 3618 # first truncate the files on disk
3615 3619 data_end = self.start(rev)
3616 3620 if not self._inline:
3617 3621 transaction.add(self._datafile, data_end)
3618 3622 end = rev * self.index.entry_size
3619 3623 else:
3620 3624 end = data_end + (rev * self.index.entry_size)
3621 3625
3622 3626 if self._sidedatafile:
3623 3627 sidedata_end = self.sidedata_cut_off(rev)
3624 3628 transaction.add(self._sidedatafile, sidedata_end)
3625 3629
3626 3630 transaction.add(self._indexfile, end)
3627 3631 if self._docket is not None:
3628 3632 # XXX we could leverage the docket while stripping. However, it is
3629 3633 # not powerful enough at the time of this comment
3630 3634 self._docket.index_end = end
3631 3635 self._docket.data_end = data_end
3632 3636 self._docket.sidedata_end = sidedata_end
3633 3637 self._docket.write(transaction, stripping=True)
3634 3638
3635 3639 # then reset internal state in memory to forget those revisions
3636 3640 self._chaininfocache = util.lrucachedict(500)
3637 3641 self._inner.clear_cache()
3638 3642
3639 3643 del self.index[rev:-1]
3640 3644
3641 3645 def checksize(self):
3642 3646 """Check size of index and data files
3643 3647
3644 3648 return a (dd, di) tuple.
3645 3649 - dd: extra bytes for the "data" file
3646 3650 - di: extra bytes for the "index" file
3647 3651
3648 3652 A healthy revlog will return (0, 0).
3649 3653 """
3650 3654 expected = 0
3651 3655 if len(self):
3652 3656 expected = max(0, self.end(len(self) - 1))
3653 3657
3654 3658 try:
3655 3659 with self._datafp() as f:
3656 3660 f.seek(0, io.SEEK_END)
3657 3661 actual = f.tell()
3658 3662 dd = actual - expected
3659 3663 except FileNotFoundError:
3660 3664 dd = 0
3661 3665
3662 3666 try:
3663 3667 f = self.opener(self._indexfile)
3664 3668 f.seek(0, io.SEEK_END)
3665 3669 actual = f.tell()
3666 3670 f.close()
3667 3671 s = self.index.entry_size
3668 3672 i = max(0, actual // s)
3669 3673 di = actual - (i * s)
3670 3674 if self._inline:
3671 3675 databytes = 0
3672 3676 for r in self:
3673 3677 databytes += max(0, self.length(r))
3674 3678 dd = 0
3675 3679 di = actual - len(self) * s - databytes
3676 3680 except FileNotFoundError:
3677 3681 di = 0
3678 3682
3679 3683 return (dd, di)
3680 3684
3681 3685 def files(self):
3682 3686 """return list of files that compose this revlog"""
3683 3687 res = [self._indexfile]
3684 3688 if self._docket_file is None:
3685 3689 if not self._inline:
3686 3690 res.append(self._datafile)
3687 3691 else:
3688 3692 res.append(self._docket_file)
3689 3693 res.extend(self._docket.old_index_filepaths(include_empty=False))
3690 3694 if self._docket.data_end:
3691 3695 res.append(self._datafile)
3692 3696 res.extend(self._docket.old_data_filepaths(include_empty=False))
3693 3697 if self._docket.sidedata_end:
3694 3698 res.append(self._sidedatafile)
3695 3699 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3696 3700 return res
3697 3701
3698 3702 def emitrevisions(
3699 3703 self,
3700 3704 nodes,
3701 3705 nodesorder=None,
3702 3706 revisiondata=False,
3703 3707 assumehaveparentrevisions=False,
3704 3708 deltamode=repository.CG_DELTAMODE_STD,
3705 3709 sidedata_helpers=None,
3706 3710 debug_info=None,
3707 3711 ):
3708 3712 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3709 3713 raise error.ProgrammingError(
3710 3714 b'unhandled value for nodesorder: %s' % nodesorder
3711 3715 )
3712 3716
3713 3717 if nodesorder is None and not self.delta_config.general_delta:
3714 3718 nodesorder = b'storage'
3715 3719
3716 3720 if (
3717 3721 not self._storedeltachains
3718 3722 and deltamode != repository.CG_DELTAMODE_PREV
3719 3723 ):
3720 3724 deltamode = repository.CG_DELTAMODE_FULL
3721 3725
3722 3726 return storageutil.emitrevisions(
3723 3727 self,
3724 3728 nodes,
3725 3729 nodesorder,
3726 3730 revlogrevisiondelta,
3727 3731 deltaparentfn=self.deltaparent,
3728 3732 candeltafn=self._candelta,
3729 3733 rawsizefn=self.rawsize,
3730 3734 revdifffn=self.revdiff,
3731 3735 flagsfn=self.flags,
3732 3736 deltamode=deltamode,
3733 3737 revisiondata=revisiondata,
3734 3738 assumehaveparentrevisions=assumehaveparentrevisions,
3735 3739 sidedata_helpers=sidedata_helpers,
3736 3740 debug_info=debug_info,
3737 3741 )
3738 3742
3739 3743 DELTAREUSEALWAYS = b'always'
3740 3744 DELTAREUSESAMEREVS = b'samerevs'
3741 3745 DELTAREUSENEVER = b'never'
3742 3746
3743 3747 DELTAREUSEFULLADD = b'fulladd'
3744 3748
3745 3749 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3746 3750
3747 3751 def clone(
3748 3752 self,
3749 3753 tr,
3750 3754 destrevlog,
3751 3755 addrevisioncb=None,
3752 3756 deltareuse=DELTAREUSESAMEREVS,
3753 3757 forcedeltabothparents=None,
3754 3758 sidedata_helpers=None,
3755 3759 ):
3756 3760 """Copy this revlog to another, possibly with format changes.
3757 3761
3758 3762 The destination revlog will contain the same revisions and nodes.
3759 3763 However, it may not be bit-for-bit identical due to e.g. delta encoding
3760 3764 differences.
3761 3765
3762 3766 The ``deltareuse`` argument controls how deltas from the existing revlog
3763 3767 are preserved in the destination revlog. The argument can have the
3764 3768 following values:
3765 3769
3766 3770 DELTAREUSEALWAYS
3767 3771 Deltas will always be reused (if possible), even if the destination
3768 3772 revlog would not select the same revisions for the delta. This is the
3769 3773 fastest mode of operation.
3770 3774 DELTAREUSESAMEREVS
3771 3775 Deltas will be reused if the destination revlog would pick the same
3772 3776 revisions for the delta. This mode strikes a balance between speed
3773 3777 and optimization.
3774 3778 DELTAREUSENEVER
3775 3779 Deltas will never be reused. This is the slowest mode of execution.
3776 3780 This mode can be used to recompute deltas (e.g. if the diff/delta
3777 3781 algorithm changes).
3778 3782 DELTAREUSEFULLADD
3779 3783 Revisions will be re-added as if they were new content. This is
3780 3784 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3781 3785 e.g. large file detection and handling.
3782 3786
3783 3787 Delta computation can be slow, so the choice of delta reuse policy can
3784 3788 significantly affect run time.
3785 3789
3786 3790 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3787 3791 two extremes. Deltas will be reused when appropriate, but if a better
3788 3792 delta base could be chosen, it will be recomputed. This means if you
3789 3793 are converting a non-generaldelta revlog to a generaldelta revlog,
3790 3794 deltas will be recomputed if the delta's parent isn't a parent of the
3791 3795 revision.
3792 3796
3793 3797 In addition to the delta policy, the ``forcedeltabothparents``
3794 3798 argument controls whether to force computing deltas against both parents
3795 3799 for merges. If unset, the destination revlog's current default is used.
3796 3800
3797 3801 See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
3798 3802 `sidedata_helpers`.
3799 3803 """
3800 3804 if deltareuse not in self.DELTAREUSEALL:
3801 3805 raise ValueError(
3802 3806 _(b'value for deltareuse invalid: %s') % deltareuse
3803 3807 )
3804 3808
3805 3809 if len(destrevlog):
3806 3810 raise ValueError(_(b'destination revlog is not empty'))
3807 3811
3808 3812 if getattr(self, 'filteredrevs', None):
3809 3813 raise ValueError(_(b'source revlog has filtered revisions'))
3810 3814 if getattr(destrevlog, 'filteredrevs', None):
3811 3815 raise ValueError(_(b'destination revlog has filtered revisions'))
3812 3816
3813 3817 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3814 3818 # if possible.
3815 3819 old_delta_config = destrevlog.delta_config
3816 3820 destrevlog.delta_config = destrevlog.delta_config.copy()
3817 3821
3818 3822 try:
3819 3823 if deltareuse == self.DELTAREUSEALWAYS:
3820 3824 destrevlog.delta_config.lazy_delta_base = True
3821 3825 destrevlog.delta_config.lazy_delta = True
3822 3826 elif deltareuse == self.DELTAREUSESAMEREVS:
3823 3827 destrevlog.delta_config.lazy_delta_base = False
3824 3828 destrevlog.delta_config.lazy_delta = True
3825 3829 elif deltareuse == self.DELTAREUSENEVER:
3826 3830 destrevlog.delta_config.lazy_delta_base = False
3827 3831 destrevlog.delta_config.lazy_delta = False
3828 3832
3829 3833 delta_both_parents = (
3830 3834 forcedeltabothparents or old_delta_config.delta_both_parents
3831 3835 )
3832 3836 destrevlog.delta_config.delta_both_parents = delta_both_parents
3833 3837
3834 3838 with self.reading(), destrevlog._writing(tr):
3835 3839 self._clone(
3836 3840 tr,
3837 3841 destrevlog,
3838 3842 addrevisioncb,
3839 3843 deltareuse,
3840 3844 forcedeltabothparents,
3841 3845 sidedata_helpers,
3842 3846 )
3843 3847
3844 3848 finally:
3845 3849 destrevlog.delta_config = old_delta_config
3846 3850
3847 3851 def _clone(
3848 3852 self,
3849 3853 tr,
3850 3854 destrevlog,
3851 3855 addrevisioncb,
3852 3856 deltareuse,
3853 3857 forcedeltabothparents,
3854 3858 sidedata_helpers,
3855 3859 ):
3856 3860 """perform the core duty of `revlog.clone` after parameter processing"""
3857 3861 write_debug = None
3858 3862 if self.delta_config.debug_delta:
3859 3863 write_debug = tr._report
3860 3864 deltacomputer = deltautil.deltacomputer(
3861 3865 destrevlog,
3862 3866 write_debug=write_debug,
3863 3867 )
3864 3868 index = self.index
3865 3869 for rev in self:
3866 3870 entry = index[rev]
3867 3871
3868 3872 # Some classes override linkrev to take filtered revs into
3869 3873 # account. Use raw entry from index.
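# (index entries are tuples: [0] packs offset and flags, [4] is the
# linkrev, [5]/[6] the parent revisions, and [7] the node id)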
3870 3874 flags = entry[0] & 0xFFFF
3871 3875 linkrev = entry[4]
3872 3876 p1 = index[entry[5]][7]
3873 3877 p2 = index[entry[6]][7]
3874 3878 node = entry[7]
3875 3879
3876 3880 # (Possibly) reuse the delta from the revlog if allowed and
3877 3881 # the revlog chunk is a delta.
3878 3882 cachedelta = None
3879 3883 rawtext = None
3880 3884 if deltareuse == self.DELTAREUSEFULLADD:
3881 3885 text = self._revisiondata(rev)
3882 3886 sidedata = self.sidedata(rev)
3883 3887
3884 3888 if sidedata_helpers is not None:
3885 3889 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3886 3890 self, sidedata_helpers, sidedata, rev
3887 3891 )
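# new_flags is (flags to add, flags to remove); note the operator
# precedence: this computes flags | (new_flags[0] & ~new_flags[1])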
3888 3892 flags = flags | new_flags[0] & ~new_flags[1]
3889 3893
3890 3894 destrevlog.addrevision(
3891 3895 text,
3892 3896 tr,
3893 3897 linkrev,
3894 3898 p1,
3895 3899 p2,
3896 3900 cachedelta=cachedelta,
3897 3901 node=node,
3898 3902 flags=flags,
3899 3903 deltacomputer=deltacomputer,
3900 3904 sidedata=sidedata,
3901 3905 )
3902 3906 else:
3903 3907 if destrevlog.delta_config.lazy_delta:
3904 3908 dp = self.deltaparent(rev)
3905 3909 if dp != nullrev:
3906 3910 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3907 3911
3908 3912 sidedata = None
3909 3913 if not cachedelta:
3910 3914 try:
3911 3915 rawtext = self._revisiondata(rev)
3912 3916 except error.CensoredNodeError as censored:
3913 3917 assert flags & REVIDX_ISCENSORED
3914 3918 rawtext = censored.tombstone
3915 3919 sidedata = self.sidedata(rev)
3916 3920 if sidedata is None:
3917 3921 sidedata = self.sidedata(rev)
3918 3922
3919 3923 if sidedata_helpers is not None:
3920 3924 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3921 3925 self, sidedata_helpers, sidedata, rev
3922 3926 )
3923 3927 flags = flags | new_flags[0] & ~new_flags[1]
3924 3928
3925 3929 destrevlog._addrevision(
3926 3930 node,
3927 3931 rawtext,
3928 3932 tr,
3929 3933 linkrev,
3930 3934 p1,
3931 3935 p2,
3932 3936 flags,
3933 3937 cachedelta,
3934 3938 deltacomputer=deltacomputer,
3935 3939 sidedata=sidedata,
3936 3940 )
3937 3941
3938 3942 if addrevisioncb:
3939 3943 addrevisioncb(self, rev, node)
3940 3944
3941 3945 def censorrevision(self, tr, censornode, tombstone=b''):
3942 3946 if self._format_version == REVLOGV0:
3943 3947 raise error.RevlogError(
3944 3948 _(b'cannot censor with version %d revlogs')
3945 3949 % self._format_version
3946 3950 )
3947 3951 elif self._format_version == REVLOGV1:
3948 3952 rewrite.v1_censor(self, tr, censornode, tombstone)
3949 3953 else:
3950 3954 rewrite.v2_censor(self, tr, censornode, tombstone)
3951 3955
3952 3956 def verifyintegrity(self, state):
3953 3957 """Verifies the integrity of the revlog.
3954 3958
3955 3959 Yields ``revlogproblem`` instances describing problems that are
3956 3960 found.
3957 3961 """
3958 3962 dd, di = self.checksize()
3959 3963 if dd:
3960 3964 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3961 3965 if di:
3962 3966 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3963 3967
3964 3968 version = self._format_version
3965 3969
3966 3970 # The verifier tells us what version revlog we should be.
3967 3971 if version != state[b'expectedversion']:
3968 3972 yield revlogproblem(
3969 3973 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3970 3974 % (self.display_id, version, state[b'expectedversion'])
3971 3975 )
3972 3976
3973 3977 state[b'skipread'] = set()
3974 3978 state[b'safe_renamed'] = set()
3975 3979
3976 3980 for rev in self:
3977 3981 node = self.node(rev)
3978 3982
3979 3983 # Verify contents. 4 cases to care about:
3980 3984 #
3981 3985 # common: the most common case
3982 3986 # rename: with a rename
3983 3987 # meta: file content starts with b'\1\n', the metadata
3984 3988 # header defined in filelog.py, but without a rename
3985 3989 # ext: content stored externally
3986 3990 #
3987 3991 # More formally, their differences are shown below:
3988 3992 #
3989 3993 # | common | rename | meta | ext
3990 3994 # -------------------------------------------------------
3991 3995 # flags() | 0 | 0 | 0 | not 0
3992 3996 # renamed() | False | True | False | ?
3993 3997 # rawtext[0:2]=='\1\n'| False | True | True | ?
3994 3998 #
3995 3999 # "rawtext" means the raw text stored in revlog data, which
3996 4000 # could be retrieved by "rawdata(rev)". "text"
3997 4001 # mentioned below is "revision(rev)".
3998 4002 #
3999 4003 # There are 3 different lengths stored physically:
4000 4004 # 1. L1: rawsize, stored in revlog index
4001 4005 # 2. L2: len(rawtext), stored in revlog data
4002 4006 # 3. L3: len(text), stored in revlog data if flags==0, or
4003 4007 # possibly somewhere else if flags!=0
4004 4008 #
4005 4009 # L1 should be equal to L2. L3 could be different from them.
4006 4010 # "text" may or may not affect commit hash depending on flag
4007 4011 # processors (see flagutil.addflagprocessor).
4008 4012 #
4009 4013 # | common | rename | meta | ext
4010 4014 # -------------------------------------------------
4011 4015 # rawsize() | L1 | L1 | L1 | L1
4012 4016 # size() | L1 | L2-LM | L1(*) | L1 (?)
4013 4017 # len(rawtext) | L2 | L2 | L2 | L2
4014 4018 # len(text) | L2 | L2 | L2 | L3
4015 4019 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
4016 4020 #
4017 4021 # LM: length of metadata, depending on rawtext
4018 4022 # (*): not ideal, see comment in filelog.size
4019 4023 # (?): could be "- len(meta)" if the resolved content has
4020 4024 # rename metadata
4021 4025 #
4022 4026 # Checks needed to be done:
4023 4027 # 1. length check: L1 == L2, in all cases.
4024 4028 # 2. hash check: depending on flag processor, we may need to
4025 4029 # use either "text" (external), or "rawtext" (in revlog).
4026 4030
4027 4031 try:
4028 4032 skipflags = state.get(b'skipflags', 0)
4029 4033 if skipflags:
4030 4034 skipflags &= self.flags(rev)
4031 4035
4032 4036 _verify_revision(self, skipflags, state, node)
4033 4037
4034 4038 l1 = self.rawsize(rev)
4035 4039 l2 = len(self.rawdata(node))
4036 4040
4037 4041 if l1 != l2:
4038 4042 yield revlogproblem(
4039 4043 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
4040 4044 node=node,
4041 4045 )
4042 4046
4043 4047 except error.CensoredNodeError:
4044 4048 if state[b'erroroncensored']:
4045 4049 yield revlogproblem(
4046 4050 error=_(b'censored file data'), node=node
4047 4051 )
4048 4052 state[b'skipread'].add(node)
4049 4053 except Exception as e:
4050 4054 yield revlogproblem(
4051 4055 error=_(b'unpacking %s: %s')
4052 4056 % (short(node), stringutil.forcebytestr(e)),
4053 4057 node=node,
4054 4058 )
4055 4059 state[b'skipread'].add(node)
4056 4060
4057 4061 def storageinfo(
4058 4062 self,
4059 4063 exclusivefiles=False,
4060 4064 sharedfiles=False,
4061 4065 revisionscount=False,
4062 4066 trackedsize=False,
4063 4067 storedsize=False,
4064 4068 ):
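"""Return a dictionary of storage details for this revlog; only the
categories requested through the keyword flags are computed.
"""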
4065 4069 d = {}
4066 4070
4067 4071 if exclusivefiles:
4068 4072 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4069 4073 if not self._inline:
4070 4074 d[b'exclusivefiles'].append((self.opener, self._datafile))
4071 4075
4072 4076 if sharedfiles:
4073 4077 d[b'sharedfiles'] = []
4074 4078
4075 4079 if revisionscount:
4076 4080 d[b'revisionscount'] = len(self)
4077 4081
4078 4082 if trackedsize:
4079 4083 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4080 4084
4081 4085 if storedsize:
4082 4086 d[b'storedsize'] = sum(
4083 4087 self.opener.stat(path).st_size for path in self.files()
4084 4088 )
4085 4089
4086 4090 return d
4087 4091
4088 4092 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
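"""Regenerate the sidedata of revisions `startrev` through `endrev`
(inclusive) and append it at the end of the sidedata file.

`helpers` is assumed to be the 3-item structure documented in
`revlogutils.sidedata.get_sidedata_helpers`, whose second and third
items hold the sidedata computers and removers.
"""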
4089 4093 if not self.feature_config.has_side_data:
4090 4094 return
4091 4095 # revlog formats with sidedata support do not support inline
4092 4096 assert not self._inline
4093 4097 if not helpers[1] and not helpers[2]:
4094 4098 # Nothing to generate or remove
4095 4099 return
4096 4100
4097 4101 new_entries = []
4098 4102 # append the new sidedata
4099 4103 with self._writing(transaction):
4100 4104 ifh, dfh, sdfh = self._inner._writinghandles
4101 4105 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4102 4106
4103 4107 current_offset = sdfh.tell()
4104 4108 for rev in range(startrev, endrev + 1):
4105 4109 entry = self.index[rev]
4106 4110 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4107 4111 store=self,
4108 4112 sidedata_helpers=helpers,
4109 4113 sidedata={},
4110 4114 rev=rev,
4111 4115 )
4112 4116
4113 4117 serialized_sidedata = sidedatautil.serialize_sidedata(
4114 4118 new_sidedata
4115 4119 )
4116 4120
4117 4121 sidedata_compression_mode = COMP_MODE_INLINE
4118 4122 if serialized_sidedata and self.feature_config.has_side_data:
4119 4123 sidedata_compression_mode = COMP_MODE_PLAIN
4120 4124 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4121 4125 if (
4122 4126 h != b'u'
4123 4127 and comp_sidedata[0] != b'\0'
4124 4128 and len(comp_sidedata) < len(serialized_sidedata)
4125 4129 ):
4126 4130 assert not h
4127 4131 if (
4128 4132 comp_sidedata[0]
4129 4133 == self._docket.default_compression_header
4130 4134 ):
4131 4135 sidedata_compression_mode = COMP_MODE_DEFAULT
4132 4136 serialized_sidedata = comp_sidedata
4133 4137 else:
4134 4138 sidedata_compression_mode = COMP_MODE_INLINE
4135 4139 serialized_sidedata = comp_sidedata
4136 4140 if entry[8] != 0 or entry[9] != 0:
4137 4141 # rewriting entries that already have sidedata is not
4138 4142 # supported yet, because it introduces garbage data in the
4139 4143 # revlog.
4140 4144 msg = b"rewriting existing sidedata is not supported yet"
4141 4145 raise error.Abort(msg)
4142 4146
4143 4147 # Apply (potential) flags to add and to remove after running
4144 4148 # the sidedata helpers
4145 4149 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4146 4150 entry_update = (
4147 4151 current_offset,
4148 4152 len(serialized_sidedata),
4149 4153 new_offset_flags,
4150 4154 sidedata_compression_mode,
4151 4155 )
4152 4156
4153 4157 # the sidedata computation might have moved the file cursors around
4154 4158 sdfh.seek(current_offset, os.SEEK_SET)
4155 4159 sdfh.write(serialized_sidedata)
4156 4160 new_entries.append(entry_update)
4157 4161 current_offset += len(serialized_sidedata)
4158 4162 self._docket.sidedata_end = sdfh.tell()
4159 4163
4160 4164 # rewrite the new index entries
4161 4165 ifh.seek(startrev * self.index.entry_size)
4162 4166 for i, e in enumerate(new_entries):
4163 4167 rev = startrev + i
4164 4168 self.index.replace_sidedata_info(rev, *e)
4165 4169 packed = self.index.entry_binary(rev)
4166 4170 if rev == 0 and self._docket is None:
4167 4171 header = self._format_flags | self._format_version
4168 4172 header = self.index.pack_header(header)
4169 4173 packed = header + packed
4170 4174 ifh.write(packed)