revlog: remove legacy usage of `_censorable`...
marmoute
r51943:0d33f4b0 default
@@ -1,3718 +1,3720 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanket usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
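# Editor's sketch (not part of the module): the base ``_Config.copy`` above is
# a shallow copy, so ``FeatureConfig.copy`` re-copies the mutable options dict
# to keep copies independent. The ``b'zstd.level'`` key is hypothetical:
#
#   base = FeatureConfig()
#   base.compression_engine_options[b'zstd.level'] = 3
#   clone = base.copy()
#   clone.compression_engine_options[b'zstd.level'] = 9
#   assert base.compression_engine_options[b'zstd.level'] == 3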
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups by chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
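# Editor's note: a worked example of the header layout used above. For a
# version 1 inline revlog, the first four index bytes unpack to 0x00010001:
# the low 16 bits give _format_version == REVLOGV1 == 1, and the high bits
# give _format_flags == FLAG_INLINE_DATA == 1 << 16.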
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code to understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must be reliably set by normal code, but
425 425 test, debug, or performance measurement code might not set this to an
426 426 accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598 """process options (from above/config) to setup associated default revlog mode
599 599
600 600 These values might be affected when actually reading on disk information.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * new_header:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608 minimal index size at which to start using mmap
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 opts = self.opener.options
614 614
615 615 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
616 616 new_header = CHANGELOGV2
617 617 compute_rank = opts.get(b'changelogv2.compute-rank', True)
618 618 self.feature_config.compute_rank = compute_rank
619 619 elif b'revlogv2' in opts:
620 620 new_header = REVLOGV2
621 621 elif b'revlogv1' in opts:
622 622 new_header = REVLOGV1 | FLAG_INLINE_DATA
623 623 if b'generaldelta' in opts:
624 624 new_header |= FLAG_GENERALDELTA
625 625 elif b'revlogv0' in self.opener.options:
626 626 new_header = REVLOGV0
627 627 else:
628 628 new_header = REVLOG_DEFAULT_VERSION
629 629
630 630 mmapindexthreshold = None
631 631 if self.data_config.mmap_large_index:
632 632 mmapindexthreshold = self.data_config.mmap_index_threshold
633 633 if self.feature_config.enable_ellipsis:
634 634 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
635 635
636 636 # revlog v0 doesn't have flag processors
637 637 for flag, processor in opts.get(b'flagprocessors', {}).items():
638 638 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
639 639
640 640 chunk_cache_size = self.data_config.chunk_cache_size
641 641 if chunk_cache_size <= 0:
642 642 raise error.RevlogError(
643 643 _(b'revlog chunk cache size %r is not greater than 0')
644 644 % chunk_cache_size
645 645 )
646 646 elif chunk_cache_size & (chunk_cache_size - 1):
647 647 raise error.RevlogError(
648 648 _(b'revlog chunk cache size %r is not a power of 2')
649 649 % chunk_cache_size
650 650 )
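# Editor's note: ``size & (size - 1)`` above is the usual power-of-two test:
# a power of two has a single bit set, so subtracting one flips only lower
# bits (0x10000 & 0x0FFFF == 0), while any other value keeps a bit in common
# with its predecessor (0x18000 & 0x17FFF == 0x10000 != 0).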
651 651 force_nodemap = opts.get(b'devel-force-nodemap', False)
652 652 return new_header, mmapindexthreshold, force_nodemap
653 653
654 654 def _get_data(self, filepath, mmap_threshold, size=None):
655 655 """return a file content with or without mmap
656 656
657 657 If the file is missing return the empty string"""
658 658 try:
659 659 with self.opener(filepath) as fp:
660 660 if mmap_threshold is not None:
661 661 file_size = self.opener.fstat(fp).st_size
662 662 if file_size >= mmap_threshold:
663 663 if size is not None:
664 664 # avoid potential mmap crash
665 665 size = min(file_size, size)
666 666 # TODO: should .close() to release resources without
667 667 # relying on Python GC
668 668 if size is None:
669 669 return util.buffer(util.mmapread(fp))
670 670 else:
671 671 return util.buffer(util.mmapread(fp, size))
672 672 if size is None:
673 673 return fp.read()
674 674 else:
675 675 return fp.read(size)
676 676 except FileNotFoundError:
677 677 return b''
678 678
679 679 def get_streams(self, max_linkrev, force_inline=False):
680 680 """return a list of streams that represent this revlog
681 681
682 682 This is used by stream-clone to do bytes to bytes copies of a repository.
683 683
684 684 This streams data for all revisions that refer to a changelog revision up
685 685 to `max_linkrev`.
686 686
687 687 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
688 688
689 689 It returns a list of three-tuples:
690 690
691 691 [
692 692 (filename, bytes_stream, stream_size),
693 693 …
694 694 ]
695 695 """
696 696 n = len(self)
697 697 index = self.index
698 698 while n > 0:
699 699 linkrev = index[n - 1][4]
700 700 if linkrev < max_linkrev:
701 701 break
702 702 # note: this loop will rarely go through multiple iterations, since
703 703 # it only traverses commits created during the current streaming
704 704 # pull operation.
705 705 #
706 706 # If this becomes a problem, using a binary search should cap the
707 707 # runtime of this.
708 708 n = n - 1
709 709 if n == 0:
710 710 # no data to send
711 711 return []
712 712 index_size = n * index.entry_size
713 713 data_size = self.end(n - 1)
714 714
715 715 # XXX we might have been split (or stripped) since the object was
716 716 # initialized. We need to close this race too, e.g. by pre-opening the
717 717 # files we feed to the revlog and never closing them before we are
718 718 # done streaming.
719 719
720 720 if self._inline:
721 721
722 722 def get_stream():
723 723 with self._indexfp() as fp:
724 724 yield None
725 725 size = index_size + data_size
726 726 if size <= 65536:
727 727 yield fp.read(size)
728 728 else:
729 729 yield from util.filechunkiter(fp, limit=size)
730 730
731 731 inline_stream = get_stream()
732 732 next(inline_stream)
733 733 return [
734 734 (self._indexfile, inline_stream, index_size + data_size),
735 735 ]
736 736 elif force_inline:
737 737
738 738 def get_stream():
739 739 with self.reading():
740 740 yield None
741 741
742 742 for rev in range(n):
743 743 idx = self.index.entry_binary(rev)
744 744 if rev == 0 and self._docket is None:
745 745 # re-inject the inline flag
746 746 header = self._format_flags
747 747 header |= self._format_version
748 748 header |= FLAG_INLINE_DATA
749 749 header = self.index.pack_header(header)
750 750 idx = header + idx
751 751 yield idx
752 752 yield self._getsegmentforrevs(rev, rev)[1]
753 753
754 754 inline_stream = get_stream()
755 755 next(inline_stream)
756 756 return [
757 757 (self._indexfile, inline_stream, index_size + data_size),
758 758 ]
759 759 else:
760 760
761 761 def get_index_stream():
762 762 with self._indexfp() as fp:
763 763 yield None
764 764 if index_size <= 65536:
765 765 yield fp.read(index_size)
766 766 else:
767 767 yield from util.filechunkiter(fp, limit=index_size)
768 768
769 769 def get_data_stream():
770 770 with self._datafp() as fp:
771 771 yield None
772 772 if data_size <= 65536:
773 773 yield fp.read(data_size)
774 774 else:
775 775 yield from util.filechunkiter(fp, limit=data_size)
776 776
777 777 index_stream = get_index_stream()
778 778 next(index_stream)
779 779 data_stream = get_data_stream()
780 780 next(data_stream)
781 781 return [
782 782 (self._datafile, data_stream, data_size),
783 783 (self._indexfile, index_stream, index_size),
784 784 ]
785 785
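# Editor's sketch (an assumption, not the actual stream-clone code): a
# consumer could drain the streams returned above and write each file
# verbatim; the generators yield raw bytes once ``get_streams`` has primed
# them past the initial ``yield None``:
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       with open(name, 'wb') as out:  # the real code goes through a vfs
#           for chunk in stream:
#               out.write(chunk)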
786 786 def _loadindex(self, docket=None):
787 787
788 788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
789 789
790 790 if self.postfix is not None:
791 791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
792 792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
793 793 entry_point = b'%s.i.a' % self.radix
794 794 elif self._try_split and self.opener.exists(self._split_index_file):
795 795 entry_point = self._split_index_file
796 796 else:
797 797 entry_point = b'%s.i' % self.radix
798 798
799 799 if docket is not None:
800 800 self._docket = docket
801 801 self._docket_file = entry_point
802 802 else:
803 803 self._initempty = True
804 804 entry_data = self._get_data(entry_point, mmapindexthreshold)
805 805 if len(entry_data) > 0:
806 806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
807 807 self._initempty = False
808 808 else:
809 809 header = new_header
810 810
811 811 self._format_flags = header & ~0xFFFF
812 812 self._format_version = header & 0xFFFF
813 813
814 814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
815 815 if supported_flags is None:
816 816 msg = _(b'unknown version (%d) in revlog %s')
817 817 msg %= (self._format_version, self.display_id)
818 818 raise error.RevlogError(msg)
819 819 elif self._format_flags & ~supported_flags:
820 820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
821 821 display_flag = self._format_flags >> 16
822 822 msg %= (display_flag, self._format_version, self.display_id)
823 823 raise error.RevlogError(msg)
824 824
825 825 features = FEATURES_BY_VERSION[self._format_version]
826 826 self._inline = features[b'inline'](self._format_flags)
827 827 self.delta_config.general_delta = features[b'generaldelta'](
828 828 self._format_flags
829 829 )
830 830 self.feature_config.has_side_data = features[b'sidedata']
831 831
832 832 if not features[b'docket']:
833 833 self._indexfile = entry_point
834 834 index_data = entry_data
835 835 else:
836 836 self._docket_file = entry_point
837 837 if self._initempty:
838 838 self._docket = docketutil.default_docket(self, header)
839 839 else:
840 840 self._docket = docketutil.parse_docket(
841 841 self, entry_data, use_pending=self._trypending
842 842 )
843 843
844 844 if self._docket is not None:
845 845 self._indexfile = self._docket.index_filepath()
846 846 index_data = b''
847 847 index_size = self._docket.index_end
848 848 if index_size > 0:
849 849 index_data = self._get_data(
850 850 self._indexfile, mmapindexthreshold, size=index_size
851 851 )
852 852 if len(index_data) < index_size:
853 853 msg = _(b'too few index data for %s: got %d, expected %d')
854 854 msg %= (self.display_id, len(index_data), index_size)
855 855 raise error.RevlogError(msg)
856 856
857 857 self._inline = False
858 858 # generaldelta implied by version 2 revlogs.
859 859 self.delta_config.general_delta = True
860 860 # the logic for persistent nodemap will be dealt with within the
861 861 # main docket, so disable it for now.
862 862 self._nodemap_file = None
863 863
864 864 if self._docket is not None:
865 865 self._datafile = self._docket.data_filepath()
866 866 self._sidedatafile = self._docket.sidedata_filepath()
867 867 elif self.postfix is None:
868 868 self._datafile = b'%s.d' % self.radix
869 869 else:
870 870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
871 871
872 872 self.nodeconstants = sha1nodeconstants
873 873 self.nullid = self.nodeconstants.nullid
874 874
875 875 # sparse-revlog can't be on without general-delta (issue6056)
876 876 if not self.delta_config.general_delta:
877 877 self.delta_config.sparse_revlog = False
878 878
879 879 self._storedeltachains = True
880 880
881 881 devel_nodemap = (
882 882 self._nodemap_file
883 883 and force_nodemap
884 884 and parse_index_v1_nodemap is not None
885 885 )
886 886
887 887 use_rust_index = False
888 888 if rustrevlog is not None:
889 889 if self._nodemap_file is not None:
890 890 use_rust_index = True
891 891 else:
892 892 use_rust_index = self.opener.options.get(b'rust.index')
893 893
894 894 self._parse_index = parse_index_v1
895 895 if self._format_version == REVLOGV0:
896 896 self._parse_index = revlogv0.parse_index_v0
897 897 elif self._format_version == REVLOGV2:
898 898 self._parse_index = parse_index_v2
899 899 elif self._format_version == CHANGELOGV2:
900 900 self._parse_index = parse_index_cl_v2
901 901 elif devel_nodemap:
902 902 self._parse_index = parse_index_v1_nodemap
903 903 elif use_rust_index:
904 904 self._parse_index = parse_index_v1_mixed
905 905 try:
906 906 d = self._parse_index(index_data, self._inline)
907 907 index, chunkcache = d
908 908 use_nodemap = (
909 909 not self._inline
910 910 and self._nodemap_file is not None
911 911 and hasattr(index, 'update_nodemap_data')
912 912 )
913 913 if use_nodemap:
914 914 nodemap_data = nodemaputil.persisted_data(self)
915 915 if nodemap_data is not None:
916 916 docket = nodemap_data[0]
917 917 if (
918 918 len(d[0]) > docket.tip_rev
919 919 and d[0][docket.tip_rev][7] == docket.tip_node
920 920 ):
921 921 # no changelog tampering
922 922 self._nodemap_docket = docket
923 923 index.update_nodemap_data(*nodemap_data)
924 924 except (ValueError, IndexError):
925 925 raise error.RevlogError(
926 926 _(b"index %s is corrupted") % self.display_id
927 927 )
928 928 self.index = index
929 929 self._segmentfile = randomaccessfile.randomaccessfile(
930 930 self.opener,
931 931 (self._indexfile if self._inline else self._datafile),
932 932 self._chunkcachesize,
933 933 chunkcache,
934 934 )
935 935 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
936 936 self.opener,
937 937 self._sidedatafile,
938 938 self._chunkcachesize,
939 939 )
940 940 # revnum -> (chain-length, sum-delta-length)
941 941 self._chaininfocache = util.lrucachedict(500)
942 942 # revlog header -> revlog compressor
943 943 self._decompressors = {}
944 944
945 945 def get_revlog(self):
946 946 """simple function to mirror API of other not-really-revlog API"""
947 947 return self
948 948
949 949 @util.propertycache
950 950 def revlog_kind(self):
951 951 return self.target[0]
952 952
953 953 @util.propertycache
954 954 def display_id(self):
955 955 """The public facing "ID" of the revlog that we use in message"""
956 956 if self.revlog_kind == KIND_FILELOG:
957 957 # Reference the file without the "data/" prefix, so it is familiar
958 958 # to the user.
959 959 return self.target[1]
960 960 else:
961 961 return self.radix
962 962
963 963 def _get_decompressor(self, t):
964 964 try:
965 965 compressor = self._decompressors[t]
966 966 except KeyError:
967 967 try:
968 968 engine = util.compengines.forrevlogheader(t)
969 969 compressor = engine.revlogcompressor(self._compengineopts)
970 970 self._decompressors[t] = compressor
971 971 except KeyError:
972 972 raise error.RevlogError(
973 973 _(b'unknown compression type %s') % binascii.hexlify(t)
974 974 )
975 975 return compressor
976 976
977 977 @util.propertycache
978 978 def _compressor(self):
979 979 engine = util.compengines[self._compengine]
980 980 return engine.revlogcompressor(self._compengineopts)
981 981
982 982 @util.propertycache
983 983 def _decompressor(self):
984 984 """the default decompressor"""
985 985 if self._docket is None:
986 986 return None
987 987 t = self._docket.default_compression_header
988 988 c = self._get_decompressor(t)
989 989 return c.decompress
990 990
991 991 def _indexfp(self):
992 992 """file object for the revlog's index file"""
993 993 return self.opener(self._indexfile, mode=b"r")
994 994
995 995 def __index_write_fp(self):
996 996 # You should not use this directly; use `_writing` instead
997 997 try:
998 998 f = self.opener(
999 999 self._indexfile,
1000 1000 mode=b"r+",
1001 1001 checkambig=self.data_config.check_ambig,
1002 1002 )
1003 1003 if self._docket is None:
1004 1004 f.seek(0, os.SEEK_END)
1005 1005 else:
1006 1006 f.seek(self._docket.index_end, os.SEEK_SET)
1007 1007 return f
1008 1008 except FileNotFoundError:
1009 1009 return self.opener(
1010 1010 self._indexfile,
1011 1011 mode=b"w+",
1012 1012 checkambig=self.data_config.check_ambig,
1013 1013 )
1014 1014
1015 1015 def __index_new_fp(self):
1016 1016 # You should not use this unless you are upgrading from an inline revlog
1017 1017 return self.opener(
1018 1018 self._indexfile,
1019 1019 mode=b"w",
1020 1020 checkambig=self.data_config.check_ambig,
1021 1021 atomictemp=True,
1022 1022 )
1023 1023
1024 1024 def _datafp(self, mode=b'r'):
1025 1025 """file object for the revlog's data file"""
1026 1026 return self.opener(self._datafile, mode=mode)
1027 1027
1028 1028 @contextlib.contextmanager
1029 1029 def _sidedatareadfp(self):
1030 1030 """file object suitable to read sidedata"""
1031 1031 if self._writinghandles:
1032 1032 yield self._writinghandles[2]
1033 1033 else:
1034 1034 with self.opener(self._sidedatafile) as fp:
1035 1035 yield fp
1036 1036
1037 1037 def tiprev(self):
1038 1038 return len(self.index) - 1
1039 1039
1040 1040 def tip(self):
1041 1041 return self.node(self.tiprev())
1042 1042
1043 1043 def __contains__(self, rev):
1044 1044 return 0 <= rev < len(self)
1045 1045
1046 1046 def __len__(self):
1047 1047 return len(self.index)
1048 1048
1049 1049 def __iter__(self):
1050 1050 return iter(range(len(self)))
1051 1051
1052 1052 def revs(self, start=0, stop=None):
1053 1053 """iterate over all rev in this revlog (from start to stop)"""
1054 1054 return storageutil.iterrevs(len(self), start=start, stop=stop)
1055 1055
1056 1056 def hasnode(self, node):
1057 1057 try:
1058 1058 self.rev(node)
1059 1059 return True
1060 1060 except KeyError:
1061 1061 return False
1062 1062
1063 1063 def _candelta(self, baserev, rev):
1064 1064 """whether two revisions (baserev, rev) can be delta-ed or not"""
1065 1065 # Disable delta if either rev requires a content-changing flag
1066 1066 # processor (ex. LFS). This is because such flag processor can alter
1067 1067 # the rawtext content that the delta will be based on, and two clients
1068 1068 # could have a same revlog node with different flags (i.e. different
1069 1069 # rawtext contents) and the delta could be incompatible.
1070 1070 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1071 1071 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1072 1072 ):
1073 1073 return False
1074 1074 return True
1075 1075
1076 1076 def update_caches(self, transaction):
1077 1077 """update on disk cache
1078 1078
1079 1079 If a transaction is passed, the update may be delayed to transaction
1080 1080 commit."""
1081 1081 if self._nodemap_file is not None:
1082 1082 if transaction is None:
1083 1083 nodemaputil.update_persistent_nodemap(self)
1084 1084 else:
1085 1085 nodemaputil.setup_persistent_nodemap(transaction, self)
1086 1086
1087 1087 def clearcaches(self):
1088 1088 """Clear in-memory caches"""
1089 1089 self._revisioncache = None
1090 1090 self._chainbasecache.clear()
1091 1091 self._segmentfile.clear_cache()
1092 1092 self._segmentfile_sidedata.clear_cache()
1093 1093 self._pcache = {}
1094 1094 self._nodemap_docket = None
1095 1095 self.index.clearcaches()
1096 1096 # The python code is the one responsible for validating the docket, so we
1097 1097 # end up having to refresh it here.
1098 1098 use_nodemap = (
1099 1099 not self._inline
1100 1100 and self._nodemap_file is not None
1101 1101 and hasattr(self.index, 'update_nodemap_data')
1102 1102 )
1103 1103 if use_nodemap:
1104 1104 nodemap_data = nodemaputil.persisted_data(self)
1105 1105 if nodemap_data is not None:
1106 1106 self._nodemap_docket = nodemap_data[0]
1107 1107 self.index.update_nodemap_data(*nodemap_data)
1108 1108
1109 1109 def rev(self, node):
1110 1110 """return the revision number associated with a <nodeid>"""
1111 1111 try:
1112 1112 return self.index.rev(node)
1113 1113 except TypeError:
1114 1114 raise
1115 1115 except error.RevlogError:
1116 1116 # parsers.c radix tree lookup failed
1117 1117 if (
1118 1118 node == self.nodeconstants.wdirid
1119 1119 or node in self.nodeconstants.wdirfilenodeids
1120 1120 ):
1121 1121 raise error.WdirUnsupported
1122 1122 raise error.LookupError(node, self.display_id, _(b'no node'))
1123 1123
1124 1124 # Accessors for index entries.
1125 1125
1126 1126 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1127 1127 # are flags.
1128 1128 def start(self, rev):
1129 1129 return int(self.index[rev][0] >> 16)
1130 1130
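# Editor's note: a worked example of the packing described above. With data
# offset 1024 and flags 0x8000, the first index entry field is
# (1024 << 16) | 0x8000 == 0x04008000, so ``start`` recovers
# 0x04008000 >> 16 == 1024 and ``flags`` recovers 0x04008000 & 0xFFFF == 0x8000.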
1131 1131 def sidedata_cut_off(self, rev):
1132 1132 sd_cut_off = self.index[rev][8]
1133 1133 if sd_cut_off != 0:
1134 1134 return sd_cut_off
1135 1135 # This is some annoying dance, because entries without sidedata
1136 1136 # currently use 0 as their offset (instead of previous-offset +
1137 1137 # previous-size).
1138 1138 #
1139 1139 # We should reconsider this sidedata → 0 sidedata_offset policy.
1140 1140 # In the meantime, we need this.
1141 1141 while 0 <= rev:
1142 1142 e = self.index[rev]
1143 1143 if e[9] != 0:
1144 1144 return e[8] + e[9]
1145 1145 rev -= 1
1146 1146 return 0
1147 1147
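# Editor's note: an example of the walk above. If rev 7 stored no sidedata
# (entry[8] == entry[9] == 0) but rev 5 stored 40 bytes at offset 100,
# ``sidedata_cut_off(7)`` walks back to rev 5 and returns 100 + 40 == 140,
# the end of the last sidedata actually written.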
1148 1148 def flags(self, rev):
1149 1149 return self.index[rev][0] & 0xFFFF
1150 1150
1151 1151 def length(self, rev):
1152 1152 return self.index[rev][1]
1153 1153
1154 1154 def sidedata_length(self, rev):
1155 1155 if not self.hassidedata:
1156 1156 return 0
1157 1157 return self.index[rev][9]
1158 1158
1159 1159 def rawsize(self, rev):
1160 1160 """return the length of the uncompressed text for a given revision"""
1161 1161 l = self.index[rev][2]
1162 1162 if l >= 0:
1163 1163 return l
1164 1164
1165 1165 t = self.rawdata(rev)
1166 1166 return len(t)
1167 1167
1168 1168 def size(self, rev):
1169 1169 """length of non-raw text (processed by a "read" flag processor)"""
1170 1170 # fast path: if no "read" flag processor could change the content,
1171 1171 # size is rawsize. note: ELLIPSIS is known to not change the content.
1172 1172 flags = self.flags(rev)
1173 1173 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1174 1174 return self.rawsize(rev)
1175 1175
1176 1176 return len(self.revision(rev))
1177 1177
1178 1178 def fast_rank(self, rev):
1179 1179 """Return the rank of a revision if already known, or None otherwise.
1180 1180
1181 1181 The rank of a revision is the size of the sub-graph it defines as a
1182 1182 head. Equivalently, the rank of a revision `r` is the size of the set
1183 1183 `ancestors(r)`, `r` included.
1184 1184
1185 1185 This method returns the rank retrieved from the revlog in constant
1186 1186 time. It makes no attempt at computing unknown values for versions of
1187 1187 the revlog which do not persist the rank.
1188 1188 """
1189 1189 rank = self.index[rev][ENTRY_RANK]
1190 1190 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1191 1191 return None
1192 1192 if rev == nullrev:
1193 1193 return 0 # convention
1194 1194 return rank
1195 1195
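# Editor's note: in a linear history 0 <- 1 <- 2, the rank of rev 2 is 3
# (ancestors(2) is {0, 1, 2}, the rev itself included); on formats other than
# CHANGELOGV2, or when the rank was not persisted, ``fast_rank`` returns None
# rather than computing it.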
1196 1196 def chainbase(self, rev):
1197 1197 base = self._chainbasecache.get(rev)
1198 1198 if base is not None:
1199 1199 return base
1200 1200
1201 1201 index = self.index
1202 1202 iterrev = rev
1203 1203 base = index[iterrev][3]
1204 1204 while base != iterrev:
1205 1205 iterrev = base
1206 1206 base = index[iterrev][3]
1207 1207
1208 1208 self._chainbasecache[rev] = base
1209 1209 return base
1210 1210
1211 1211 def linkrev(self, rev):
1212 1212 return self.index[rev][4]
1213 1213
1214 1214 def parentrevs(self, rev):
1215 1215 try:
1216 1216 entry = self.index[rev]
1217 1217 except IndexError:
1218 1218 if rev == wdirrev:
1219 1219 raise error.WdirUnsupported
1220 1220 raise
1221 1221
1222 1222 if self.canonical_parent_order and entry[5] == nullrev:
1223 1223 return entry[6], entry[5]
1224 1224 else:
1225 1225 return entry[5], entry[6]
1226 1226
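# Editor's note: an example of the swap above. With canonical parent order, a
# revision stored with parents (nullrev, 7) is reported as (7, nullrev), so
# the first parent is non-null whenever any parent is; filelogs opt out
# because they use parent order to mark meta-encoded entries.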
1227 1227 # fast parentrevs(rev) where rev isn't filtered
1228 1228 _uncheckedparentrevs = parentrevs
1229 1229
1230 1230 def node(self, rev):
1231 1231 try:
1232 1232 return self.index[rev][7]
1233 1233 except IndexError:
1234 1234 if rev == wdirrev:
1235 1235 raise error.WdirUnsupported
1236 1236 raise
1237 1237
1238 1238 # Derived from index values.
1239 1239
1240 1240 def end(self, rev):
1241 1241 return self.start(rev) + self.length(rev)
1242 1242
1243 1243 def parents(self, node):
1244 1244 i = self.index
1245 1245 d = i[self.rev(node)]
1246 1246 # inline node() to avoid function call overhead
1247 1247 if self.canonical_parent_order and d[5] == self.nullid:
1248 1248 return i[d[6]][7], i[d[5]][7]
1249 1249 else:
1250 1250 return i[d[5]][7], i[d[6]][7]
1251 1251
1252 1252 def chainlen(self, rev):
1253 1253 return self._chaininfo(rev)[0]
1254 1254
1255 1255 def _chaininfo(self, rev):
1256 1256 chaininfocache = self._chaininfocache
1257 1257 if rev in chaininfocache:
1258 1258 return chaininfocache[rev]
1259 1259 index = self.index
1260 1260 generaldelta = self.delta_config.general_delta
1261 1261 iterrev = rev
1262 1262 e = index[iterrev]
1263 1263 clen = 0
1264 1264 compresseddeltalen = 0
1265 1265 while iterrev != e[3]:
1266 1266 clen += 1
1267 1267 compresseddeltalen += e[1]
1268 1268 if generaldelta:
1269 1269 iterrev = e[3]
1270 1270 else:
1271 1271 iterrev -= 1
1272 1272 if iterrev in chaininfocache:
1273 1273 t = chaininfocache[iterrev]
1274 1274 clen += t[0]
1275 1275 compresseddeltalen += t[1]
1276 1276 break
1277 1277 e = index[iterrev]
1278 1278 else:
1279 1279 # Add text length of base since decompressing that also takes
1280 1280 # work. For cache hits the length is already included.
1281 1281 compresseddeltalen += e[1]
1282 1282 r = (clen, compresseddeltalen)
1283 1283 chaininfocache[rev] = r
1284 1284 return r
1285 1285
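# Editor's note: without generaldelta the delta base of a revision is
# implicitly rev - 1, so the walk above simply decrements the rev; with
# generaldelta it follows the base stored in entry[3], which can make chains
# both shorter and non-contiguous.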
1286 1286 def _deltachain(self, rev, stoprev=None):
1287 1287 """Obtain the delta chain for a revision.
1288 1288
1289 1289 ``stoprev`` specifies a revision to stop at. If not specified, we
1290 1290 stop at the base of the chain.
1291 1291
1292 1292 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1293 1293 revs in ascending order and ``stopped`` is a bool indicating whether
1294 1294 ``stoprev`` was hit.
1295 1295 """
1296 1296 generaldelta = self.delta_config.general_delta
1297 1297 # Try C implementation.
1298 1298 try:
1299 1299 return self.index.deltachain(rev, stoprev, generaldelta)
1300 1300 except AttributeError:
1301 1301 pass
1302 1302
1303 1303 chain = []
1304 1304
1305 1305 # Alias to prevent attribute lookup in tight loop.
1306 1306 index = self.index
1307 1307
1308 1308 iterrev = rev
1309 1309 e = index[iterrev]
1310 1310 while iterrev != e[3] and iterrev != stoprev:
1311 1311 chain.append(iterrev)
1312 1312 if generaldelta:
1313 1313 iterrev = e[3]
1314 1314 else:
1315 1315 iterrev -= 1
1316 1316 e = index[iterrev]
1317 1317
1318 1318 if iterrev == stoprev:
1319 1319 stopped = True
1320 1320 else:
1321 1321 chain.append(iterrev)
1322 1322 stopped = False
1323 1323
1324 1324 chain.reverse()
1325 1325 return chain, stopped
1326 1326
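# Editor's note: an example with generaldelta and stored bases
# 5 -> 3 -> 1 -> 1 (rev 1 is a full snapshot, so its base is itself):
# ``_deltachain(5)`` returns ([1, 3, 5], False), while
# ``_deltachain(5, stoprev=3)`` stops early and returns ([5], True).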
1327 1327 def ancestors(self, revs, stoprev=0, inclusive=False):
1328 1328 """Generate the ancestors of 'revs' in reverse revision order.
1329 1329 Does not generate revs lower than stoprev.
1330 1330
1331 1331 See the documentation for ancestor.lazyancestors for more details."""
1332 1332
1333 1333 # first, make sure start revisions aren't filtered
1334 1334 revs = list(revs)
1335 1335 checkrev = self.node
1336 1336 for r in revs:
1337 1337 checkrev(r)
1338 1338 # and we're sure ancestors aren't filtered as well
1339 1339
1340 1340 if rustancestor is not None and self.index.rust_ext_compat:
1341 1341 lazyancestors = rustancestor.LazyAncestors
1342 1342 arg = self.index
1343 1343 else:
1344 1344 lazyancestors = ancestor.lazyancestors
1345 1345 arg = self._uncheckedparentrevs
1346 1346 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1347 1347
1348 1348 def descendants(self, revs):
1349 1349 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1350 1350
1351 1351 def findcommonmissing(self, common=None, heads=None):
1352 1352 """Return a tuple of the ancestors of common and the ancestors of heads
1353 1353 that are not ancestors of common. In revset terminology, we return the
1354 1354 tuple:
1355 1355
1356 1356 ::common, (::heads) - (::common)
1357 1357
1358 1358 The list is sorted by revision number, meaning it is
1359 1359 topologically sorted.
1360 1360
1361 1361 'heads' and 'common' are both lists of node IDs. If heads is
1362 1362 not supplied, uses all of the revlog's heads. If common is not
1363 1363 supplied, uses nullid."""
1364 1364 if common is None:
1365 1365 common = [self.nullid]
1366 1366 if heads is None:
1367 1367 heads = self.heads()
1368 1368
1369 1369 common = [self.rev(n) for n in common]
1370 1370 heads = [self.rev(n) for n in heads]
1371 1371
1372 1372 # we want the ancestors, but inclusive
1373 1373 class lazyset:
1374 1374 def __init__(self, lazyvalues):
1375 1375 self.addedvalues = set()
1376 1376 self.lazyvalues = lazyvalues
1377 1377
1378 1378 def __contains__(self, value):
1379 1379 return value in self.addedvalues or value in self.lazyvalues
1380 1380
1381 1381 def __iter__(self):
1382 1382 added = self.addedvalues
1383 1383 for r in added:
1384 1384 yield r
1385 1385 for r in self.lazyvalues:
1386 1386 if r not in added:
1387 1387 yield r
1388 1388
1389 1389 def add(self, value):
1390 1390 self.addedvalues.add(value)
1391 1391
1392 1392 def update(self, values):
1393 1393 self.addedvalues.update(values)
1394 1394
1395 1395 has = lazyset(self.ancestors(common))
1396 1396 has.add(nullrev)
1397 1397 has.update(common)
1398 1398
1399 1399 # take all ancestors from heads that aren't in has
1400 1400 missing = set()
1401 1401 visit = collections.deque(r for r in heads if r not in has)
1402 1402 while visit:
1403 1403 r = visit.popleft()
1404 1404 if r in missing:
1405 1405 continue
1406 1406 else:
1407 1407 missing.add(r)
1408 1408 for p in self.parentrevs(r):
1409 1409 if p not in has:
1410 1410 visit.append(p)
1411 1411 missing = list(missing)
1412 1412 missing.sort()
1413 1413 return has, [self.node(miss) for miss in missing]
1414 1414
1415 1415 def incrementalmissingrevs(self, common=None):
1416 1416 """Return an object that can be used to incrementally compute the
1417 1417 revision numbers of the ancestors of arbitrary sets that are not
1418 1418 ancestors of common. This is an ancestor.incrementalmissingancestors
1419 1419 object.
1420 1420
1421 1421 'common' is a list of revision numbers. If common is not supplied, uses
1422 1422 nullrev.
1423 1423 """
1424 1424 if common is None:
1425 1425 common = [nullrev]
1426 1426
1427 1427 if rustancestor is not None and self.index.rust_ext_compat:
1428 1428 return rustancestor.MissingAncestors(self.index, common)
1429 1429 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1430 1430
1431 1431 def findmissingrevs(self, common=None, heads=None):
1432 1432 """Return the revision numbers of the ancestors of heads that
1433 1433 are not ancestors of common.
1434 1434
1435 1435 More specifically, return a list of revision numbers corresponding to
1436 1436 nodes N such that every N satisfies the following constraints:
1437 1437
1438 1438 1. N is an ancestor of some node in 'heads'
1439 1439 2. N is not an ancestor of any node in 'common'
1440 1440
1441 1441 The list is sorted by revision number, meaning it is
1442 1442 topologically sorted.
1443 1443
1444 1444 'heads' and 'common' are both lists of revision numbers. If heads is
1445 1445 not supplied, uses all of the revlog's heads. If common is not
1446 1446 supplied, uses nullid."""
1447 1447 if common is None:
1448 1448 common = [nullrev]
1449 1449 if heads is None:
1450 1450 heads = self.headrevs()
1451 1451
1452 1452 inc = self.incrementalmissingrevs(common=common)
1453 1453 return inc.missingancestors(heads)
1454 1454
1455 1455 def findmissing(self, common=None, heads=None):
1456 1456 """Return the ancestors of heads that are not ancestors of common.
1457 1457
1458 1458 More specifically, return a list of nodes N such that every N
1459 1459 satisfies the following constraints:
1460 1460
1461 1461 1. N is an ancestor of some node in 'heads'
1462 1462 2. N is not an ancestor of any node in 'common'
1463 1463
1464 1464 The list is sorted by revision number, meaning it is
1465 1465 topologically sorted.
1466 1466
1467 1467 'heads' and 'common' are both lists of node IDs. If heads is
1468 1468 not supplied, uses all of the revlog's heads. If common is not
1469 1469 supplied, uses nullid."""
1470 1470 if common is None:
1471 1471 common = [self.nullid]
1472 1472 if heads is None:
1473 1473 heads = self.heads()
1474 1474
1475 1475 common = [self.rev(n) for n in common]
1476 1476 heads = [self.rev(n) for n in heads]
1477 1477
1478 1478 inc = self.incrementalmissingrevs(common=common)
1479 1479 return [self.node(r) for r in inc.missingancestors(heads)]
1480 1480
1481 1481 def nodesbetween(self, roots=None, heads=None):
1482 1482 """Return a topological path from 'roots' to 'heads'.
1483 1483
1484 1484 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1485 1485 topologically sorted list of all nodes N that satisfy both of
1486 1486 these constraints:
1487 1487
1488 1488 1. N is a descendant of some node in 'roots'
1489 1489 2. N is an ancestor of some node in 'heads'
1490 1490
1491 1491 Every node is considered to be both a descendant and an ancestor
1492 1492 of itself, so every reachable node in 'roots' and 'heads' will be
1493 1493 included in 'nodes'.
1494 1494
1495 1495 'outroots' is the list of reachable nodes in 'roots', i.e., the
1496 1496 subset of 'roots' that is returned in 'nodes'. Likewise,
1497 1497 'outheads' is the subset of 'heads' that is also in 'nodes'.
1498 1498
1499 1499 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1500 1500 unspecified, uses nullid as the only root. If 'heads' is
1501 1501 unspecified, uses list of all of the revlog's heads."""
1502 1502 nonodes = ([], [], [])
1503 1503 if roots is not None:
1504 1504 roots = list(roots)
1505 1505 if not roots:
1506 1506 return nonodes
1507 1507 lowestrev = min([self.rev(n) for n in roots])
1508 1508 else:
1509 1509 roots = [self.nullid] # Everybody's a descendant of nullid
1510 1510 lowestrev = nullrev
1511 1511 if (lowestrev == nullrev) and (heads is None):
1512 1512 # We want _all_ the nodes!
1513 1513 return (
1514 1514 [self.node(r) for r in self],
1515 1515 [self.nullid],
1516 1516 list(self.heads()),
1517 1517 )
1518 1518 if heads is None:
1519 1519 # All nodes are ancestors, so the latest ancestor is the last
1520 1520 # node.
1521 1521 highestrev = len(self) - 1
1522 1522 # Set ancestors to None to signal that every node is an ancestor.
1523 1523 ancestors = None
1524 1524 # Set heads to an empty dictionary for later discovery of heads
1525 1525 heads = {}
1526 1526 else:
1527 1527 heads = list(heads)
1528 1528 if not heads:
1529 1529 return nonodes
1530 1530 ancestors = set()
1531 1531 # Turn heads into a dictionary so we can remove 'fake' heads.
1532 1532 # Also, later we will be using it to filter out the heads we can't
1533 1533 # find from roots.
1534 1534 heads = dict.fromkeys(heads, False)
1535 1535 # Start at the top and keep marking parents until we're done.
1536 1536 nodestotag = set(heads)
1537 1537 # Remember where the top was so we can use it as a limit later.
1538 1538 highestrev = max([self.rev(n) for n in nodestotag])
1539 1539 while nodestotag:
1540 1540 # grab a node to tag
1541 1541 n = nodestotag.pop()
1542 1542 # Never tag nullid
1543 1543 if n == self.nullid:
1544 1544 continue
1545 1545 # A node's revision number represents its place in a
1546 1546 # topologically sorted list of nodes.
1547 1547 r = self.rev(n)
1548 1548 if r >= lowestrev:
1549 1549 if n not in ancestors:
1550 1550 # If we are possibly a descendant of one of the roots
1551 1551 # and we haven't already been marked as an ancestor
1552 1552 ancestors.add(n) # Mark as ancestor
1553 1553 # Add non-nullid parents to list of nodes to tag.
1554 1554 nodestotag.update(
1555 1555 [p for p in self.parents(n) if p != self.nullid]
1556 1556 )
1557 1557 elif n in heads: # We've seen it before, is it a fake head?
1558 1558 # So it is, real heads should not be the ancestors of
1559 1559 # any other heads.
1560 1560 heads.pop(n)
1561 1561 if not ancestors:
1562 1562 return nonodes
1563 1563 # Now that we have our set of ancestors, we want to remove any
1564 1564 # roots that are not ancestors.
1565 1565
1566 1566 # If one of the roots was nullid, everything is included anyway.
1567 1567 if lowestrev > nullrev:
1568 1568 # But, since we weren't, let's recompute the lowest rev to not
1569 1569 # include roots that aren't ancestors.
1570 1570
1571 1571 # Filter out roots that aren't ancestors of heads
1572 1572 roots = [root for root in roots if root in ancestors]
1573 1573 # Recompute the lowest revision
1574 1574 if roots:
1575 1575 lowestrev = min([self.rev(root) for root in roots])
1576 1576 else:
1577 1577 # No more roots? Return empty list
1578 1578 return nonodes
1579 1579 else:
1580 1580 # We are descending from nullid, and don't need to care about
1581 1581 # any other roots.
1582 1582 lowestrev = nullrev
1583 1583 roots = [self.nullid]
1584 1584 # Transform our roots list into a set.
1585 1585 descendants = set(roots)
1586 1586 # Also, keep the original roots so we can filter out roots that aren't
1587 1587 # 'real' roots (i.e. are descended from other roots).
1588 1588 roots = descendants.copy()
1589 1589 # Our topologically sorted list of output nodes.
1590 1590 orderedout = []
1591 1591 # Don't start at nullid since we don't want nullid in our output list,
1592 1592 # and if nullid shows up in descendants, empty parents will look like
1593 1593 # they're descendants.
1594 1594 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1595 1595 n = self.node(r)
1596 1596 isdescendant = False
1597 1597 if lowestrev == nullrev: # Everybody is a descendant of nullid
1598 1598 isdescendant = True
1599 1599 elif n in descendants:
1600 1600 # n is already a descendant
1601 1601 isdescendant = True
1602 1602 # This check only needs to be done here because all the roots
1603 1603 # will start being marked as descendants before the loop.
1604 1604 if n in roots:
1605 1605 # If n was a root, check if it's a 'real' root.
1606 1606 p = tuple(self.parents(n))
1607 1607 # If any of its parents are descendants, it's not a root.
1608 1608 if (p[0] in descendants) or (p[1] in descendants):
1609 1609 roots.remove(n)
1610 1610 else:
1611 1611 p = tuple(self.parents(n))
1612 1612 # A node is a descendant if either of its parents is a
1613 1613 # descendant. (We seeded the descendants set with the roots
1614 1614 # up there, remember?)
1615 1615 if (p[0] in descendants) or (p[1] in descendants):
1616 1616 descendants.add(n)
1617 1617 isdescendant = True
1618 1618 if isdescendant and ((ancestors is None) or (n in ancestors)):
1619 1619 # Only include nodes that are both descendants and ancestors.
1620 1620 orderedout.append(n)
1621 1621 if (ancestors is not None) and (n in heads):
1622 1622 # We're trying to figure out which heads are reachable
1623 1623 # from roots.
1624 1624 # Mark this head as having been reached
1625 1625 heads[n] = True
1626 1626 elif ancestors is None:
1627 1627 # Otherwise, we're trying to discover the heads.
1628 1628 # Assume this is a head because if it isn't, the next step
1629 1629 # will eventually remove it.
1630 1630 heads[n] = True
1631 1631 # But, obviously its parents aren't.
1632 1632 for p in self.parents(n):
1633 1633 heads.pop(p, None)
1634 1634 heads = [head for head, flag in heads.items() if flag]
1635 1635 roots = list(roots)
1636 1636 assert orderedout
1637 1637 assert roots
1638 1638 assert heads
1639 1639 return (orderedout, roots, heads)
1640 1640
1641 1641 def headrevs(self, revs=None):
1642 1642 if revs is None:
1643 1643 try:
1644 1644 return self.index.headrevs()
1645 1645 except AttributeError:
1646 1646 return self._headrevs()
1647 1647 if rustdagop is not None and self.index.rust_ext_compat:
1648 1648 return rustdagop.headrevs(self.index, revs)
1649 1649 return dagop.headrevs(revs, self._uncheckedparentrevs)
1650 1650
1651 1651 def computephases(self, roots):
1652 1652 return self.index.computephasesmapsets(roots)
1653 1653
1654 1654 def _headrevs(self):
1655 1655 count = len(self)
1656 1656 if not count:
1657 1657 return [nullrev]
1658 1658 # we won't iterate over filtered revs, so nobody is a head at the start
1659 1659 ishead = [0] * (count + 1)
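# the list has one extra trailing slot so that nullrev (-1) parents
# written via ishead[-1] land in that spare slot instead of clobbering
# a real revision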
1660 1660 index = self.index
1661 1661 for r in self:
1662 1662 ishead[r] = 1 # I may be a head
1663 1663 e = index[r]
1664 1664 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1665 1665 return [r for r, val in enumerate(ishead) if val]
1666 1666
1667 1667 def heads(self, start=None, stop=None):
1668 1668 """return the list of all nodes that have no children
1669 1669
1670 1670 if start is specified, only heads that are descendants of
1671 1671 start will be returned
1672 1672 if stop is specified, it will consider all the revisions in stop
1673 1673 as if they had no children
1674 1674 """
1675 1675 if start is None and stop is None:
1676 1676 if not len(self):
1677 1677 return [self.nullid]
1678 1678 return [self.node(r) for r in self.headrevs()]
1679 1679
1680 1680 if start is None:
1681 1681 start = nullrev
1682 1682 else:
1683 1683 start = self.rev(start)
1684 1684
1685 1685 stoprevs = {self.rev(n) for n in stop or []}
1686 1686
1687 1687 revs = dagop.headrevssubset(
1688 1688 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1689 1689 )
1690 1690
1691 1691 return [self.node(rev) for rev in revs]
1692 1692
1693 1693 def children(self, node):
1694 1694 """find the children of a given node"""
1695 1695 c = []
1696 1696 p = self.rev(node)
1697 1697 for r in self.revs(start=p + 1):
1698 1698 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
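# a revision whose only parents are null yields an empty `prevs` and
# is treated as a child of the null revision in the elif branch below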
1699 1699 if prevs:
1700 1700 for pr in prevs:
1701 1701 if pr == p:
1702 1702 c.append(self.node(r))
1703 1703 elif p == nullrev:
1704 1704 c.append(self.node(r))
1705 1705 return c
1706 1706
1707 1707 def commonancestorsheads(self, a, b):
1708 1708 """calculate all the heads of the common ancestors of nodes a and b"""
1709 1709 a, b = self.rev(a), self.rev(b)
1710 1710 ancs = self._commonancestorsheads(a, b)
1711 1711 return pycompat.maplist(self.node, ancs)
1712 1712
1713 1713 def _commonancestorsheads(self, *revs):
1714 1714 """calculate all the heads of the common ancestors of revs"""
1715 1715 try:
1716 1716 ancs = self.index.commonancestorsheads(*revs)
1717 1717 except (AttributeError, OverflowError): # C implementation failed
1718 1718 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1719 1719 return ancs
1720 1720
1721 1721 def isancestor(self, a, b):
1722 1722 """return True if node a is an ancestor of node b
1723 1723
1724 1724 A revision is considered an ancestor of itself."""
1725 1725 a, b = self.rev(a), self.rev(b)
1726 1726 return self.isancestorrev(a, b)
1727 1727
1728 1728 def isancestorrev(self, a, b):
1729 1729 """return True if revision a is an ancestor of revision b
1730 1730
1731 1731 A revision is considered an ancestor of itself.
1732 1732
1733 1733 The implementation of this is trivial but the use of
1734 1734 reachableroots is not."""
1735 1735 if a == nullrev:
1736 1736 return True
1737 1737 elif a == b:
1738 1738 return True
1739 1739 elif a > b:
1740 1740 return False
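# reachableroots(a, [b], [a]) is non-empty exactly when a is reachable
# from b, i.e. when a is an ancestor of b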
1741 1741 return bool(self.reachableroots(a, [b], [a], includepath=False))
1742 1742
1743 1743 def reachableroots(self, minroot, heads, roots, includepath=False):
1744 1744 """return (heads(::(<roots> and <roots>::<heads>)))
1745 1745
1746 1746 If includepath is True, return (<roots>::<heads>)."""
1747 1747 try:
1748 1748 return self.index.reachableroots2(
1749 1749 minroot, heads, roots, includepath
1750 1750 )
1751 1751 except AttributeError:
1752 1752 return dagop._reachablerootspure(
1753 1753 self.parentrevs, minroot, roots, heads, includepath
1754 1754 )
1755 1755
1756 1756 def ancestor(self, a, b):
1757 1757 """calculate the "best" common ancestor of nodes a and b"""
1758 1758
1759 1759 a, b = self.rev(a), self.rev(b)
1760 1760 try:
1761 1761 ancs = self.index.ancestors(a, b)
1762 1762 except (AttributeError, OverflowError):
1763 1763 ancs = ancestor.ancestors(self.parentrevs, a, b)
1764 1764 if ancs:
1765 1765 # choose a consistent winner when there's a tie
1766 1766 return min(map(self.node, ancs))
1767 1767 return self.nullid
1768 1768
1769 1769 def _match(self, id):
1770 1770 if isinstance(id, int):
1771 1771 # rev
1772 1772 return self.node(id)
1773 1773 if len(id) == self.nodeconstants.nodelen:
1774 1774 # possibly a binary node
1775 1775 # odds of a binary node being all hex in ASCII are 1 in 10**25
1776 1776 try:
1777 1777 node = id
1778 1778 self.rev(node) # quick search the index
1779 1779 return node
1780 1780 except error.LookupError:
1781 1781 pass # may be partial hex id
1782 1782 try:
1783 1783 # str(rev)
1784 1784 rev = int(id)
1785 1785 if b"%d" % rev != id:
1786 1786 raise ValueError
1787 1787 if rev < 0:
1788 1788 rev = len(self) + rev
1789 1789 if rev < 0 or rev >= len(self):
1790 1790 raise ValueError
1791 1791 return self.node(rev)
1792 1792 except (ValueError, OverflowError):
1793 1793 pass
1794 1794 if len(id) == 2 * self.nodeconstants.nodelen:
1795 1795 try:
1796 1796 # a full hex nodeid?
1797 1797 node = bin(id)
1798 1798 self.rev(node)
1799 1799 return node
1800 1800 except (binascii.Error, error.LookupError):
1801 1801 pass
1802 1802
1803 1803 def _partialmatch(self, id):
1804 1804 # we don't care about wdirfilenodeids as they should always be full hashes
1805 1805 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1806 1806 ambiguous = False
1807 1807 try:
1808 1808 partial = self.index.partialmatch(id)
1809 1809 if partial and self.hasnode(partial):
1810 1810 if maybewdir:
1811 1811 # single 'ff...' match in radix tree, ambiguous with wdir
1812 1812 ambiguous = True
1813 1813 else:
1814 1814 return partial
1815 1815 elif maybewdir:
1816 1816 # no 'ff...' match in radix tree, wdir identified
1817 1817 raise error.WdirUnsupported
1818 1818 else:
1819 1819 return None
1820 1820 except error.RevlogError:
1821 1821 # parsers.c radix tree lookup gave multiple matches
1822 1822 # fast path: for unfiltered changelog, radix tree is accurate
1823 1823 if not getattr(self, 'filteredrevs', None):
1824 1824 ambiguous = True
1825 1825 # fall through to slow path that filters hidden revisions
1826 1826 except (AttributeError, ValueError):
1827 1827 # we are pure python, or key is not hex
1828 1828 pass
1829 1829 if ambiguous:
1830 1830 raise error.AmbiguousPrefixLookupError(
1831 1831 id, self.display_id, _(b'ambiguous identifier')
1832 1832 )
1833 1833
1834 1834 if id in self._pcache:
1835 1835 return self._pcache[id]
1836 1836
1837 1837 if len(id) <= 40:
1838 1838 # hex(node)[:...]
1839 1839 l = len(id) // 2 * 2 # grab an even number of digits
1840 1840 try:
1841 1841 # we're dropping the last digit, so let's check that it's hex,
1842 1842 # to avoid the expensive computation below if it's not
1843 1843 if len(id) % 2 > 0:
1844 1844 if not (id[-1] in hexdigits):
1845 1845 return None
1846 1846 prefix = bin(id[:l])
1847 1847 except binascii.Error:
1848 1848 pass
1849 1849 else:
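# slow path: linearly scan every index entry for nodes with this
# prefix; used when the C radix tree is unavailable or its answer was
# ambiguous on a filtered repository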
1850 1850 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1851 1851 nl = [
1852 1852 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1853 1853 ]
1854 1854 if self.nodeconstants.nullhex.startswith(id):
1855 1855 nl.append(self.nullid)
1856 1856 if len(nl) > 0:
1857 1857 if len(nl) == 1 and not maybewdir:
1858 1858 self._pcache[id] = nl[0]
1859 1859 return nl[0]
1860 1860 raise error.AmbiguousPrefixLookupError(
1861 1861 id, self.display_id, _(b'ambiguous identifier')
1862 1862 )
1863 1863 if maybewdir:
1864 1864 raise error.WdirUnsupported
1865 1865 return None
1866 1866
1867 1867 def lookup(self, id):
1868 1868 """locate a node based on:
1869 1869 - revision number or str(revision number)
1870 1870 - nodeid or subset of hex nodeid
1871 1871 """
1872 1872 n = self._match(id)
1873 1873 if n is not None:
1874 1874 return n
1875 1875 n = self._partialmatch(id)
1876 1876 if n:
1877 1877 return n
1878 1878
1879 1879 raise error.LookupError(id, self.display_id, _(b'no match found'))
1880 1880
1881 1881 def shortest(self, node, minlength=1):
1882 1882 """Find the shortest unambiguous prefix that matches node."""
1883 1883
1884 1884 def isvalid(prefix):
1885 1885 try:
1886 1886 matchednode = self._partialmatch(prefix)
1887 1887 except error.AmbiguousPrefixLookupError:
1888 1888 return False
1889 1889 except error.WdirUnsupported:
1890 1890 # single 'ff...' match
1891 1891 return True
1892 1892 if matchednode is None:
1893 1893 raise error.LookupError(node, self.display_id, _(b'no node'))
1894 1894 return True
1895 1895
1896 1896 def maybewdir(prefix):
1897 1897 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1898 1898
1899 1899 hexnode = hex(node)
1900 1900
1901 1901 def disambiguate(hexnode, minlength):
1902 1902 """Disambiguate against wdirid."""
1903 1903 for length in range(minlength, len(hexnode) + 1):
1904 1904 prefix = hexnode[:length]
1905 1905 if not maybewdir(prefix):
1906 1906 return prefix
1907 1907
1908 1908 if not getattr(self, 'filteredrevs', None):
1909 1909 try:
1910 1910 length = max(self.index.shortest(node), minlength)
1911 1911 return disambiguate(hexnode, length)
1912 1912 except error.RevlogError:
1913 1913 if node != self.nodeconstants.wdirid:
1914 1914 raise error.LookupError(
1915 1915 node, self.display_id, _(b'no node')
1916 1916 )
1917 1917 except AttributeError:
1918 1918 # Fall through to pure code
1919 1919 pass
1920 1920
1921 1921 if node == self.nodeconstants.wdirid:
1922 1922 for length in range(minlength, len(hexnode) + 1):
1923 1923 prefix = hexnode[:length]
1924 1924 if isvalid(prefix):
1925 1925 return prefix
1926 1926
1927 1927 for length in range(minlength, len(hexnode) + 1):
1928 1928 prefix = hexnode[:length]
1929 1929 if isvalid(prefix):
1930 1930 return disambiguate(hexnode, length)
1931 1931
1932 1932 def cmp(self, node, text):
1933 1933 """compare text with a given file revision
1934 1934
1935 1935 returns True if text is different from what is stored.
1936 1936 """
1937 1937 p1, p2 = self.parents(node)
1938 1938 return storageutil.hashrevisionsha1(text, p1, p2) != node
1939 1939
1940 1940 def _getsegmentforrevs(self, startrev, endrev):
1941 1941 """Obtain a segment of raw data corresponding to a range of revisions.
1942 1942
1943 1943 Accepts the start and end revisions.
1946 1946
1947 1947 Requests for data may be satisfied by a cache.
1948 1948
1949 1949 Returns a 2-tuple of (offset, data) for the requested range of
1950 1950 revisions. Offset is the integer offset from the beginning of the
1951 1951 revlog and data is a str or buffer of the raw byte data.
1952 1952
1953 1953 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1954 1954 to determine where each revision's data begins and ends.
1955 1955 """
1956 1956 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1957 1957 # (functions are expensive).
1958 1958 index = self.index
1959 1959 istart = index[startrev]
1960 1960 start = int(istart[0] >> 16)
1961 1961 if startrev == endrev:
1962 1962 end = start + istart[1]
1963 1963 else:
1964 1964 iend = index[endrev]
1965 1965 end = int(iend[0] >> 16) + iend[1]
1966 1966
1967 1967 if self._inline:
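# inline revlogs interleave index entries and data in the same file:
# each revision's chunk is preceded by (rev + 1) index entries, so
# shift both offsets past them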
1968 1968 start += (startrev + 1) * self.index.entry_size
1969 1969 end += (endrev + 1) * self.index.entry_size
1970 1970 length = end - start
1971 1971
1972 1972 return start, self._segmentfile.read_chunk(start, length)
1973 1973
1974 1974 def _chunk(self, rev):
1975 1975 """Obtain a single decompressed chunk for a revision.
1976 1976
1977 1977 Accepts an integer revision.
1980 1980
1981 1981 Returns a str holding uncompressed data for the requested revision.
1982 1982 """
1983 1983 compression_mode = self.index[rev][10]
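# COMP_MODE_PLAIN: chunk stored as-is; COMP_MODE_DEFAULT: compressed
# with the revlog's default engine (from the docket);
# COMP_MODE_INLINE: the chunk carries its own one-byte compression header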
1984 1984 data = self._getsegmentforrevs(rev, rev)[1]
1985 1985 if compression_mode == COMP_MODE_PLAIN:
1986 1986 return data
1987 1987 elif compression_mode == COMP_MODE_DEFAULT:
1988 1988 return self._decompressor(data)
1989 1989 elif compression_mode == COMP_MODE_INLINE:
1990 1990 return self.decompress(data)
1991 1991 else:
1992 1992 msg = b'unknown compression mode %d'
1993 1993 msg %= compression_mode
1994 1994 raise error.RevlogError(msg)
1995 1995
1996 1996 def _chunks(self, revs, targetsize=None):
1997 1997 """Obtain decompressed chunks for the specified revisions.
1998 1998
1999 1999 Accepts an iterable of numeric revisions that are assumed to be in
2000 2000 ascending order.
2003 2003
2004 2004 This function is similar to calling ``self._chunk()`` multiple times,
2005 2005 but is faster.
2006 2006
2007 2007 Returns a list with decompressed data for each requested revision.
2008 2008 """
2009 2009 if not revs:
2010 2010 return []
2011 2011 start = self.start
2012 2012 length = self.length
2013 2013 inline = self._inline
2014 2014 iosize = self.index.entry_size
2015 2015 buffer = util.buffer
2016 2016
2017 2017 l = []
2018 2018 ladd = l.append
2019 2019
2020 2020 if not self._withsparseread:
2021 2021 slicedchunks = (revs,)
2022 2022 else:
2023 2023 slicedchunks = deltautil.slicechunk(
2024 2024 self, revs, targetsize=targetsize
2025 2025 )
2026 2026
2027 2027 for revschunk in slicedchunks:
2028 2028 firstrev = revschunk[0]
2029 2029 # Skip trailing revisions with empty diff
2030 2030 for lastrev in revschunk[::-1]:
2031 2031 if length(lastrev) != 0:
2032 2032 break
2033 2033
2034 2034 try:
2035 2035 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2036 2036 except OverflowError:
2037 2037 # issue4215 - we can't cache a run of chunks greater than
2038 2038 # 2G on Windows
2039 2039 return [self._chunk(rev) for rev in revschunk]
2040 2040
2041 2041 decomp = self.decompress
2042 2042 # self._decompressor might be None, but will not be used in that case
2043 2043 def_decomp = self._decompressor
2044 2044 for rev in revschunk:
2045 2045 chunkstart = start(rev)
2046 2046 if inline:
2047 2047 chunkstart += (rev + 1) * iosize
2048 2048 chunklength = length(rev)
2049 2049 comp_mode = self.index[rev][10]
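# zero-copy view of this revision's bytes within the segment read above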
2050 2050 c = buffer(data, chunkstart - offset, chunklength)
2051 2051 if comp_mode == COMP_MODE_PLAIN:
2052 2052 ladd(c)
2053 2053 elif comp_mode == COMP_MODE_INLINE:
2054 2054 ladd(decomp(c))
2055 2055 elif comp_mode == COMP_MODE_DEFAULT:
2056 2056 ladd(def_decomp(c))
2057 2057 else:
2058 2058 msg = b'unknown compression mode %d'
2059 2059 msg %= comp_mode
2060 2060 raise error.RevlogError(msg)
2061 2061
2062 2062 return l
2063 2063
2064 2064 def deltaparent(self, rev):
2065 2065 """return deltaparent of the given revision"""
2066 2066 base = self.index[rev][3]
2067 2067 if base == rev:
2068 2068 return nullrev
2069 2069 elif self.delta_config.general_delta:
2070 2070 return base
2071 2071 else:
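# without general-delta the index field holds the chain base, and
# every delta applies against the immediately preceding revision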
2072 2072 return rev - 1
2073 2073
2074 2074 def issnapshot(self, rev):
2075 2075 """tells whether rev is a snapshot"""
2076 2076 if not self._sparserevlog:
2077 2077 return self.deltaparent(rev) == nullrev
2078 2078 elif hasattr(self.index, 'issnapshot'):
2079 2079 # directly assign the method to cache the testing and access
2080 2080 self.issnapshot = self.index.issnapshot
2081 2081 return self.issnapshot(rev)
2082 2082 if rev == nullrev:
2083 2083 return True
2084 2084 entry = self.index[rev]
2085 2085 base = entry[3]
2086 2086 if base == rev:
2087 2087 return True
2088 2088 if base == nullrev:
2089 2089 return True
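# a revision whose stored delta is empty has the same content as its
# delta parent, so walk past such parents and compare the base against
# the first ancestors that actually store content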
2090 2090 p1 = entry[5]
2091 2091 while self.length(p1) == 0:
2092 2092 b = self.deltaparent(p1)
2093 2093 if b == p1:
2094 2094 break
2095 2095 p1 = b
2096 2096 p2 = entry[6]
2097 2097 while self.length(p2) == 0:
2098 2098 b = self.deltaparent(p2)
2099 2099 if b == p2:
2100 2100 break
2101 2101 p2 = b
2102 2102 if base == p1 or base == p2:
2103 2103 return False
2104 2104 return self.issnapshot(base)
2105 2105
2106 2106 def snapshotdepth(self, rev):
2107 2107 """number of snapshot in the chain before this one"""
2108 2108 if not self.issnapshot(rev):
2109 2109 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2110 2110 return len(self._deltachain(rev)[0]) - 1
2111 2111
2112 2112 def revdiff(self, rev1, rev2):
2113 2113 """return or calculate a delta between two revisions
2114 2114
2115 2115 The delta calculated is in binary form and is intended to be written to
2116 2116 revlog data directly. So this function needs raw revision data.
2117 2117 """
2118 2118 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2119 2119 return bytes(self._chunk(rev2))
2120 2120
2121 2121 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2122 2122
2123 2123 def revision(self, nodeorrev):
2124 2124 """return an uncompressed revision of a given node or revision
2125 2125 number.
2126 2126 """
2127 2127 return self._revisiondata(nodeorrev)
2128 2128
2129 2129 def sidedata(self, nodeorrev):
2130 2130 """a map of extra data related to the changeset but not part of the hash
2131 2131
2132 2132 This function currently returns a dictionary. However, a more advanced
2133 2133 mapping object will likely be used in the future to make the code more
2134 2134 efficient/lazy.
2135 2135 """
2136 2136 # deal with <nodeorrev> argument type
2137 2137 if isinstance(nodeorrev, int):
2138 2138 rev = nodeorrev
2139 2139 else:
2140 2140 rev = self.rev(nodeorrev)
2141 2141 return self._sidedata(rev)
2142 2142
2143 2143 def _revisiondata(self, nodeorrev, raw=False):
2144 2144 # deal with <nodeorrev> argument type
2145 2145 if isinstance(nodeorrev, int):
2146 2146 rev = nodeorrev
2147 2147 node = self.node(rev)
2148 2148 else:
2149 2149 node = nodeorrev
2150 2150 rev = None
2151 2151
2152 2152 # fast path the special `nullid` rev
2153 2153 if node == self.nullid:
2154 2154 return b""
2155 2155
2156 2156 # ``rawtext`` is the text as stored inside the revlog. Might be the
2157 2157 # revision or might need to be processed to retrieve the revision.
2158 2158 rev, rawtext, validated = self._rawtext(node, rev)
2159 2159
2160 2160 if raw and validated:
2161 2161 # if we don't want to process the raw text and that raw
2162 2162 # text is cached, we can exit early.
2163 2163 return rawtext
2164 2164 if rev is None:
2165 2165 rev = self.rev(node)
2166 2166 # the revlog's flags for this revision
2167 2167 # (they usually alter its state or content)
2168 2168 flags = self.flags(rev)
2169 2169
2170 2170 if validated and flags == REVIDX_DEFAULT_FLAGS:
2171 2171 # no extra flags set, no flag processor runs, text = rawtext
2172 2172 return rawtext
2173 2173
2174 2174 if raw:
2175 2175 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2176 2176 text = rawtext
2177 2177 else:
2178 2178 r = flagutil.processflagsread(self, rawtext, flags)
2179 2179 text, validatehash = r
2180 2180 if validatehash:
2181 2181 self.checkhash(text, node, rev=rev)
2182 2182 if not validated:
2183 2183 self._revisioncache = (node, rev, rawtext)
2184 2184
2185 2185 return text
2186 2186
2187 2187 def _rawtext(self, node, rev):
2188 2188 """return the possibly unvalidated rawtext for a revision
2189 2189
2190 2190 returns (rev, rawtext, validated)
2191 2191 """
2192 2192
2193 2193 # revision in the cache (could be useful to apply delta)
2194 2194 cachedrev = None
2195 2195 # An intermediate text to apply deltas to
2196 2196 basetext = None
2197 2197
2198 2198 # Check if we have the entry in cache
2199 2199 # The cache entry looks like (node, rev, rawtext)
2200 2200 if self._revisioncache:
2201 2201 if self._revisioncache[0] == node:
2202 2202 return (rev, self._revisioncache[2], True)
2203 2203 cachedrev = self._revisioncache[1]
2204 2204
2205 2205 if rev is None:
2206 2206 rev = self.rev(node)
2207 2207
2208 2208 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2209 2209 if stopped:
2210 2210 basetext = self._revisioncache[2]
2211 2211
2212 2212 # drop cache to save memory, the caller is expected to
2213 2213 # update self._revisioncache after validating the text
2214 2214 self._revisioncache = None
2215 2215
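# heuristic: let a sparse read fetch at most ~4x the expanded size of
# the revision being reconstructed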
2216 2216 targetsize = None
2217 2217 rawsize = self.index[rev][2]
2218 2218 if 0 <= rawsize:
2219 2219 targetsize = 4 * rawsize
2220 2220
2221 2221 bins = self._chunks(chain, targetsize=targetsize)
2222 2222 if basetext is None:
2223 2223 basetext = bytes(bins[0])
2224 2224 bins = bins[1:]
2225 2225
2226 2226 rawtext = mdiff.patches(basetext, bins)
2227 2227 del basetext # let us have a chance to free memory early
2228 2228 return (rev, rawtext, False)
2229 2229
2230 2230 def _sidedata(self, rev):
2231 2231 """Return the sidedata for a given revision number."""
2232 2232 index_entry = self.index[rev]
2233 2233 sidedata_offset = index_entry[8]
2234 2234 sidedata_size = index_entry[9]
2235 2235
2236 2236 if self._inline:
2237 2237 sidedata_offset += self.index.entry_size * (1 + rev)
2238 2238 if sidedata_size == 0:
2239 2239 return {}
2240 2240
2241 2241 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2242 2242 filename = self._sidedatafile
2243 2243 end = self._docket.sidedata_end
2244 2244 offset = sidedata_offset
2245 2245 length = sidedata_size
2246 2246 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2247 2247 raise error.RevlogError(m)
2248 2248
2249 2249 comp_segment = self._segmentfile_sidedata.read_chunk(
2250 2250 sidedata_offset, sidedata_size
2251 2251 )
2252 2252
2253 2253 comp = self.index[rev][11]
2254 2254 if comp == COMP_MODE_PLAIN:
2255 2255 segment = comp_segment
2256 2256 elif comp == COMP_MODE_DEFAULT:
2257 2257 segment = self._decompressor(comp_segment)
2258 2258 elif comp == COMP_MODE_INLINE:
2259 2259 segment = self.decompress(comp_segment)
2260 2260 else:
2261 2261 msg = b'unknown compression mode %d'
2262 2262 msg %= comp
2263 2263 raise error.RevlogError(msg)
2264 2264
2265 2265 sidedata = sidedatautil.deserialize_sidedata(segment)
2266 2266 return sidedata
2267 2267
2268 2268 def rawdata(self, nodeorrev):
2269 2269 """return an uncompressed raw data of a given node or revision number."""
2270 2270 return self._revisiondata(nodeorrev, raw=True)
2271 2271
2272 2272 def hash(self, text, p1, p2):
2273 2273 """Compute a node hash.
2274 2274
2275 2275 Available as a function so that subclasses can replace the hash
2276 2276 as needed.
2277 2277 """
2278 2278 return storageutil.hashrevisionsha1(text, p1, p2)
2279 2279
2280 2280 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2281 2281 """Check node hash integrity.
2282 2282
2283 2283 Available as a function so that subclasses can extend hash mismatch
2284 2284 behaviors as needed.
2285 2285 """
2286 2286 try:
2287 2287 if p1 is None and p2 is None:
2288 2288 p1, p2 = self.parents(node)
2289 2289 if node != self.hash(text, p1, p2):
2290 2290 # Clear the revision cache on hash failure. The revision cache
2291 2291 # only stores the raw revision and clearing the cache does have
2292 2292 # the side-effect that we won't have a cache hit when the raw
2293 2293 # revision data is accessed. But this case should be rare and
2294 2294 # it is extra work to teach the cache about the hash
2295 2295 # verification state.
2296 2296 if self._revisioncache and self._revisioncache[0] == node:
2297 2297 self._revisioncache = None
2298 2298
2299 2299 revornode = rev
2300 2300 if revornode is None:
2301 2301 revornode = templatefilters.short(hex(node))
2302 2302 raise error.RevlogError(
2303 2303 _(b"integrity check failed on %s:%s")
2304 2304 % (self.display_id, pycompat.bytestr(revornode))
2305 2305 )
2306 2306 except error.RevlogError:
2307 if self._censorable and storageutil.iscensoredtext(text):
2307 if self.feature_config.censorable and storageutil.iscensoredtext(
2308 text
2309 ):
2308 2310 raise error.CensoredNodeError(self.display_id, node, text)
2309 2311 raise
2310 2312
2311 2313 @property
2312 2314 def _split_index_file(self):
2313 2315 """the path where to expect the index of an ongoing splitting operation
2314 2316
2315 2317 The file will only exist if a splitting operation is in progress, but
2316 2318 it is always expected at the same location."""
2317 2319 parts = self.radix.split(b'/')
2318 2320 if len(parts) > 1:
2319 2321 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2320 2322 head = parts[0] + b'-s'
2321 2323 mids = parts[1:-1]
2322 2324 tail = parts[-1] + b'.i'
2323 2325 pieces = [head] + mids + [tail]
2324 2326 return b'/'.join(pieces)
2325 2327 else:
2326 2328 # the revlog is stored at the root of the store (changelog or
2327 2329 # manifest), no risk of collision.
2328 2330 return self.radix + b'.i.s'
2329 2331
2330 2332 def _enforceinlinesize(self, tr, side_write=True):
2331 2333 """Check if the revlog is too big for inline and convert if so.
2332 2334
2333 2335 This should be called after revisions are added to the revlog. If the
2334 2336 revlog has grown too large to be an inline revlog, it will convert it
2335 2337 to use multiple index and data files.
2336 2338 """
2337 2339 tiprev = len(self) - 1
2338 2340 total_size = self.start(tiprev) + self.length(tiprev)
2339 2341 if not self._inline or total_size < _maxinline:
2340 2342 return
2341 2343
2342 2344 troffset = tr.findoffset(self._indexfile)
2343 2345 if troffset is None:
2344 2346 raise error.RevlogError(
2345 2347 _(b"%s not found in the transaction") % self._indexfile
2346 2348 )
2347 2349 if troffset:
2348 2350 tr.addbackup(self._indexfile, for_offset=True)
2349 2351 tr.add(self._datafile, 0)
2350 2352
2351 2353 existing_handles = False
2352 2354 if self._writinghandles is not None:
2353 2355 existing_handles = True
2354 2356 fp = self._writinghandles[0]
2355 2357 fp.flush()
2356 2358 fp.close()
2357 2359 # We can't use the cached file handle after close(). So prevent
2358 2360 # its usage.
2359 2361 self._writinghandles = None
2360 2362 self._segmentfile.writing_handle = None
2361 2363 # No need to deal with the sidedata writing handle as it is only
2362 2364 # relevant to revlog-v2, which is never inline and so never reaches
2363 2365 # this code
2364 2366 if side_write:
2365 2367 old_index_file_path = self._indexfile
2366 2368 new_index_file_path = self._split_index_file
2367 2369 opener = self.opener
2368 2370 weak_self = weakref.ref(self)
2369 2371
2370 2372 # the "split" index replace the real index when the transaction is finalized
2371 2373 def finalize_callback(tr):
2372 2374 opener.rename(
2373 2375 new_index_file_path,
2374 2376 old_index_file_path,
2375 2377 checkambig=True,
2376 2378 )
2377 2379 maybe_self = weak_self()
2378 2380 if maybe_self is not None:
2379 2381 maybe_self._indexfile = old_index_file_path
2380 2382
2381 2383 def abort_callback(tr):
2382 2384 maybe_self = weak_self()
2383 2385 if maybe_self is not None:
2384 2386 maybe_self._indexfile = old_index_file_path
2385 2387
2386 2388 tr.registertmp(new_index_file_path)
2387 2389 if self.target[1] is not None:
2388 2390 callback_id = b'000-revlog-split-%d-%s' % self.target
2389 2391 else:
2390 2392 callback_id = b'000-revlog-split-%d' % self.target[0]
2391 2393 tr.addfinalize(callback_id, finalize_callback)
2392 2394 tr.addabort(callback_id, abort_callback)
2393 2395
2394 2396 new_dfh = self._datafp(b'w+')
2395 2397 new_dfh.truncate(0) # drop any potentially existing data
2396 2398 try:
2397 2399 with self.reading():
2398 2400 for r in self:
2399 2401 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2400 2402 new_dfh.flush()
2401 2403
2402 2404 if side_write:
2403 2405 self._indexfile = new_index_file_path
2404 2406 with self.__index_new_fp() as fp:
2405 2407 self._format_flags &= ~FLAG_INLINE_DATA
2406 2408 self._inline = False
2407 2409 for i in self:
2408 2410 e = self.index.entry_binary(i)
2409 2411 if i == 0 and self._docket is None:
2410 2412 header = self._format_flags | self._format_version
2411 2413 header = self.index.pack_header(header)
2412 2414 e = header + e
2413 2415 fp.write(e)
2414 2416 if self._docket is not None:
2415 2417 self._docket.index_end = fp.tell()
2416 2418
2417 2419 # If we don't use side-write, the temp file replaces the real
2418 2420 # index when we exit the context manager
2419 2421
2420 2422 nodemaputil.setup_persistent_nodemap(tr, self)
2421 2423 self._segmentfile = randomaccessfile.randomaccessfile(
2422 2424 self.opener,
2423 2425 self._datafile,
2424 2426 self._chunkcachesize,
2425 2427 )
2426 2428
2427 2429 if existing_handles:
2428 2430 # switched from inline to conventional; reopen the index
2429 2431 ifh = self.__index_write_fp()
2430 2432 self._writinghandles = (ifh, new_dfh, None)
2431 2433 self._segmentfile.writing_handle = new_dfh
2432 2434 new_dfh = None
2433 2435 # No need to deal with the sidedata writing handle as it is only
2434 2436 # relevant to revlog-v2, which is never inline and so never reaches
2435 2437 # this code
2436 2438 finally:
2437 2439 if new_dfh is not None:
2438 2440 new_dfh.close()
2439 2441
2440 2442 def _nodeduplicatecallback(self, transaction, node):
2441 2443 """called when trying to add a node already stored."""
2442 2444
2443 2445 @contextlib.contextmanager
2444 2446 def reading(self):
2445 2447 """Context manager that keeps data and sidedata files open for reading"""
2446 2448 if len(self.index) == 0:
2447 2449 yield # nothing to be read
2448 2450 else:
2449 2451 with self._segmentfile.reading():
2450 2452 with self._segmentfile_sidedata.reading():
2451 2453 yield
2452 2454
2453 2455 @contextlib.contextmanager
2454 2456 def _writing(self, transaction):
2455 2457 if self._trypending:
2456 2458 msg = b'try to write in a `trypending` revlog: %s'
2457 2459 msg %= self.display_id
2458 2460 raise error.ProgrammingError(msg)
2459 2461 if self._writinghandles is not None:
2460 2462 yield
2461 2463 else:
2462 2464 ifh = dfh = sdfh = None
2463 2465 try:
2464 2466 r = len(self)
2465 2467 # opening the data file.
2466 2468 dsize = 0
2467 2469 if r:
2468 2470 dsize = self.end(r - 1)
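# dsize records the current data size; registering it with the
# transaction below lets an abort truncate the file back to this point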
2469 2471 dfh = None
2470 2472 if not self._inline:
2471 2473 try:
2472 2474 dfh = self._datafp(b"r+")
2473 2475 if self._docket is None:
2474 2476 dfh.seek(0, os.SEEK_END)
2475 2477 else:
2476 2478 dfh.seek(self._docket.data_end, os.SEEK_SET)
2477 2479 except FileNotFoundError:
2478 2480 dfh = self._datafp(b"w+")
2479 2481 transaction.add(self._datafile, dsize)
2480 2482 if self._sidedatafile is not None:
2481 2483 # revlog-v2 does not inline, help Pytype
2482 2484 assert dfh is not None
2483 2485 try:
2484 2486 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2485 2487 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2486 2488 except FileNotFoundError:
2487 2489 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2488 2490 transaction.add(
2489 2491 self._sidedatafile, self._docket.sidedata_end
2490 2492 )
2491 2493
2492 2494 # opening the index file.
2493 2495 isize = r * self.index.entry_size
2494 2496 ifh = self.__index_write_fp()
2495 2497 if self._inline:
2496 2498 transaction.add(self._indexfile, dsize + isize)
2497 2499 else:
2498 2500 transaction.add(self._indexfile, isize)
2499 2501 # exposing all file handle for writing.
2500 2502 self._writinghandles = (ifh, dfh, sdfh)
2501 2503 self._segmentfile.writing_handle = ifh if self._inline else dfh
2502 2504 self._segmentfile_sidedata.writing_handle = sdfh
2503 2505 yield
2504 2506 if self._docket is not None:
2505 2507 self._write_docket(transaction)
2506 2508 finally:
2507 2509 self._writinghandles = None
2508 2510 self._segmentfile.writing_handle = None
2509 2511 self._segmentfile_sidedata.writing_handle = None
2510 2512 if dfh is not None:
2511 2513 dfh.close()
2512 2514 if sdfh is not None:
2513 2515 sdfh.close()
2514 2516 # close the index file last to avoid exposing references to
2515 2517 # potentially unflushed data content.
2516 2518 if ifh is not None:
2517 2519 ifh.close()
2518 2520
2519 2521 def _write_docket(self, transaction):
2520 2522 """write the current docket on disk
2521 2523
2522 2524 Exists as a method to help the changelog implement its transaction logic
2523 2525
2524 2526 We could also imagine using the same transaction logic for all revlogs
2525 2527 since dockets are cheap."""
2526 2528 self._docket.write(transaction)
2527 2529
2528 2530 def addrevision(
2529 2531 self,
2530 2532 text,
2531 2533 transaction,
2532 2534 link,
2533 2535 p1,
2534 2536 p2,
2535 2537 cachedelta=None,
2536 2538 node=None,
2537 2539 flags=REVIDX_DEFAULT_FLAGS,
2538 2540 deltacomputer=None,
2539 2541 sidedata=None,
2540 2542 ):
2541 2543 """add a revision to the log
2542 2544
2543 2545 text - the revision data to add
2544 2546 transaction - the transaction object used for rollback
2545 2547 link - the linkrev data to add
2546 2548 p1, p2 - the parent nodeids of the revision
2547 2549 cachedelta - an optional precomputed delta
2548 2550 node - nodeid of revision; typically node is not specified, and it is
2549 2551 computed by default as hash(text, p1, p2), however subclasses might
2550 2552 use a different hashing method (and override checkhash() in such a case)
2551 2553 flags - the known flags to set on the revision
2552 2554 deltacomputer - an optional deltacomputer instance shared between
2553 2555 multiple calls
2554 2556 """
2555 2557 if link == nullrev:
2556 2558 raise error.RevlogError(
2557 2559 _(b"attempted to add linkrev -1 to %s") % self.display_id
2558 2560 )
2559 2561
2560 2562 if sidedata is None:
2561 2563 sidedata = {}
2562 2564 elif sidedata and not self.hassidedata:
2563 2565 raise error.ProgrammingError(
2564 2566 _(b"trying to add sidedata to a revlog who don't support them")
2565 2567 )
2566 2568
2567 2569 if flags:
2568 2570 node = node or self.hash(text, p1, p2)
2569 2571
2570 2572 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2571 2573
2572 2574 # If the flag processor modifies the revision data, ignore any provided
2573 2575 # cachedelta.
2574 2576 if rawtext != text:
2575 2577 cachedelta = None
2576 2578
2577 2579 if len(rawtext) > _maxentrysize:
2578 2580 raise error.RevlogError(
2579 2581 _(
2580 2582 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2581 2583 )
2582 2584 % (self.display_id, len(rawtext))
2583 2585 )
2584 2586
2585 2587 node = node or self.hash(rawtext, p1, p2)
2586 2588 rev = self.index.get_rev(node)
2587 2589 if rev is not None:
2588 2590 return rev
2589 2591
2590 2592 if validatehash:
2591 2593 self.checkhash(rawtext, node, p1=p1, p2=p2)
2592 2594
2593 2595 return self.addrawrevision(
2594 2596 rawtext,
2595 2597 transaction,
2596 2598 link,
2597 2599 p1,
2598 2600 p2,
2599 2601 node,
2600 2602 flags,
2601 2603 cachedelta=cachedelta,
2602 2604 deltacomputer=deltacomputer,
2603 2605 sidedata=sidedata,
2604 2606 )
2605 2607
2606 2608 def addrawrevision(
2607 2609 self,
2608 2610 rawtext,
2609 2611 transaction,
2610 2612 link,
2611 2613 p1,
2612 2614 p2,
2613 2615 node,
2614 2616 flags,
2615 2617 cachedelta=None,
2616 2618 deltacomputer=None,
2617 2619 sidedata=None,
2618 2620 ):
2619 2621 """add a raw revision with known flags, node and parents
2620 2622 useful when reusing a revision not stored in this revlog (e.g. received
2621 2623 over the wire, or read from an external bundle).
2622 2624 """
2623 2625 with self._writing(transaction):
2624 2626 return self._addrevision(
2625 2627 node,
2626 2628 rawtext,
2627 2629 transaction,
2628 2630 link,
2629 2631 p1,
2630 2632 p2,
2631 2633 flags,
2632 2634 cachedelta,
2633 2635 deltacomputer=deltacomputer,
2634 2636 sidedata=sidedata,
2635 2637 )
2636 2638
2637 2639 def compress(self, data):
2638 2640 """Generate a possibly-compressed representation of data."""
2639 2641 if not data:
2640 2642 return b'', data
2641 2643
2642 2644 compressed = self._compressor.compress(data)
2643 2645
2644 2646 if compressed:
2645 2647 # The revlog compressor added the header in the returned data.
2646 2648 return b'', compressed
2647 2649
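# data that already starts with NUL can be stored verbatim: the leading
# '\0' byte itself tells decompress() that the chunk is uncompressed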
2648 2650 if data[0:1] == b'\0':
2649 2651 return b'', data
2650 2652 return b'u', data
2651 2653
2652 2654 def decompress(self, data):
2653 2655 """Decompress a revlog chunk.
2654 2656
2655 2657 The chunk is expected to begin with a header identifying the
2656 2658 format type so it can be routed to an appropriate decompressor.
2657 2659 """
2658 2660 if not data:
2659 2661 return data
2660 2662
2661 2663 # Revlogs are read much more frequently than they are written and many
2662 2664 # chunks only take microseconds to decompress, so performance is
2663 2665 # important here.
2664 2666 #
2665 2667 # We can make a few assumptions about revlogs:
2666 2668 #
2667 2669 # 1) the majority of chunks will be compressed (as opposed to inline
2668 2670 # raw data).
2670 2672 # 2) decompressing *any* data will likely be at least 10x slower than
2670 2672 # returning raw inline data.
2671 2673 # 3) we want to prioritize common and officially supported compression
2672 2674 # engines
2673 2675 #
2675 2677 # It follows that we want to optimize for the "decompress compressed data
2675 2677 # when encoded with common and officially supported compression engines"
2676 2678 # case over "raw data" and "data encoded by less common or non-official
2677 2679 # compression engines." That is why we have the inline lookup first
2678 2680 # followed by the compengines lookup.
2679 2681 #
2680 2682 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2681 2683 # compressed chunks. And this matters for changelog and manifest reads.
2682 2684 t = data[0:1]
2683 2685
2684 2686 if t == b'x':
2685 2687 try:
2686 2688 return _zlibdecompress(data)
2687 2689 except zlib.error as e:
2688 2690 raise error.RevlogError(
2689 2691 _(b'revlog decompress error: %s')
2690 2692 % stringutil.forcebytestr(e)
2691 2693 )
2692 2694 # '\0' is more common than 'u' so it goes first.
2693 2695 elif t == b'\0':
2694 2696 return data
2695 2697 elif t == b'u':
2696 2698 return util.buffer(data, 1)
2697 2699
2698 2700 compressor = self._get_decompressor(t)
2699 2701
2700 2702 return compressor.decompress(data)
2701 2703
2702 2704 def _addrevision(
2703 2705 self,
2704 2706 node,
2705 2707 rawtext,
2706 2708 transaction,
2707 2709 link,
2708 2710 p1,
2709 2711 p2,
2710 2712 flags,
2711 2713 cachedelta,
2712 2714 alwayscache=False,
2713 2715 deltacomputer=None,
2714 2716 sidedata=None,
2715 2717 ):
2716 2718 """internal function to add revisions to the log
2717 2719
2718 2720 see addrevision for argument descriptions.
2719 2721
2720 2722 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2721 2723
2722 2724 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2723 2725 be used.
2724 2726
2725 2727 invariants:
2726 2728 - rawtext is optional (can be None); if not set, cachedelta must be set.
2727 2729 If both are set, they must correspond to each other.
2728 2730 """
2729 2731 if node == self.nullid:
2730 2732 raise error.RevlogError(
2731 2733 _(b"%s: attempt to add null revision") % self.display_id
2732 2734 )
2733 2735 if (
2734 2736 node == self.nodeconstants.wdirid
2735 2737 or node in self.nodeconstants.wdirfilenodeids
2736 2738 ):
2737 2739 raise error.RevlogError(
2738 2740 _(b"%s: attempt to add wdir revision") % self.display_id
2739 2741 )
2740 2742 if self._writinghandles is None:
2741 2743 msg = b'adding revision outside `revlog._writing` context'
2742 2744 raise error.ProgrammingError(msg)
2743 2745
2744 2746 btext = [rawtext]
2745 2747
2746 2748 curr = len(self)
2747 2749 prev = curr - 1
2748 2750
2749 2751 offset = self._get_data_offset(prev)
2750 2752
2751 2753 if self._concurrencychecker:
2752 2754 ifh, dfh, sdfh = self._writinghandles
2753 2755 # XXX no checking for the sidedata file
2754 2756 if self._inline:
2755 2757 # offset is "as if" it were in the .d file, so we need to add on
2756 2758 # the size of the entry metadata.
2757 2759 self._concurrencychecker(
2758 2760 ifh, self._indexfile, offset + curr * self.index.entry_size
2759 2761 )
2760 2762 else:
2761 2763 # Entries in the .i are a consistent size.
2762 2764 self._concurrencychecker(
2763 2765 ifh, self._indexfile, curr * self.index.entry_size
2764 2766 )
2765 2767 self._concurrencychecker(dfh, self._datafile, offset)
2766 2768
2767 2769 p1r, p2r = self.rev(p1), self.rev(p2)
2768 2770
2769 2771 # full versions are inserted when the needed deltas
2770 2772 # become comparable to the uncompressed text
2771 2773 if rawtext is None:
2772 2774 # we need the rawtext size before it is changed by flag processors,
2773 2775 # which is the non-raw size. use revlog explicitly to avoid filelog's
2774 2776 # extra logic that might remove metadata size.
2775 2777 textlen = mdiff.patchedsize(
2776 2778 revlog.size(self, cachedelta[0]), cachedelta[1]
2777 2779 )
2778 2780 else:
2779 2781 textlen = len(rawtext)
2780 2782
2781 2783 if deltacomputer is None:
2782 2784 write_debug = None
2783 2785 if self._debug_delta:
2784 2786 write_debug = transaction._report
2785 2787 deltacomputer = deltautil.deltacomputer(
2786 2788 self, write_debug=write_debug
2787 2789 )
2788 2790
2789 2791 if cachedelta is not None and len(cachedelta) == 2:
2790 2792 # If the cached delta has no information about how it should be
2791 2793 # reused, add the default reuse instruction according to the
2792 2794 # revlog's configuration.
2793 2795 if (
2794 2796 self.delta_config.general_delta
2795 2797 and self.delta_config.lazy_delta_base
2796 2798 ):
2797 2799 delta_base_reuse = DELTA_BASE_REUSE_TRY
2798 2800 else:
2799 2801 delta_base_reuse = DELTA_BASE_REUSE_NO
2800 2802 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2801 2803
2802 2804 revinfo = revlogutils.revisioninfo(
2803 2805 node,
2804 2806 p1,
2805 2807 p2,
2806 2808 btext,
2807 2809 textlen,
2808 2810 cachedelta,
2809 2811 flags,
2810 2812 )
2811 2813
2812 2814 deltainfo = deltacomputer.finddeltainfo(revinfo)
2813 2815
2814 2816 compression_mode = COMP_MODE_INLINE
2815 2817 if self._docket is not None:
2816 2818 default_comp = self._docket.default_compression_header
2817 2819 r = deltautil.delta_compression(default_comp, deltainfo)
2818 2820 compression_mode, deltainfo = r
2819 2821
2820 2822 sidedata_compression_mode = COMP_MODE_INLINE
2821 2823 if sidedata and self.hassidedata:
2822 2824 sidedata_compression_mode = COMP_MODE_PLAIN
2823 2825 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2824 2826 sidedata_offset = self._docket.sidedata_end
2825 2827 h, comp_sidedata = self.compress(serialized_sidedata)
2826 2828 if (
2827 2829 h != b'u'
2828 2830 and comp_sidedata[0:1] != b'\0'
2829 2831 and len(comp_sidedata) < len(serialized_sidedata)
2830 2832 ):
2831 2833 assert not h
2832 2834 if (
2833 2835 comp_sidedata[0:1]
2834 2836 == self._docket.default_compression_header
2835 2837 ):
2836 2838 sidedata_compression_mode = COMP_MODE_DEFAULT
2837 2839 serialized_sidedata = comp_sidedata
2838 2840 else:
2839 2841 sidedata_compression_mode = COMP_MODE_INLINE
2840 2842 serialized_sidedata = comp_sidedata
2841 2843 else:
2842 2844 serialized_sidedata = b""
2843 2845 # Don't store the offset if the sidedata is empty; that way
2844 2846 # we can easily detect empty sidedata, and it will be no different
2845 2847 # from sidedata we add manually.
2846 2848 sidedata_offset = 0
2847 2849
2848 2850 rank = RANK_UNKNOWN
2849 2851 if self._compute_rank:
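# the rank of a revision is the number of revisions in its full set of
# ancestors, the revision itself included (i.e. len(::rev))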
2850 2852 if (p1r, p2r) == (nullrev, nullrev):
2851 2853 rank = 1
2852 2854 elif p1r != nullrev and p2r == nullrev:
2853 2855 rank = 1 + self.fast_rank(p1r)
2854 2856 elif p1r == nullrev and p2r != nullrev:
2855 2857 rank = 1 + self.fast_rank(p2r)
2856 2858 else: # merge node
2857 2859 if rustdagop is not None and self.index.rust_ext_compat:
2858 2860 rank = rustdagop.rank(self.index, p1r, p2r)
2859 2861 else:
2860 2862 pmin, pmax = sorted((p1r, p2r))
2861 2863 rank = 1 + self.fast_rank(pmax)
2862 2864 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2863 2865
2864 2866 e = revlogutils.entry(
2865 2867 flags=flags,
2866 2868 data_offset=offset,
2867 2869 data_compressed_length=deltainfo.deltalen,
2868 2870 data_uncompressed_length=textlen,
2869 2871 data_compression_mode=compression_mode,
2870 2872 data_delta_base=deltainfo.base,
2871 2873 link_rev=link,
2872 2874 parent_rev_1=p1r,
2873 2875 parent_rev_2=p2r,
2874 2876 node_id=node,
2875 2877 sidedata_offset=sidedata_offset,
2876 2878 sidedata_compressed_length=len(serialized_sidedata),
2877 2879 sidedata_compression_mode=sidedata_compression_mode,
2878 2880 rank=rank,
2879 2881 )
2880 2882
2881 2883 self.index.append(e)
2882 2884 entry = self.index.entry_binary(curr)
2883 2885 if curr == 0 and self._docket is None:
2884 2886 header = self._format_flags | self._format_version
2885 2887 header = self.index.pack_header(header)
2886 2888 entry = header + entry
2887 2889 self._writeentry(
2888 2890 transaction,
2889 2891 entry,
2890 2892 deltainfo.data,
2891 2893 link,
2892 2894 offset,
2893 2895 serialized_sidedata,
2894 2896 sidedata_offset,
2895 2897 )
2896 2898
2897 2899 rawtext = btext[0]
2898 2900
2899 2901 if alwayscache and rawtext is None:
2900 2902 rawtext = deltacomputer.buildtext(revinfo)
2901 2903
2902 2904 if type(rawtext) == bytes: # only accept immutable objects
2903 2905 self._revisioncache = (node, curr, rawtext)
2904 2906 self._chainbasecache[curr] = deltainfo.chainbase
2905 2907 return curr
2906 2908
2907 2909 def _get_data_offset(self, prev):
2908 2910 """Returns the current offset in the (in-transaction) data file.
2909 2911 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2910 2912 file to store that information: since sidedata can be rewritten to the
2911 2913 end of the data file within a transaction, you can have cases where, for
2912 2914 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2913 2915 to `n - 1`'s sidedata being written after `n`'s data.
2914 2916
2915 2917 TODO cache this in a docket file before getting out of experimental."""
2916 2918 if self._docket is None:
2917 2919 return self.end(prev)
2918 2920 else:
2919 2921 return self._docket.data_end
2920 2922
2921 2923 def _writeentry(
2922 2924 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2923 2925 ):
2924 2926 # Files opened in a+ mode have inconsistent behavior on various
2925 2927 # platforms. Windows requires that a file positioning call be made
2926 2928 # when the file handle transitions between reads and writes. See
2927 2929 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2928 2930 # platforms, Python or the platform itself can be buggy. Some versions
2929 2931 # of Solaris have been observed to not append at the end of the file
2930 2932 # if the file was seeked to before the end. See issue4943 for more.
2931 2933 #
2932 2934 # We work around this issue by inserting a seek() before writing.
2933 2935 # Note: This is likely not necessary on Python 3. However, because
2934 2936 # the file handle is reused for reads and may be seeked there, we need
2935 2937 # to be careful before changing this.
2936 2938 if self._writinghandles is None:
2937 2939 msg = b'adding revision outside `revlog._writing` context'
2938 2940 raise error.ProgrammingError(msg)
2939 2941 ifh, dfh, sdfh = self._writinghandles
2940 2942 if self._docket is None:
2941 2943 ifh.seek(0, os.SEEK_END)
2942 2944 else:
2943 2945 ifh.seek(self._docket.index_end, os.SEEK_SET)
2944 2946 if dfh:
2945 2947 if self._docket is None:
2946 2948 dfh.seek(0, os.SEEK_END)
2947 2949 else:
2948 2950 dfh.seek(self._docket.data_end, os.SEEK_SET)
2949 2951 if sdfh:
2950 2952 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2951 2953
2952 2954 curr = len(self) - 1
2953 2955 if not self._inline:
2954 2956 transaction.add(self._datafile, offset)
2955 2957 if self._sidedatafile:
2956 2958 transaction.add(self._sidedatafile, sidedata_offset)
2957 2959 transaction.add(self._indexfile, curr * len(entry))
2958 2960 if data[0]:
2959 2961 dfh.write(data[0])
2960 2962 dfh.write(data[1])
2961 2963 if sidedata:
2962 2964 sdfh.write(sidedata)
2963 2965 ifh.write(entry)
2964 2966 else:
2965 2967 offset += curr * self.index.entry_size
2966 2968 transaction.add(self._indexfile, offset)
2967 2969 ifh.write(entry)
2968 2970 ifh.write(data[0])
2969 2971 ifh.write(data[1])
2970 2972 assert not sidedata
2971 2973 self._enforceinlinesize(transaction)
2972 2974 if self._docket is not None:
2973 2975 # revlog-v2 always has 3 writing handles, help Pytype
2974 2976 wh1 = self._writinghandles[0]
2975 2977 wh2 = self._writinghandles[1]
2976 2978 wh3 = self._writinghandles[2]
2977 2979 assert wh1 is not None
2978 2980 assert wh2 is not None
2979 2981 assert wh3 is not None
2980 2982 self._docket.index_end = wh1.tell()
2981 2983 self._docket.data_end = wh2.tell()
2982 2984 self._docket.sidedata_end = wh3.tell()
2983 2985
2984 2986 nodemaputil.setup_persistent_nodemap(transaction, self)
2985 2987
2986 2988 def addgroup(
2987 2989 self,
2988 2990 deltas,
2989 2991 linkmapper,
2990 2992 transaction,
2991 2993 alwayscache=False,
2992 2994 addrevisioncb=None,
2993 2995 duplicaterevisioncb=None,
2994 2996 debug_info=None,
2995 2997 delta_base_reuse_policy=None,
2996 2998 ):
2997 2999 """
2998 3000 add a delta group
2999 3001
3000 3002 given a set of deltas, add them to the revision log. the
3001 3003 first delta is against its parent, which should be in our
3002 3004 log, the rest are against the previous delta.
3003 3005
3004 3006 If ``addrevisioncb`` is defined, it will be called with arguments of
3005 3007 this revlog and the node that was added.
3006 3008 """
3007 3009
3008 3010 if self._adding_group:
3009 3011 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3010 3012
3011 3013 # read the default delta-base reuse policy from revlog config if the
3012 3014 # group did not specify one.
3013 3015 if delta_base_reuse_policy is None:
3014 3016 if (
3015 3017 self.delta_config.general_delta
3016 3018 and self.delta_config.lazy_delta_base
3017 3019 ):
3018 3020 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3019 3021 else:
3020 3022 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3021 3023
3022 3024 self._adding_group = True
3023 3025 empty = True
3024 3026 try:
3025 3027 with self._writing(transaction):
3026 3028 write_debug = None
3027 3029 if self._debug_delta:
3028 3030 write_debug = transaction._report
3029 3031 deltacomputer = deltautil.deltacomputer(
3030 3032 self,
3031 3033 write_debug=write_debug,
3032 3034 debug_info=debug_info,
3033 3035 )
3034 3036 # loop through our set of deltas
3035 3037 for data in deltas:
3036 3038 (
3037 3039 node,
3038 3040 p1,
3039 3041 p2,
3040 3042 linknode,
3041 3043 deltabase,
3042 3044 delta,
3043 3045 flags,
3044 3046 sidedata,
3045 3047 ) = data
3046 3048 link = linkmapper(linknode)
3047 3049 flags = flags or REVIDX_DEFAULT_FLAGS
3048 3050
3049 3051 rev = self.index.get_rev(node)
3050 3052 if rev is not None:
3051 3053 # this can happen if two branches make the same change
3052 3054 self._nodeduplicatecallback(transaction, rev)
3053 3055 if duplicaterevisioncb:
3054 3056 duplicaterevisioncb(self, rev)
3055 3057 empty = False
3056 3058 continue
3057 3059
3058 3060 for p in (p1, p2):
3059 3061 if not self.index.has_node(p):
3060 3062 raise error.LookupError(
3061 3063 p, self.radix, _(b'unknown parent')
3062 3064 )
3063 3065
3064 3066 if not self.index.has_node(deltabase):
3065 3067 raise error.LookupError(
3066 3068 deltabase, self.display_id, _(b'unknown delta base')
3067 3069 )
3068 3070
3069 3071 baserev = self.rev(deltabase)
3070 3072
3071 3073 if baserev != nullrev and self.iscensored(baserev):
3072 3074 # if base is censored, delta must be full replacement in a
3073 3075 # single patch operation
3074 3076 hlen = struct.calcsize(b">lll")
3075 3077 oldlen = self.rawsize(baserev)
3076 3078 newlen = len(delta) - hlen
3077 3079 if delta[:hlen] != mdiff.replacediffheader(
3078 3080 oldlen, newlen
3079 3081 ):
3080 3082 raise error.CensoredBaseError(
3081 3083 self.display_id, self.node(baserev)
3082 3084 )
3083 3085
3084 3086 if not flags and self._peek_iscensored(baserev, delta):
3085 3087 flags |= REVIDX_ISCENSORED
3086 3088
3087 3089 # We assume consumers of addrevisioncb will want to retrieve
3088 3090 # the added revision, which will require a call to
3089 3091 # revision(). revision() will fast path if there is a cache
3090 3092 # hit. So, we tell _addrevision() to always cache in this case.
3091 3093 # We're only using addgroup() in the context of changegroup
3092 3094 # generation so the revision data can always be handled as raw
3093 3095 # by the flagprocessor.
3094 3096 rev = self._addrevision(
3095 3097 node,
3096 3098 None,
3097 3099 transaction,
3098 3100 link,
3099 3101 p1,
3100 3102 p2,
3101 3103 flags,
3102 3104 (baserev, delta, delta_base_reuse_policy),
3103 3105 alwayscache=alwayscache,
3104 3106 deltacomputer=deltacomputer,
3105 3107 sidedata=sidedata,
3106 3108 )
3107 3109
3108 3110 if addrevisioncb:
3109 3111 addrevisioncb(self, rev)
3110 3112 empty = False
3111 3113 finally:
3112 3114 self._adding_group = False
3113 3115 return not empty
3114 3116
3115 3117 def iscensored(self, rev):
3116 3118 """Check if a file revision is censored."""
3117 if not self._censorable:
3119 if not self.feature_config.censorable:
3118 3120 return False
3119 3121
3120 3122 return self.flags(rev) & REVIDX_ISCENSORED
3121 3123
3122 3124 def _peek_iscensored(self, baserev, delta):
3123 3125 """Quickly check if a delta produces a censored revision."""
3124 if not self._censorable:
3126 if not self.feature_config.censorable:
3125 3127 return False
3126 3128
3127 3129 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3128 3130
3129 3131 def getstrippoint(self, minlink):
3130 3132 """find the minimum rev that must be stripped to strip the linkrev
3131 3133
3132 3134 Returns a tuple containing the minimum rev and a set of all revs that
3133 3135 have linkrevs that will be broken by this strip.
3134 3136 """
3135 3137 return storageutil.resolvestripinfo(
3136 3138 minlink,
3137 3139 len(self) - 1,
3138 3140 self.headrevs(),
3139 3141 self.linkrev,
3140 3142 self.parentrevs,
3141 3143 )
3142 3144
3143 3145 def strip(self, minlink, transaction):
3144 3146 """truncate the revlog on the first revision with a linkrev >= minlink
3145 3147
3146 3148 This function is called when we're stripping revision minlink and
3147 3149 its descendants from the repository.
3148 3150
3149 3151 We have to remove all revisions with linkrev >= minlink, because
3150 3152 the equivalent changelog revisions will be renumbered after the
3151 3153 strip.
3152 3154
3153 3155 So we truncate the revlog on the first of these revisions, and
3154 3156 trust that the caller has saved the revisions that shouldn't be
3155 3157 removed and that it'll re-add them after this truncation.
3156 3158 """
3157 3159 if len(self) == 0:
3158 3160 return
3159 3161
3160 3162 rev, _ = self.getstrippoint(minlink)
3161 3163 if rev == len(self):
3162 3164 return
3163 3165
3164 3166 # first truncate the files on disk
3165 3167 data_end = self.start(rev)
3166 3168 if not self._inline:
3167 3169 transaction.add(self._datafile, data_end)
3168 3170 end = rev * self.index.entry_size
3169 3171 else:
3170 3172 end = data_end + (rev * self.index.entry_size)
3171 3173
3172 3174 if self._sidedatafile:
3173 3175 sidedata_end = self.sidedata_cut_off(rev)
3174 3176 transaction.add(self._sidedatafile, sidedata_end)
3175 3177
3176 3178 transaction.add(self._indexfile, end)
3177 3179 if self._docket is not None:
3178 3180 # XXX we could leverage the docket while stripping. However it is
3179 3181 # not powerful enough at the time of this comment
3180 3182 self._docket.index_end = end
3181 3183 self._docket.data_end = data_end
3182 3184 self._docket.sidedata_end = sidedata_end
3183 3185 self._docket.write(transaction, stripping=True)
3184 3186
3185 3187 # then reset internal state in memory to forget those revisions
3186 3188 self._revisioncache = None
3187 3189 self._chaininfocache = util.lrucachedict(500)
3188 3190 self._segmentfile.clear_cache()
3189 3191 self._segmentfile_sidedata.clear_cache()
3190 3192
3191 3193 del self.index[rev:-1]
3192 3194
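# Usage sketch (assuming a repository `repo` and revlog `rl`): strip
# must run inside a transaction, since the truncations registered above
# are what allows a failed strip to be rolled back.
#
#     with repo.transaction(b'strip') as tr:
#         rl.strip(minlink, tr)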
3193 3195 def checksize(self):
3194 3196 """Check size of index and data files
3195 3197
3196 3198 return a (dd, di) tuple.
3197 3199 - dd: extra bytes for the "data" file
3198 3200 - di: extra bytes for the "index" file
3199 3201
3200 3202 A healthy revlog will return (0, 0).
3201 3203 """
3202 3204 expected = 0
3203 3205 if len(self):
3204 3206 expected = max(0, self.end(len(self) - 1))
3205 3207
3206 3208 try:
3207 3209 with self._datafp() as f:
3208 3210 f.seek(0, io.SEEK_END)
3209 3211 actual = f.tell()
3210 3212 dd = actual - expected
3211 3213 except FileNotFoundError:
3212 3214 dd = 0
3213 3215
3214 3216 try:
3215 3217 f = self.opener(self._indexfile)
3216 3218 f.seek(0, io.SEEK_END)
3217 3219 actual = f.tell()
3218 3220 f.close()
3219 3221 s = self.index.entry_size
3220 3222 i = max(0, actual // s)
3221 3223 di = actual - (i * s)
3222 3224 if self._inline:
3223 3225 databytes = 0
3224 3226 for r in self:
3225 3227 databytes += max(0, self.length(r))
3226 3228 dd = 0
3227 3229 di = actual - len(self) * s - databytes
3228 3230 except FileNotFoundError:
3229 3231 di = 0
3230 3232
3231 3233 return (dd, di)
3232 3234
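# Usage sketch (assuming a revlog instance `rl`): a healthy revlog
# reports (0, 0); positive values mean trailing bytes beyond what the
# index accounts for, e.g. left behind by an interrupted write.
#
#     dd, di = rl.checksize()
#     if dd or di:
#         print('data off by %d bytes, index off by %d' % (dd, di))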
3233 3235 def files(self):
3234 3236 res = [self._indexfile]
3235 3237 if self._docket_file is None:
3236 3238 if not self._inline:
3237 3239 res.append(self._datafile)
3238 3240 else:
3239 3241 res.append(self._docket_file)
3240 3242 res.extend(self._docket.old_index_filepaths(include_empty=False))
3241 3243 if self._docket.data_end:
3242 3244 res.append(self._datafile)
3243 3245 res.extend(self._docket.old_data_filepaths(include_empty=False))
3244 3246 if self._docket.sidedata_end:
3245 3247 res.append(self._sidedatafile)
3246 3248 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3247 3249 return res
3248 3250
3249 3251 def emitrevisions(
3250 3252 self,
3251 3253 nodes,
3252 3254 nodesorder=None,
3253 3255 revisiondata=False,
3254 3256 assumehaveparentrevisions=False,
3255 3257 deltamode=repository.CG_DELTAMODE_STD,
3256 3258 sidedata_helpers=None,
3257 3259 debug_info=None,
3258 3260 ):
3259 3261 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3260 3262 raise error.ProgrammingError(
3261 3263 b'unhandled value for nodesorder: %s' % nodesorder
3262 3264 )
3263 3265
3264 3266 if nodesorder is None and not self.delta_config.general_delta:
3265 3267 nodesorder = b'storage'
3266 3268
3267 3269 if (
3268 3270 not self._storedeltachains
3269 3271 and deltamode != repository.CG_DELTAMODE_PREV
3270 3272 ):
3271 3273 deltamode = repository.CG_DELTAMODE_FULL
3272 3274
3273 3275 return storageutil.emitrevisions(
3274 3276 self,
3275 3277 nodes,
3276 3278 nodesorder,
3277 3279 revlogrevisiondelta,
3278 3280 deltaparentfn=self.deltaparent,
3279 3281 candeltafn=self._candelta,
3280 3282 rawsizefn=self.rawsize,
3281 3283 revdifffn=self.revdiff,
3282 3284 flagsfn=self.flags,
3283 3285 deltamode=deltamode,
3284 3286 revisiondata=revisiondata,
3285 3287 assumehaveparentrevisions=assumehaveparentrevisions,
3286 3288 sidedata_helpers=sidedata_helpers,
3287 3289 debug_info=debug_info,
3288 3290 )
3289 3291
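# Usage sketch (assuming a revlog `rl` and an iterable of `nodes`):
# consumers iterate the emitted revision deltas, e.g. during
# changegroup generation; each entry carries either a delta or a full
# revision.
#
#     for rdelta in rl.emitrevisions(nodes, revisiondata=True):
#         consume(rdelta.node, rdelta.delta or rdelta.revision)
#         # `consume` is hypothetical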
3290 3292 DELTAREUSEALWAYS = b'always'
3291 3293 DELTAREUSESAMEREVS = b'samerevs'
3292 3294 DELTAREUSENEVER = b'never'
3293 3295
3294 3296 DELTAREUSEFULLADD = b'fulladd'
3295 3297
3296 3298 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3297 3299
3298 3300 def clone(
3299 3301 self,
3300 3302 tr,
3301 3303 destrevlog,
3302 3304 addrevisioncb=None,
3303 3305 deltareuse=DELTAREUSESAMEREVS,
3304 3306 forcedeltabothparents=None,
3305 3307 sidedata_helpers=None,
3306 3308 ):
3307 3309 """Copy this revlog to another, possibly with format changes.
3308 3310
3309 3311 The destination revlog will contain the same revisions and nodes.
3310 3312 However, it may not be bit-for-bit identical due to e.g. delta encoding
3311 3313 differences.
3312 3314
3313 3315 The ``deltareuse`` argument controls how deltas from the existing revlog
3314 3316 are preserved in the destination revlog. The argument can have the
3315 3317 following values:
3316 3318
3317 3319 DELTAREUSEALWAYS
3318 3320 Deltas will always be reused (if possible), even if the destination
3319 3321 revlog would not select the same revisions for the delta. This is the
3320 3322 fastest mode of operation.
3321 3323 DELTAREUSESAMEREVS
3322 3324 Deltas will be reused if the destination revlog would pick the same
3323 3325 revisions for the delta. This mode strikes a balance between speed
3324 3326 and optimization.
3325 3327 DELTAREUSENEVER
3326 3328 Deltas will never be reused. This is the slowest mode of execution.
3327 3329 This mode can be used to recompute deltas (e.g. if the diff/delta
3328 3330 algorithm changes).
3329 3331 DELTAREUSEFULLADD
3330 3332 Revisions will be re-added as if they were new content. This is
3331 3333 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3332 3334 e.g. large file detection and handling.
3333 3335
3334 3336 Delta computation can be slow, so the choice of delta reuse policy can
3335 3337 significantly affect run time.
3336 3338
3337 3339 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3338 3340 two extremes. Deltas will be reused if they are appropriate. But if the
3339 3341 destination could choose a better base revision, the delta is recomputed. This means if you
3340 3342 are converting a non-generaldelta revlog to a generaldelta revlog,
3341 3343 deltas will be recomputed if the delta's parent isn't a parent of the
3342 3344 revision.
3343 3345
3344 3346 In addition to the delta policy, the ``forcedeltabothparents``
3345 3347 argument controls whether to force computing deltas against both parents
3346 3348 for merges. When unset, the destination revlog's existing setting is used.
3347 3349
3348 3350 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3349 3351 `sidedata_helpers`.
3350 3352 """
3351 3353 if deltareuse not in self.DELTAREUSEALL:
3352 3354 raise ValueError(
3353 3355 _(b'value for deltareuse invalid: %s') % deltareuse
3354 3356 )
3355 3357
3356 3358 if len(destrevlog):
3357 3359 raise ValueError(_(b'destination revlog is not empty'))
3358 3360
3359 3361 if getattr(self, 'filteredrevs', None):
3360 3362 raise ValueError(_(b'source revlog has filtered revisions'))
3361 3363 if getattr(destrevlog, 'filteredrevs', None):
3362 3364 raise ValueError(_(b'destination revlog has filtered revisions'))
3363 3365
3364 3366 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3365 3367 # if possible.
3366 3368 old_delta_config = destrevlog.delta_config
3367 3369 destrevlog.delta_config = destrevlog.delta_config.copy()
3368 3370
3369 3371 try:
3370 3372 if deltareuse == self.DELTAREUSEALWAYS:
3371 3373 destrevlog.delta_config.lazy_delta_base = True
3372 3374 destrevlog.delta_config.lazy_delta = True
3373 3375 elif deltareuse == self.DELTAREUSESAMEREVS:
3374 3376 destrevlog.delta_config.lazy_delta_base = False
3375 3377 destrevlog.delta_config.lazy_delta = True
3376 3378 elif deltareuse == self.DELTAREUSENEVER:
3377 3379 destrevlog.delta_config.lazy_delta_base = False
3378 3380 destrevlog.delta_config.lazy_delta = False
3379 3381
3380 3382 delta_both_parents = (
3381 3383 forcedeltabothparents or old_delta_config.delta_both_parents
3382 3384 )
3383 3385 destrevlog.delta_config.delta_both_parents = delta_both_parents
3384 3386
3385 3387 with self.reading():
3386 3388 self._clone(
3387 3389 tr,
3388 3390 destrevlog,
3389 3391 addrevisioncb,
3390 3392 deltareuse,
3391 3393 forcedeltabothparents,
3392 3394 sidedata_helpers,
3393 3395 )
3394 3396
3395 3397 finally:
3396 3398 destrevlog.delta_config = old_delta_config
3397 3399
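# Usage sketch (assuming revlogs `src` and `dest` and an open
# transaction `tr`): force every delta to be recomputed while copying,
# e.g. after a change to the delta algorithm.
#
#     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)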
3398 3400 def _clone(
3399 3401 self,
3400 3402 tr,
3401 3403 destrevlog,
3402 3404 addrevisioncb,
3403 3405 deltareuse,
3404 3406 forcedeltabothparents,
3405 3407 sidedata_helpers,
3406 3408 ):
3407 3409 """perform the core duty of `revlog.clone` after parameter processing"""
3408 3410 write_debug = None
3409 3411 if self._debug_delta:
3410 3412 write_debug = tr._report
3411 3413 deltacomputer = deltautil.deltacomputer(
3412 3414 destrevlog,
3413 3415 write_debug=write_debug,
3414 3416 )
3415 3417 index = self.index
3416 3418 for rev in self:
3417 3419 entry = index[rev]
3418 3420
3419 3421 # Some classes override linkrev to take filtered revs into
3420 3422 # account. Use raw entry from index.
3421 3423 flags = entry[0] & 0xFFFF
3422 3424 linkrev = entry[4]
3423 3425 p1 = index[entry[5]][7]
3424 3426 p2 = index[entry[6]][7]
3425 3427 node = entry[7]
3426 3428
3427 3429 # (Possibly) reuse the delta from the revlog if allowed and
3428 3430 # the revlog chunk is a delta.
3429 3431 cachedelta = None
3430 3432 rawtext = None
3431 3433 if deltareuse == self.DELTAREUSEFULLADD:
3432 3434 text = self._revisiondata(rev)
3433 3435 sidedata = self.sidedata(rev)
3434 3436
3435 3437 if sidedata_helpers is not None:
3436 3438 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3437 3439 self, sidedata_helpers, sidedata, rev
3438 3440 )
3439 3441 flags = flags | new_flags[0] & ~new_flags[1]
3440 3442
3441 3443 destrevlog.addrevision(
3442 3444 text,
3443 3445 tr,
3444 3446 linkrev,
3445 3447 p1,
3446 3448 p2,
3447 3449 cachedelta=cachedelta,
3448 3450 node=node,
3449 3451 flags=flags,
3450 3452 deltacomputer=deltacomputer,
3451 3453 sidedata=sidedata,
3452 3454 )
3453 3455 else:
3454 3456 if destrevlog._lazydelta:
3455 3457 dp = self.deltaparent(rev)
3456 3458 if dp != nullrev:
3457 3459 cachedelta = (dp, bytes(self._chunk(rev)))
3458 3460
3459 3461 sidedata = None
3460 3462 if not cachedelta:
3461 3463 rawtext = self._revisiondata(rev)
3462 3464 sidedata = self.sidedata(rev)
3463 3465 if sidedata is None:
3464 3466 sidedata = self.sidedata(rev)
3465 3467
3466 3468 if sidedata_helpers is not None:
3467 3469 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3468 3470 self, sidedata_helpers, sidedata, rev
3469 3471 )
3470 3472 flags = flags | new_flags[0] & ~new_flags[1]
3471 3473
3472 3474 with destrevlog._writing(tr):
3473 3475 destrevlog._addrevision(
3474 3476 node,
3475 3477 rawtext,
3476 3478 tr,
3477 3479 linkrev,
3478 3480 p1,
3479 3481 p2,
3480 3482 flags,
3481 3483 cachedelta,
3482 3484 deltacomputer=deltacomputer,
3483 3485 sidedata=sidedata,
3484 3486 )
3485 3487
3486 3488 if addrevisioncb:
3487 3489 addrevisioncb(self, rev, node)
3488 3490
3489 3491 def censorrevision(self, tr, censornode, tombstone=b''):
3490 3492 if self._format_version == REVLOGV0:
3491 3493 raise error.RevlogError(
3492 3494 _(b'cannot censor with version %d revlogs')
3493 3495 % self._format_version
3494 3496 )
3495 3497 elif self._format_version == REVLOGV1:
3496 3498 rewrite.v1_censor(self, tr, censornode, tombstone)
3497 3499 else:
3498 3500 rewrite.v2_censor(self, tr, censornode, tombstone)
3499 3501
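# Usage sketch (assuming a censorable filelog revlog `fl`, an open
# transaction `tr`, and the node `badnode` to censor; the tombstone
# value is illustrative):
#
#     fl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')
#
# REVLOGV0 cannot be censored in place; v1 and v2 take the different
# rewrite paths above.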
3500 3502 def verifyintegrity(self, state):
3501 3503 """Verifies the integrity of the revlog.
3502 3504
3503 3505 Yields ``revlogproblem`` instances describing problems that are
3504 3506 found.
3505 3507 """
3506 3508 dd, di = self.checksize()
3507 3509 if dd:
3508 3510 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3509 3511 if di:
3510 3512 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3511 3513
3512 3514 version = self._format_version
3513 3515
3514 3516 # The verifier tells us what version revlog we should be.
3515 3517 if version != state[b'expectedversion']:
3516 3518 yield revlogproblem(
3517 3519 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3518 3520 % (self.display_id, version, state[b'expectedversion'])
3519 3521 )
3520 3522
3521 3523 state[b'skipread'] = set()
3522 3524 state[b'safe_renamed'] = set()
3523 3525
3524 3526 for rev in self:
3525 3527 node = self.node(rev)
3526 3528
3527 3529 # Verify contents. 4 cases to care about:
3528 3530 #
3529 3531 # common: the most common case
3530 3532 # rename: with a rename
3531 3533 # meta: file content starts with b'\1\n', the metadata
3532 3534 # header defined in filelog.py, but without a rename
3533 3535 # ext: content stored externally
3534 3536 #
3535 3537 # More formally, their differences are shown below:
3536 3538 #
3537 3539 # | common | rename | meta | ext
3538 3540 # -------------------------------------------------------
3539 3541 # flags() | 0 | 0 | 0 | not 0
3540 3542 # renamed() | False | True | False | ?
3541 3543 # rawtext[0:2]=='\1\n'| False | True | True | ?
3542 3544 #
3543 3545 # "rawtext" means the raw text stored in revlog data, which
3544 3546 # could be retrieved by "rawdata(rev)". "text"
3545 3547 # mentioned below is "revision(rev)".
3546 3548 #
3547 3549 # There are 3 different lengths stored physically:
3548 3550 # 1. L1: rawsize, stored in revlog index
3549 3551 # 2. L2: len(rawtext), stored in revlog data
3550 3552 # 3. L3: len(text), stored in revlog data if flags==0, or
3551 3553 # possibly somewhere else if flags!=0
3552 3554 #
3553 3555 # L1 should be equal to L2. L3 could be different from them.
3554 3556 # "text" may or may not affect commit hash depending on flag
3555 3557 # processors (see flagutil.addflagprocessor).
3556 3558 #
3557 3559 # | common | rename | meta | ext
3558 3560 # -------------------------------------------------
3559 3561 # rawsize() | L1 | L1 | L1 | L1
3560 3562 # size() | L1 | L2-LM | L1(*) | L1 (?)
3561 3563 # len(rawtext) | L2 | L2 | L2 | L2
3562 3564 # len(text) | L2 | L2 | L2 | L3
3563 3565 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3564 3566 #
3565 3567 # LM: length of metadata, depending on rawtext
3566 3568 # (*): not ideal, see comment in filelog.size
3567 3569 # (?): could be "- len(meta)" if the resolved content has
3568 3570 # rename metadata
3569 3571 #
3570 3572 # Checks needed to be done:
3571 3573 # 1. length check: L1 == L2, in all cases.
3572 3574 # 2. hash check: depending on flag processor, we may need to
3573 3575 # use either "text" (external), or "rawtext" (in revlog).
3574 3576
3575 3577 try:
3576 3578 skipflags = state.get(b'skipflags', 0)
3577 3579 if skipflags:
3578 3580 skipflags &= self.flags(rev)
3579 3581
3580 3582 _verify_revision(self, skipflags, state, node)
3581 3583
3582 3584 l1 = self.rawsize(rev)
3583 3585 l2 = len(self.rawdata(node))
3584 3586
3585 3587 if l1 != l2:
3586 3588 yield revlogproblem(
3587 3589 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3588 3590 node=node,
3589 3591 )
3590 3592
3591 3593 except error.CensoredNodeError:
3592 3594 if state[b'erroroncensored']:
3593 3595 yield revlogproblem(
3594 3596 error=_(b'censored file data'), node=node
3595 3597 )
3596 3598 state[b'skipread'].add(node)
3597 3599 except Exception as e:
3598 3600 yield revlogproblem(
3599 3601 error=_(b'unpacking %s: %s')
3600 3602 % (short(node), stringutil.forcebytestr(e)),
3601 3603 node=node,
3602 3604 )
3603 3605 state[b'skipread'].add(node)
3604 3606
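# Usage sketch (assuming a `state` dict prepared by the verifier and a
# `ui` object from the caller): problems arrive as a generator of
# revlogproblem instances carrying `error`, `warning` and `node`.
#
#     for problem in rl.verifyintegrity(state):
#         ui.warn((problem.error or problem.warning) + b'\n')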
3605 3607 def storageinfo(
3606 3608 self,
3607 3609 exclusivefiles=False,
3608 3610 sharedfiles=False,
3609 3611 revisionscount=False,
3610 3612 trackedsize=False,
3611 3613 storedsize=False,
3612 3614 ):
3613 3615 d = {}
3614 3616
3615 3617 if exclusivefiles:
3616 3618 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3617 3619 if not self._inline:
3618 3620 d[b'exclusivefiles'].append((self.opener, self._datafile))
3619 3621
3620 3622 if sharedfiles:
3621 3623 d[b'sharedfiles'] = []
3622 3624
3623 3625 if revisionscount:
3624 3626 d[b'revisionscount'] = len(self)
3625 3627
3626 3628 if trackedsize:
3627 3629 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3628 3630
3629 3631 if storedsize:
3630 3632 d[b'storedsize'] = sum(
3631 3633 self.opener.stat(path).st_size for path in self.files()
3632 3634 )
3633 3635
3634 3636 return d
3635 3637
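# Usage sketch (assuming a revlog `rl`): request only the fields
# needed; `trackedsize` walks every revision and `storedsize` stats
# every file, so they are the expensive ones.
#
#     info = rl.storageinfo(revisionscount=True, storedsize=True)
#     total_bytes = info[b'storedsize']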
3636 3638 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3637 3639 if not self.hassidedata:
3638 3640 return
3639 3641 # revlog formats with sidedata support do not support inline
3640 3642 assert not self._inline
3641 3643 if not helpers[1] and not helpers[2]:
3642 3644 # Nothing to generate or remove
3643 3645 return
3644 3646
3645 3647 new_entries = []
3646 3648 # append the new sidedata
3647 3649 with self._writing(transaction):
3648 3650 ifh, dfh, sdfh = self._writinghandles
3649 3651 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3650 3652
3651 3653 current_offset = sdfh.tell()
3652 3654 for rev in range(startrev, endrev + 1):
3653 3655 entry = self.index[rev]
3654 3656 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3655 3657 store=self,
3656 3658 sidedata_helpers=helpers,
3657 3659 sidedata={},
3658 3660 rev=rev,
3659 3661 )
3660 3662
3661 3663 serialized_sidedata = sidedatautil.serialize_sidedata(
3662 3664 new_sidedata
3663 3665 )
3664 3666
3665 3667 sidedata_compression_mode = COMP_MODE_INLINE
3666 3668 if serialized_sidedata and self.hassidedata:
3667 3669 sidedata_compression_mode = COMP_MODE_PLAIN
3668 3670 h, comp_sidedata = self.compress(serialized_sidedata)
3669 3671 if (
3670 3672 h != b'u'
3671 3673 and comp_sidedata[0] != b'\0'
3672 3674 and len(comp_sidedata) < len(serialized_sidedata)
3673 3675 ):
3674 3676 assert not h
3675 3677 if (
3676 3678 comp_sidedata[0]
3677 3679 == self._docket.default_compression_header
3678 3680 ):
3679 3681 sidedata_compression_mode = COMP_MODE_DEFAULT
3680 3682 serialized_sidedata = comp_sidedata
3681 3683 else:
3682 3684 sidedata_compression_mode = COMP_MODE_INLINE
3683 3685 serialized_sidedata = comp_sidedata
3684 3686 if entry[8] != 0 or entry[9] != 0:
3685 3687 # rewriting entries that already have sidedata is not
3686 3688 # supported yet, because it introduces garbage data in the
3687 3689 # revlog.
3688 3690 msg = b"rewriting existing sidedata is not supported yet"
3689 3691 raise error.Abort(msg)
3690 3692
3691 3693 # Apply (potential) flags to add and to remove after running
3692 3694 # the sidedata helpers
3693 3695 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3694 3696 entry_update = (
3695 3697 current_offset,
3696 3698 len(serialized_sidedata),
3697 3699 new_offset_flags,
3698 3700 sidedata_compression_mode,
3699 3701 )
3700 3702
3701 3703 # the sidedata computation might have moved the file cursors around
3702 3704 sdfh.seek(current_offset, os.SEEK_SET)
3703 3705 sdfh.write(serialized_sidedata)
3704 3706 new_entries.append(entry_update)
3705 3707 current_offset += len(serialized_sidedata)
3706 3708 self._docket.sidedata_end = sdfh.tell()
3707 3709
3708 3710 # rewrite the new index entries
3709 3711 ifh.seek(startrev * self.index.entry_size)
3710 3712 for i, e in enumerate(new_entries):
3711 3713 rev = startrev + i
3712 3714 self.index.replace_sidedata_info(rev, *e)
3713 3715 packed = self.index.entry_binary(rev)
3714 3716 if rev == 0 and self._docket is None:
3715 3717 header = self._format_flags | self._format_version
3716 3718 header = self.index.pack_header(header)
3717 3719 packed = header + packed
3718 3720 ifh.write(packed)
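# Usage sketch (assuming an open transaction `tr` and sidedata
# `helpers` built per revlogutil.sidedata.get_sidedata_helpers):
# regenerate sidedata for a contiguous revision range; the range is
# inclusive on both ends, as the loop above shows.
#
#     rl.rewrite_sidedata(tr, helpers, startrev=0, endrev=len(rl) - 1)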