revlog: remove legacy usage of `_compute_rank`...
marmoute - r51957:81f38773 default
@@ -1,3724 +1,3724 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blank usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
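A hedged sketch of the flag-processor contract this triple satisfies: each registered processor is a `(read, write, raw)` triple where `read` and `write` return `(text, validatehash)` and `raw` only reports whether the rawtext hash may be checked directly. Registration goes through `flagutil.insertflagprocessor`, as `_init_opts` does below; `example_processors` and `rl` are illustrative stand-ins, not the module registry or a real revlog.

    example_processors = {}
    flagutil.insertflagprocessor(
        REVIDX_ELLIPSIS, ellipsisprocessor, example_processors
    )
    read, write, raw = example_processors[REVIDX_ELLIPSIS]
    # rl: an assumed revlog instance; ellipsis read is a no-op
    assert read(rl, b'data') == (b'data', False)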
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
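The override above exists because `_Config.copy` is shallow (`self.__class__(**self.__dict__)`), so the mutable options dict would otherwise be shared between the clone and the original. A hedged illustration (the `b'zlib.level'` key is just an example):

    a = FeatureConfig()
    a.compression_engine_options[b'zlib.level'] = 9
    b = a.copy()
    b.compression_engine_options[b'zlib.level'] = 1
    # the original is unaffected because the dict was re-copied
    assert a.compression_engine_options[b'zlib.level'] == 9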
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data counts as large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups by chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
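A hedged sketch of the header layout `is_inline_index` decodes: the low 16 bits carry the format version and the high bits carry the feature flags, so a v1 inline header can be round-tripped like this (illustrative only):

    header_bytes = INDEX_HEADER.pack(REVLOGV1 | FLAG_INLINE_DATA)
    assert revlog.is_inline_index(header_bytes)
    header = INDEX_HEADER.unpack(header_bytes)[0]
    assert header & 0xFFFF == REVLOGV1            # version half
    assert header & ~0xFFFF == FLAG_INLINE_DATA   # flags half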
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must reliably be set by normal code, but
425 425 test, debug, or performance measurement code might not set it to an
426 426 accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598 """process options (from above/config) to setup associated default revlog mode
599 599
600 600 These values might be affected when actually reading on disk information.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * newversionflags:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608 minimal index size at which to start using mmap
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 opts = self.opener.options
614 614
615 615 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
616 616 new_header = CHANGELOGV2
617 617 compute_rank = opts.get(b'changelogv2.compute-rank', True)
618 618 self.feature_config.compute_rank = compute_rank
619 619 elif b'revlogv2' in opts:
620 620 new_header = REVLOGV2
621 621 elif b'revlogv1' in opts:
622 622 new_header = REVLOGV1 | FLAG_INLINE_DATA
623 623 if b'generaldelta' in opts:
624 624 new_header |= FLAG_GENERALDELTA
625 625 elif b'revlogv0' in self.opener.options:
626 626 new_header = REVLOGV0
627 627 else:
628 628 new_header = REVLOG_DEFAULT_VERSION
629 629
630 630 mmapindexthreshold = None
631 631 if self.data_config.mmap_large_index:
632 632 mmapindexthreshold = self.data_config.mmap_index_threshold
633 633 if self.feature_config.enable_ellipsis:
634 634 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
635 635
636 636 # revlog v0 doesn't have flag processors
637 637 for flag, processor in opts.get(b'flagprocessors', {}).items():
638 638 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
639 639
640 640 chunk_cache_size = self.data_config.chunk_cache_size
641 641 if chunk_cache_size <= 0:
642 642 raise error.RevlogError(
643 643 _(b'revlog chunk cache size %r is not greater than 0')
644 644 % chunk_cache_size
645 645 )
646 646 elif chunk_cache_size & (chunk_cache_size - 1):
647 647 raise error.RevlogError(
648 648 _(b'revlog chunk cache size %r is not a power of 2')
649 649 % chunk_cache_size
650 650 )
651 651 force_nodemap = opts.get(b'devel-force-nodemap', False)
652 652 return new_header, mmapindexthreshold, force_nodemap
653 653
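The cache-size validation above relies on the standard bit trick: a positive integer is a power of two iff `n & (n - 1) == 0`, since a power of two has exactly one bit set and `n - 1` flips every bit below it. For example:

    assert 65536 & (65536 - 1) == 0  # power of two: accepted
    assert 65535 & (65535 - 1) != 0  # not a power of two: rejected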
654 654 def _get_data(self, filepath, mmap_threshold, size=None):
655 655 """return a file content with or without mmap
656 656
657 657 If the file is missing return the empty string"""
658 658 try:
659 659 with self.opener(filepath) as fp:
660 660 if mmap_threshold is not None:
661 661 file_size = self.opener.fstat(fp).st_size
662 662 if file_size >= mmap_threshold:
663 663 if size is not None:
664 664 # avoid potential mmap crash
665 665 size = min(file_size, size)
666 666 # TODO: should .close() to release resources without
667 667 # relying on Python GC
668 668 if size is None:
669 669 return util.buffer(util.mmapread(fp))
670 670 else:
671 671 return util.buffer(util.mmapread(fp, size))
672 672 if size is None:
673 673 return fp.read()
674 674 else:
675 675 return fp.read(size)
676 676 except FileNotFoundError:
677 677 return b''
678 678
679 679 def get_streams(self, max_linkrev, force_inline=False):
680 680 """return a list of streams that represent this revlog
681 681
682 682 This is used by stream-clone to do byte-for-byte copies of a repository.
683 683
684 684 This streams data for all revisions that refer to a changelog revision up
685 685 to `max_linkrev`.
686 686
687 687 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
688 688
689 689 It returns a list of three-tuples:
690 690
691 691 [
692 692 (filename, bytes_stream, stream_size),
693 693 …
694 694 ]
695 695 """
696 696 n = len(self)
697 697 index = self.index
698 698 while n > 0:
699 699 linkrev = index[n - 1][4]
700 700 if linkrev < max_linkrev:
701 701 break
702 702 # note: this loop will rarely go through multiple iterations, since
703 703 # it only traverses commits created during the current streaming
704 704 # pull operation.
705 705 #
706 706 # If this becomes a problem, using a binary search should cap the
707 707 # runtime of this.
708 708 n = n - 1
709 709 if n == 0:
710 710 # no data to send
711 711 return []
712 712 index_size = n * index.entry_size
713 713 data_size = self.end(n - 1)
714 714
715 715 # XXX we might have been split (or stripped) since the object was
716 716 # initialized. We need to close this race too, e.g. by pre-opening the
717 717 # files we feed to the revlog and never closing them before we are
718 718 # done streaming.
719 719
720 720 if self._inline:
721 721
722 722 def get_stream():
723 723 with self._indexfp() as fp:
724 724 yield None
725 725 size = index_size + data_size
726 726 if size <= 65536:
727 727 yield fp.read(size)
728 728 else:
729 729 yield from util.filechunkiter(fp, limit=size)
730 730
731 731 inline_stream = get_stream()
732 732 next(inline_stream)
733 733 return [
734 734 (self._indexfile, inline_stream, index_size + data_size),
735 735 ]
736 736 elif force_inline:
737 737
738 738 def get_stream():
739 739 with self.reading():
740 740 yield None
741 741
742 742 for rev in range(n):
743 743 idx = self.index.entry_binary(rev)
744 744 if rev == 0 and self._docket is None:
745 745 # re-inject the inline flag
746 746 header = self._format_flags
747 747 header |= self._format_version
748 748 header |= FLAG_INLINE_DATA
749 749 header = self.index.pack_header(header)
750 750 idx = header + idx
751 751 yield idx
752 752 yield self._getsegmentforrevs(rev, rev)[1]
753 753
754 754 inline_stream = get_stream()
755 755 next(inline_stream)
756 756 return [
757 757 (self._indexfile, inline_stream, index_size + data_size),
758 758 ]
759 759 else:
760 760
761 761 def get_index_stream():
762 762 with self._indexfp() as fp:
763 763 yield None
764 764 if index_size <= 65536:
765 765 yield fp.read(index_size)
766 766 else:
767 767 yield from util.filechunkiter(fp, limit=index_size)
768 768
769 769 def get_data_stream():
770 770 with self._datafp() as fp:
771 771 yield None
772 772 if data_size <= 65536:
773 773 yield fp.read(data_size)
774 774 else:
775 775 yield from util.filechunkiter(fp, limit=data_size)
776 776
777 777 index_stream = get_index_stream()
778 778 next(index_stream)
779 779 data_stream = get_data_stream()
780 780 next(data_stream)
781 781 return [
782 782 (self._datafile, data_stream, data_size),
783 783 (self._indexfile, index_stream, index_size),
784 784 ]
785 785
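A hedged consumption sketch for `get_streams` (`rl` and `out` are illustrative names): callers are expected to copy exactly `stream_size` bytes per returned stream. The initial `None` priming value has already been consumed via `next()` above, so iteration yields only bytes.

    for filename, bytes_stream, stream_size in rl.get_streams(max_linkrev):
        copied = 0
        for chunk in bytes_stream:
            out.write(chunk)  # `out`: some destination file object
            copied += len(chunk)
        assert copied == stream_size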
786 786 def _loadindex(self, docket=None):
787 787
788 788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
789 789
790 790 if self.postfix is not None:
791 791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
792 792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
793 793 entry_point = b'%s.i.a' % self.radix
794 794 elif self._try_split and self.opener.exists(self._split_index_file):
795 795 entry_point = self._split_index_file
796 796 else:
797 797 entry_point = b'%s.i' % self.radix
798 798
799 799 if docket is not None:
800 800 self._docket = docket
801 801 self._docket_file = entry_point
802 802 else:
803 803 self._initempty = True
804 804 entry_data = self._get_data(entry_point, mmapindexthreshold)
805 805 if len(entry_data) > 0:
806 806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
807 807 self._initempty = False
808 808 else:
809 809 header = new_header
810 810
811 811 self._format_flags = header & ~0xFFFF
812 812 self._format_version = header & 0xFFFF
813 813
814 814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
815 815 if supported_flags is None:
816 816 msg = _(b'unknown version (%d) in revlog %s')
817 817 msg %= (self._format_version, self.display_id)
818 818 raise error.RevlogError(msg)
819 819 elif self._format_flags & ~supported_flags:
820 820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
821 821 display_flag = self._format_flags >> 16
822 822 msg %= (display_flag, self._format_version, self.display_id)
823 823 raise error.RevlogError(msg)
824 824
825 825 features = FEATURES_BY_VERSION[self._format_version]
826 826 self._inline = features[b'inline'](self._format_flags)
827 827 self.delta_config.general_delta = features[b'generaldelta'](
828 828 self._format_flags
829 829 )
830 830 self.feature_config.has_side_data = features[b'sidedata']
831 831
832 832 if not features[b'docket']:
833 833 self._indexfile = entry_point
834 834 index_data = entry_data
835 835 else:
836 836 self._docket_file = entry_point
837 837 if self._initempty:
838 838 self._docket = docketutil.default_docket(self, header)
839 839 else:
840 840 self._docket = docketutil.parse_docket(
841 841 self, entry_data, use_pending=self._trypending
842 842 )
843 843
844 844 if self._docket is not None:
845 845 self._indexfile = self._docket.index_filepath()
846 846 index_data = b''
847 847 index_size = self._docket.index_end
848 848 if index_size > 0:
849 849 index_data = self._get_data(
850 850 self._indexfile, mmapindexthreshold, size=index_size
851 851 )
852 852 if len(index_data) < index_size:
853 853 msg = _(b'too few index data for %s: got %d, expected %d')
854 854 msg %= (self.display_id, len(index_data), index_size)
855 855 raise error.RevlogError(msg)
856 856
857 857 self._inline = False
858 858 # generaldelta implied by version 2 revlogs.
859 859 self.delta_config.general_delta = True
860 860 # the logic for persistent nodemap will be dealt with within the
861 861 # main docket, so disable it for now.
862 862 self._nodemap_file = None
863 863
864 864 if self._docket is not None:
865 865 self._datafile = self._docket.data_filepath()
866 866 self._sidedatafile = self._docket.sidedata_filepath()
867 867 elif self.postfix is None:
868 868 self._datafile = b'%s.d' % self.radix
869 869 else:
870 870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
871 871
872 872 self.nodeconstants = sha1nodeconstants
873 873 self.nullid = self.nodeconstants.nullid
874 874
875 875 # sparse-revlog can't be on without general-delta (issue6056)
876 876 if not self.delta_config.general_delta:
877 877 self.delta_config.sparse_revlog = False
878 878
879 879 self._storedeltachains = True
880 880
881 881 devel_nodemap = (
882 882 self._nodemap_file
883 883 and force_nodemap
884 884 and parse_index_v1_nodemap is not None
885 885 )
886 886
887 887 use_rust_index = False
888 888 if rustrevlog is not None:
889 889 if self._nodemap_file is not None:
890 890 use_rust_index = True
891 891 else:
892 892 use_rust_index = self.opener.options.get(b'rust.index')
893 893
894 894 self._parse_index = parse_index_v1
895 895 if self._format_version == REVLOGV0:
896 896 self._parse_index = revlogv0.parse_index_v0
897 897 elif self._format_version == REVLOGV2:
898 898 self._parse_index = parse_index_v2
899 899 elif self._format_version == CHANGELOGV2:
900 900 self._parse_index = parse_index_cl_v2
901 901 elif devel_nodemap:
902 902 self._parse_index = parse_index_v1_nodemap
903 903 elif use_rust_index:
904 904 self._parse_index = parse_index_v1_mixed
905 905 try:
906 906 d = self._parse_index(index_data, self._inline)
907 907 index, chunkcache = d
908 908 use_nodemap = (
909 909 not self._inline
910 910 and self._nodemap_file is not None
911 911 and hasattr(index, 'update_nodemap_data')
912 912 )
913 913 if use_nodemap:
914 914 nodemap_data = nodemaputil.persisted_data(self)
915 915 if nodemap_data is not None:
916 916 docket = nodemap_data[0]
917 917 if (
918 918 len(d[0]) > docket.tip_rev
919 919 and d[0][docket.tip_rev][7] == docket.tip_node
920 920 ):
921 921 # no changelog tampering
922 922 self._nodemap_docket = docket
923 923 index.update_nodemap_data(*nodemap_data)
924 924 except (ValueError, IndexError):
925 925 raise error.RevlogError(
926 926 _(b"index %s is corrupted") % self.display_id
927 927 )
928 928 self.index = index
929 929 self._segmentfile = randomaccessfile.randomaccessfile(
930 930 self.opener,
931 931 (self._indexfile if self._inline else self._datafile),
932 932 self.data_config.chunk_cache_size,
933 933 chunkcache,
934 934 )
935 935 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
936 936 self.opener,
937 937 self._sidedatafile,
938 938 self.data_config.chunk_cache_size,
939 939 )
940 940 # revnum -> (chain-length, sum-delta-length)
941 941 self._chaininfocache = util.lrucachedict(500)
942 942 # revlog header -> revlog compressor
943 943 self._decompressors = {}
944 944
945 945 def get_revlog(self):
946 946 """simple function to mirror API of other not-really-revlog API"""
947 947 return self
948 948
949 949 @util.propertycache
950 950 def revlog_kind(self):
951 951 return self.target[0]
952 952
953 953 @util.propertycache
954 954 def display_id(self):
955 955 """The public facing "ID" of the revlog that we use in message"""
956 956 if self.revlog_kind == KIND_FILELOG:
957 957 # Reference the file without the "data/" prefix, so it is familiar
958 958 # to the user.
959 959 return self.target[1]
960 960 else:
961 961 return self.radix
962 962
963 963 def _get_decompressor(self, t):
964 964 try:
965 965 compressor = self._decompressors[t]
966 966 except KeyError:
967 967 try:
968 968 engine = util.compengines.forrevlogheader(t)
969 969 compressor = engine.revlogcompressor(
970 970 self.feature_config.compression_engine_options
971 971 )
972 972 self._decompressors[t] = compressor
973 973 except KeyError:
974 974 raise error.RevlogError(
975 975 _(b'unknown compression type %s') % binascii.hexlify(t)
976 976 )
977 977 return compressor
978 978
979 979 @util.propertycache
980 980 def _compressor(self):
981 981 engine = util.compengines[self.feature_config.compression_engine]
982 982 return engine.revlogcompressor(
983 983 self.feature_config.compression_engine_options
984 984 )
985 985
986 986 @util.propertycache
987 987 def _decompressor(self):
988 988 """the default decompressor"""
989 989 if self._docket is None:
990 990 return None
991 991 t = self._docket.default_compression_header
992 992 c = self._get_decompressor(t)
993 993 return c.decompress
994 994
995 995 def _indexfp(self):
996 996 """file object for the revlog's index file"""
997 997 return self.opener(self._indexfile, mode=b"r")
998 998
999 999 def __index_write_fp(self):
1000 1000 # You should not use this directly; use `_writing` instead
1001 1001 try:
1002 1002 f = self.opener(
1003 1003 self._indexfile,
1004 1004 mode=b"r+",
1005 1005 checkambig=self.data_config.check_ambig,
1006 1006 )
1007 1007 if self._docket is None:
1008 1008 f.seek(0, os.SEEK_END)
1009 1009 else:
1010 1010 f.seek(self._docket.index_end, os.SEEK_SET)
1011 1011 return f
1012 1012 except FileNotFoundError:
1013 1013 return self.opener(
1014 1014 self._indexfile,
1015 1015 mode=b"w+",
1016 1016 checkambig=self.data_config.check_ambig,
1017 1017 )
1018 1018
1019 1019 def __index_new_fp(self):
1020 1020 # You should not use this unless you are upgrading from an inline revlog
1021 1021 return self.opener(
1022 1022 self._indexfile,
1023 1023 mode=b"w",
1024 1024 checkambig=self.data_config.check_ambig,
1025 1025 atomictemp=True,
1026 1026 )
1027 1027
1028 1028 def _datafp(self, mode=b'r'):
1029 1029 """file object for the revlog's data file"""
1030 1030 return self.opener(self._datafile, mode=mode)
1031 1031
1032 1032 @contextlib.contextmanager
1033 1033 def _sidedatareadfp(self):
1034 1034 """file object suitable to read sidedata"""
1035 1035 if self._writinghandles:
1036 1036 yield self._writinghandles[2]
1037 1037 else:
1038 1038 with self.opener(self._sidedatafile) as fp:
1039 1039 yield fp
1040 1040
1041 1041 def tiprev(self):
1042 1042 return len(self.index) - 1
1043 1043
1044 1044 def tip(self):
1045 1045 return self.node(self.tiprev())
1046 1046
1047 1047 def __contains__(self, rev):
1048 1048 return 0 <= rev < len(self)
1049 1049
1050 1050 def __len__(self):
1051 1051 return len(self.index)
1052 1052
1053 1053 def __iter__(self):
1054 1054 return iter(range(len(self)))
1055 1055
1056 1056 def revs(self, start=0, stop=None):
1057 1057 """iterate over all rev in this revlog (from start to stop)"""
1058 1058 return storageutil.iterrevs(len(self), start=start, stop=stop)
1059 1059
1060 1060 def hasnode(self, node):
1061 1061 try:
1062 1062 self.rev(node)
1063 1063 return True
1064 1064 except KeyError:
1065 1065 return False
1066 1066
1067 1067 def _candelta(self, baserev, rev):
1068 1068 """whether two revisions (baserev, rev) can be delta-ed or not"""
1069 1069 # Disable delta if either rev requires a content-changing flag
1070 1070 # processor (ex. LFS). This is because such flag processor can alter
1071 1071 # the rawtext content that the delta will be based on, and two clients
1072 1072 # could have a same revlog node with different flags (i.e. different
1073 1073 # rawtext contents) and the delta could be incompatible.
1074 1074 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1075 1075 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1076 1076 ):
1077 1077 return False
1078 1078 return True
1079 1079
1080 1080 def update_caches(self, transaction):
1081 1081 """update on disk cache
1082 1082
1083 1083 If a transaction is passed, the update may be delayed to transaction
1084 1084 commit."""
1085 1085 if self._nodemap_file is not None:
1086 1086 if transaction is None:
1087 1087 nodemaputil.update_persistent_nodemap(self)
1088 1088 else:
1089 1089 nodemaputil.setup_persistent_nodemap(transaction, self)
1090 1090
1091 1091 def clearcaches(self):
1092 1092 """Clear in-memory caches"""
1093 1093 self._revisioncache = None
1094 1094 self._chainbasecache.clear()
1095 1095 self._segmentfile.clear_cache()
1096 1096 self._segmentfile_sidedata.clear_cache()
1097 1097 self._pcache = {}
1098 1098 self._nodemap_docket = None
1099 1099 self.index.clearcaches()
1100 1100 # The python code is the one responsible for validating the docket, we
1101 1101 # end up having to refresh it here.
1102 1102 use_nodemap = (
1103 1103 not self._inline
1104 1104 and self._nodemap_file is not None
1105 1105 and hasattr(self.index, 'update_nodemap_data')
1106 1106 )
1107 1107 if use_nodemap:
1108 1108 nodemap_data = nodemaputil.persisted_data(self)
1109 1109 if nodemap_data is not None:
1110 1110 self._nodemap_docket = nodemap_data[0]
1111 1111 self.index.update_nodemap_data(*nodemap_data)
1112 1112
1113 1113 def rev(self, node):
1114 1114 """return the revision number associated with a <nodeid>"""
1115 1115 try:
1116 1116 return self.index.rev(node)
1117 1117 except TypeError:
1118 1118 raise
1119 1119 except error.RevlogError:
1120 1120 # parsers.c radix tree lookup failed
1121 1121 if (
1122 1122 node == self.nodeconstants.wdirid
1123 1123 or node in self.nodeconstants.wdirfilenodeids
1124 1124 ):
1125 1125 raise error.WdirUnsupported
1126 1126 raise error.LookupError(node, self.display_id, _(b'no node'))
1127 1127
1128 1128 # Accessors for index entries.
1129 1129
1130 1130 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1131 1131 # are flags.
1132 1132 def start(self, rev):
1133 1133 return int(self.index[rev][0] >> 16)
1134 1134
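A hedged sketch of the packing that `start` (and `flags` below) undo: the first index-entry field stores `offset << 16 | flags` in a single integer, so the two accessors mask opposite halves (the offset value is illustrative):

    offset_flags = (1234 << 16) | REVIDX_ISCENSORED
    assert offset_flags >> 16 == 1234                   # what start() extracts
    assert offset_flags & 0xFFFF == REVIDX_ISCENSORED   # what flags() extracts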
1135 1135 def sidedata_cut_off(self, rev):
1136 1136 sd_cut_off = self.index[rev][8]
1137 1137 if sd_cut_off != 0:
1138 1138 return sd_cut_off
1139 1139 # This is some annoying dance, because entries without sidedata
1140 1140 # currently use 0 as their offset (instead of previous-offset +
1141 1141 # previous-size).
1142 1142 #
1143 1143 # We should reconsider this sidedata → 0 sidedata_offset policy.
1144 1144 # In the meantime, we need this.
1145 1145 while 0 <= rev:
1146 1146 e = self.index[rev]
1147 1147 if e[9] != 0:
1148 1148 return e[8] + e[9]
1149 1149 rev -= 1
1150 1150 return 0
1151 1151
1152 1152 def flags(self, rev):
1153 1153 return self.index[rev][0] & 0xFFFF
1154 1154
1155 1155 def length(self, rev):
1156 1156 return self.index[rev][1]
1157 1157
1158 1158 def sidedata_length(self, rev):
1159 1159 if not self.feature_config.has_side_data:
1160 1160 return 0
1161 1161 return self.index[rev][9]
1162 1162
1163 1163 def rawsize(self, rev):
1164 1164 """return the length of the uncompressed text for a given revision"""
1165 1165 l = self.index[rev][2]
1166 1166 if l >= 0:
1167 1167 return l
1168 1168
1169 1169 t = self.rawdata(rev)
1170 1170 return len(t)
1171 1171
1172 1172 def size(self, rev):
1173 1173 """length of non-raw text (processed by a "read" flag processor)"""
1174 1174 # fast path: if no "read" flag processor could change the content,
1175 1175 # size is rawsize. note: ELLIPSIS is known to not change the content.
1176 1176 flags = self.flags(rev)
1177 1177 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1178 1178 return self.rawsize(rev)
1179 1179
1180 1180 return len(self.revision(rev))
1181 1181
1182 1182 def fast_rank(self, rev):
1183 1183 """Return the rank of a revision if already known, or None otherwise.
1184 1184
1185 1185 The rank of a revision is the size of the sub-graph it defines as a
1186 1186 head. Equivalently, the rank of a revision `r` is the size of the set
1187 1187 `ancestors(r)`, `r` included.
1188 1188
1189 1189 This method returns the rank retrieved from the revlog in constant
1190 1190 time. It makes no attempt at computing unknown values for versions of
1191 1191 the revlog which do not persist the rank.
1192 1192 """
1193 1193 rank = self.index[rev][ENTRY_RANK]
1194 1194 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1195 1195 return None
1196 1196 if rev == nullrev:
1197 1197 return 0 # convention
1198 1198 return rank
1199 1199
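For contrast, a hedged O(|ancestors|) version of the value `fast_rank` reads in constant time; the real method deliberately never computes missing ranks, so `slow_rank` here is purely to make the definition concrete:

    def slow_rank(rl, rev):
        # size of ancestors(rev), rev included
        seen = {rev}
        stack = [rev]
        while stack:
            for p in rl.parentrevs(stack.pop()):
                if p != nullrev and p not in seen:
                    seen.add(p)
                    stack.append(p)
        return len(seen)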
1200 1200 def chainbase(self, rev):
1201 1201 base = self._chainbasecache.get(rev)
1202 1202 if base is not None:
1203 1203 return base
1204 1204
1205 1205 index = self.index
1206 1206 iterrev = rev
1207 1207 base = index[iterrev][3]
1208 1208 while base != iterrev:
1209 1209 iterrev = base
1210 1210 base = index[iterrev][3]
1211 1211
1212 1212 self._chainbasecache[rev] = base
1213 1213 return base
1214 1214
1215 1215 def linkrev(self, rev):
1216 1216 return self.index[rev][4]
1217 1217
1218 1218 def parentrevs(self, rev):
1219 1219 try:
1220 1220 entry = self.index[rev]
1221 1221 except IndexError:
1222 1222 if rev == wdirrev:
1223 1223 raise error.WdirUnsupported
1224 1224 raise
1225 1225
1226 1226 if self.canonical_parent_order and entry[5] == nullrev:
1227 1227 return entry[6], entry[5]
1228 1228 else:
1229 1229 return entry[5], entry[6]
1230 1230
1231 1231 # fast parentrevs(rev) where rev isn't filtered
1232 1232 _uncheckedparentrevs = parentrevs
1233 1233
1234 1234 def node(self, rev):
1235 1235 try:
1236 1236 return self.index[rev][7]
1237 1237 except IndexError:
1238 1238 if rev == wdirrev:
1239 1239 raise error.WdirUnsupported
1240 1240 raise
1241 1241
1242 1242 # Derived from index values.
1243 1243
1244 1244 def end(self, rev):
1245 1245 return self.start(rev) + self.length(rev)
1246 1246
1247 1247 def parents(self, node):
1248 1248 i = self.index
1249 1249 d = i[self.rev(node)]
1250 1250 # inline node() to avoid function call overhead
1251 1251 if self.canonical_parent_order and d[5] == self.nullid:
1252 1252 return i[d[6]][7], i[d[5]][7]
1253 1253 else:
1254 1254 return i[d[5]][7], i[d[6]][7]
1255 1255
1256 1256 def chainlen(self, rev):
1257 1257 return self._chaininfo(rev)[0]
1258 1258
1259 1259 def _chaininfo(self, rev):
1260 1260 chaininfocache = self._chaininfocache
1261 1261 if rev in chaininfocache:
1262 1262 return chaininfocache[rev]
1263 1263 index = self.index
1264 1264 generaldelta = self.delta_config.general_delta
1265 1265 iterrev = rev
1266 1266 e = index[iterrev]
1267 1267 clen = 0
1268 1268 compresseddeltalen = 0
1269 1269 while iterrev != e[3]:
1270 1270 clen += 1
1271 1271 compresseddeltalen += e[1]
1272 1272 if generaldelta:
1273 1273 iterrev = e[3]
1274 1274 else:
1275 1275 iterrev -= 1
1276 1276 if iterrev in chaininfocache:
1277 1277 t = chaininfocache[iterrev]
1278 1278 clen += t[0]
1279 1279 compresseddeltalen += t[1]
1280 1280 break
1281 1281 e = index[iterrev]
1282 1282 else:
1283 1283 # Add text length of base since decompressing that also takes
1284 1284 # work. For cache hits the length is already included.
1285 1285 compresseddeltalen += e[1]
1286 1286 r = (clen, compresseddeltalen)
1287 1287 chaininfocache[rev] = r
1288 1288 return r
1289 1289
1290 1290 def _deltachain(self, rev, stoprev=None):
1291 1291 """Obtain the delta chain for a revision.
1292 1292
1293 1293 ``stoprev`` specifies a revision to stop at. If not specified, we
1294 1294 stop at the base of the chain.
1295 1295
1296 1296 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1297 1297 revs in ascending order and ``stopped`` is a bool indicating whether
1298 1298 ``stoprev`` was hit.
1299 1299 """
1300 1300 generaldelta = self.delta_config.general_delta
1301 1301 # Try C implementation.
1302 1302 try:
1303 1303 return self.index.deltachain(rev, stoprev, generaldelta)
1304 1304 except AttributeError:
1305 1305 pass
1306 1306
1307 1307 chain = []
1308 1308
1309 1309 # Alias to prevent attribute lookup in tight loop.
1310 1310 index = self.index
1311 1311
1312 1312 iterrev = rev
1313 1313 e = index[iterrev]
1314 1314 while iterrev != e[3] and iterrev != stoprev:
1315 1315 chain.append(iterrev)
1316 1316 if generaldelta:
1317 1317 iterrev = e[3]
1318 1318 else:
1319 1319 iterrev -= 1
1320 1320 e = index[iterrev]
1321 1321
1322 1322 if iterrev == stoprev:
1323 1323 stopped = True
1324 1324 else:
1325 1325 chain.append(iterrev)
1326 1326 stopped = False
1327 1327
1328 1328 chain.reverse()
1329 1329 return chain, stopped
1330 1330
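A hedged sketch of what the chain is for: a revision is rebuilt by taking the full text stored at the chain base and applying each following delta in order, e.g. with `mdiff.patches`. This leans on the internal `_chunk` helper and skips the caching and flag processing the real read path performs:

    chain, stopped = rl._deltachain(rev)
    text = rl._chunk(chain[0])  # full text stored at the base
    if len(chain) > 1:
        text = mdiff.patches(text, [rl._chunk(r) for r in chain[1:]])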
1331 1331 def ancestors(self, revs, stoprev=0, inclusive=False):
1332 1332 """Generate the ancestors of 'revs' in reverse revision order.
1333 1333 Does not generate revs lower than stoprev.
1334 1334
1335 1335 See the documentation for ancestor.lazyancestors for more details."""
1336 1336
1337 1337 # first, make sure start revisions aren't filtered
1338 1338 revs = list(revs)
1339 1339 checkrev = self.node
1340 1340 for r in revs:
1341 1341 checkrev(r)
1342 1342 # and we're sure ancestors aren't filtered as well
1343 1343
1344 1344 if rustancestor is not None and self.index.rust_ext_compat:
1345 1345 lazyancestors = rustancestor.LazyAncestors
1346 1346 arg = self.index
1347 1347 else:
1348 1348 lazyancestors = ancestor.lazyancestors
1349 1349 arg = self._uncheckedparentrevs
1350 1350 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1351 1351
1352 1352 def descendants(self, revs):
1353 1353 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1354 1354
1355 1355 def findcommonmissing(self, common=None, heads=None):
1356 1356 """Return a tuple of the ancestors of common and the ancestors of heads
1357 1357 that are not ancestors of common. In revset terminology, we return the
1358 1358 tuple:
1359 1359
1360 1360 ::common, (::heads) - (::common)
1361 1361
1362 1362 The list is sorted by revision number, meaning it is
1363 1363 topologically sorted.
1364 1364
1365 1365 'heads' and 'common' are both lists of node IDs. If heads is
1366 1366 not supplied, uses all of the revlog's heads. If common is not
1367 1367 supplied, uses nullid."""
1368 1368 if common is None:
1369 1369 common = [self.nullid]
1370 1370 if heads is None:
1371 1371 heads = self.heads()
1372 1372
1373 1373 common = [self.rev(n) for n in common]
1374 1374 heads = [self.rev(n) for n in heads]
1375 1375
1376 1376 # we want the ancestors, but inclusive
1377 1377 class lazyset:
1378 1378 def __init__(self, lazyvalues):
1379 1379 self.addedvalues = set()
1380 1380 self.lazyvalues = lazyvalues
1381 1381
1382 1382 def __contains__(self, value):
1383 1383 return value in self.addedvalues or value in self.lazyvalues
1384 1384
1385 1385 def __iter__(self):
1386 1386 added = self.addedvalues
1387 1387 for r in added:
1388 1388 yield r
1389 1389 for r in self.lazyvalues:
1390 1390 if not r in added:
1391 1391 yield r
1392 1392
1393 1393 def add(self, value):
1394 1394 self.addedvalues.add(value)
1395 1395
1396 1396 def update(self, values):
1397 1397 self.addedvalues.update(values)
1398 1398
1399 1399 has = lazyset(self.ancestors(common))
1400 1400 has.add(nullrev)
1401 1401 has.update(common)
1402 1402
1403 1403 # take all ancestors from heads that aren't in has
1404 1404 missing = set()
1405 1405 visit = collections.deque(r for r in heads if r not in has)
1406 1406 while visit:
1407 1407 r = visit.popleft()
1408 1408 if r in missing:
1409 1409 continue
1410 1410 else:
1411 1411 missing.add(r)
1412 1412 for p in self.parentrevs(r):
1413 1413 if p not in has:
1414 1414 visit.append(p)
1415 1415 missing = list(missing)
1416 1416 missing.sort()
1417 1417 return has, [self.node(miss) for miss in missing]
1418 1418
1419 1419 def incrementalmissingrevs(self, common=None):
1420 1420 """Return an object that can be used to incrementally compute the
1421 1421 revision numbers of the ancestors of arbitrary sets that are not
1422 1422 ancestors of common. This is an ancestor.incrementalmissingancestors
1423 1423 object.
1424 1424
1425 1425 'common' is a list of revision numbers. If common is not supplied, uses
1426 1426 nullrev.
1427 1427 """
1428 1428 if common is None:
1429 1429 common = [nullrev]
1430 1430
1431 1431 if rustancestor is not None and self.index.rust_ext_compat:
1432 1432 return rustancestor.MissingAncestors(self.index, common)
1433 1433 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1434 1434
1435 1435 def findmissingrevs(self, common=None, heads=None):
1436 1436 """Return the revision numbers of the ancestors of heads that
1437 1437 are not ancestors of common.
1438 1438
1439 1439 More specifically, return a list of revision numbers corresponding to
1440 1440 nodes N such that every N satisfies the following constraints:
1441 1441
1442 1442 1. N is an ancestor of some node in 'heads'
1443 1443 2. N is not an ancestor of any node in 'common'
1444 1444
1445 1445 The list is sorted by revision number, meaning it is
1446 1446 topologically sorted.
1447 1447
1448 1448 'heads' and 'common' are both lists of revision numbers. If heads is
1449 1449 not supplied, uses all of the revlog's heads. If common is not
1450 1450 supplied, uses nullid."""
1451 1451 if common is None:
1452 1452 common = [nullrev]
1453 1453 if heads is None:
1454 1454 heads = self.headrevs()
1455 1455
1456 1456 inc = self.incrementalmissingrevs(common=common)
1457 1457 return inc.missingancestors(heads)
1458 1458
1459 1459 def findmissing(self, common=None, heads=None):
1460 1460 """Return the ancestors of heads that are not ancestors of common.
1461 1461
1462 1462 More specifically, return a list of nodes N such that every N
1463 1463 satisfies the following constraints:
1464 1464
1465 1465 1. N is an ancestor of some node in 'heads'
1466 1466 2. N is not an ancestor of any node in 'common'
1467 1467
1468 1468 The list is sorted by revision number, meaning it is
1469 1469 topologically sorted.
1470 1470
1471 1471 'heads' and 'common' are both lists of node IDs. If heads is
1472 1472 not supplied, uses all of the revlog's heads. If common is not
1473 1473 supplied, uses nullid."""
1474 1474 if common is None:
1475 1475 common = [self.nullid]
1476 1476 if heads is None:
1477 1477 heads = self.heads()
1478 1478
1479 1479 common = [self.rev(n) for n in common]
1480 1480 heads = [self.rev(n) for n in heads]
1481 1481
1482 1482 inc = self.incrementalmissingrevs(common=common)
1483 1483 return [self.node(r) for r in inc.missingancestors(heads)]
1484 1484
1485 1485 def nodesbetween(self, roots=None, heads=None):
1486 1486 """Return a topological path from 'roots' to 'heads'.
1487 1487
1488 1488 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1489 1489 topologically sorted list of all nodes N that satisfy both of
1490 1490 these constraints:
1491 1491
1492 1492 1. N is a descendant of some node in 'roots'
1493 1493 2. N is an ancestor of some node in 'heads'
1494 1494
1495 1495 Every node is considered to be both a descendant and an ancestor
1496 1496 of itself, so every reachable node in 'roots' and 'heads' will be
1497 1497 included in 'nodes'.
1498 1498
1499 1499 'outroots' is the list of reachable nodes in 'roots', i.e., the
1500 1500 subset of 'roots' that is returned in 'nodes'. Likewise,
1501 1501 'outheads' is the subset of 'heads' that is also in 'nodes'.
1502 1502
1503 1503 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1504 1504 unspecified, uses nullid as the only root. If 'heads' is
1505 1505 unspecified, uses list of all of the revlog's heads."""
1506 1506 nonodes = ([], [], [])
1507 1507 if roots is not None:
1508 1508 roots = list(roots)
1509 1509 if not roots:
1510 1510 return nonodes
1511 1511 lowestrev = min([self.rev(n) for n in roots])
1512 1512 else:
1513 1513 roots = [self.nullid] # Everybody's a descendant of nullid
1514 1514 lowestrev = nullrev
1515 1515 if (lowestrev == nullrev) and (heads is None):
1516 1516 # We want _all_ the nodes!
1517 1517 return (
1518 1518 [self.node(r) for r in self],
1519 1519 [self.nullid],
1520 1520 list(self.heads()),
1521 1521 )
1522 1522 if heads is None:
1523 1523 # All nodes are ancestors, so the latest ancestor is the last
1524 1524 # node.
1525 1525 highestrev = len(self) - 1
1526 1526 # Set ancestors to None to signal that every node is an ancestor.
1527 1527 ancestors = None
1528 1528 # Set heads to an empty dictionary for later discovery of heads
1529 1529 heads = {}
1530 1530 else:
1531 1531 heads = list(heads)
1532 1532 if not heads:
1533 1533 return nonodes
1534 1534 ancestors = set()
1535 1535 # Turn heads into a dictionary so we can remove 'fake' heads.
1536 1536 # Also, later we will be using it to filter out the heads we can't
1537 1537 # find from roots.
1538 1538 heads = dict.fromkeys(heads, False)
1539 1539 # Start at the top and keep marking parents until we're done.
1540 1540 nodestotag = set(heads)
1541 1541 # Remember where the top was so we can use it as a limit later.
1542 1542 highestrev = max([self.rev(n) for n in nodestotag])
1543 1543 while nodestotag:
1544 1544 # grab a node to tag
1545 1545 n = nodestotag.pop()
1546 1546 # Never tag nullid
1547 1547 if n == self.nullid:
1548 1548 continue
1549 1549 # A node's revision number represents its place in a
1550 1550 # topologically sorted list of nodes.
1551 1551 r = self.rev(n)
1552 1552 if r >= lowestrev:
1553 1553 if n not in ancestors:
1554 1554 # If we are possibly a descendant of one of the roots
1555 1555 # and we haven't already been marked as an ancestor
1556 1556 ancestors.add(n) # Mark as ancestor
1557 1557 # Add non-nullid parents to list of nodes to tag.
1558 1558 nodestotag.update(
1559 1559 [p for p in self.parents(n) if p != self.nullid]
1560 1560 )
1561 1561 elif n in heads: # We've seen it before, is it a fake head?
1562 1562 # So it is, real heads should not be the ancestors of
1563 1563 # any other heads.
1564 1564 heads.pop(n)
1565 1565 if not ancestors:
1566 1566 return nonodes
1567 1567 # Now that we have our set of ancestors, we want to remove any
1568 1568 # roots that are not ancestors.
1569 1569
1570 1570 # If one of the roots was nullid, everything is included anyway.
1571 1571 if lowestrev > nullrev:
1572 1572 # But, since we weren't, let's recompute the lowest rev to not
1573 1573 # include roots that aren't ancestors.
1574 1574
1575 1575 # Filter out roots that aren't ancestors of heads
1576 1576 roots = [root for root in roots if root in ancestors]
1577 1577 # Recompute the lowest revision
1578 1578 if roots:
1579 1579 lowestrev = min([self.rev(root) for root in roots])
1580 1580 else:
1581 1581 # No more roots? Return empty list
1582 1582 return nonodes
1583 1583 else:
1584 1584 # We are descending from nullid, and don't need to care about
1585 1585 # any other roots.
1586 1586 lowestrev = nullrev
1587 1587 roots = [self.nullid]
1588 1588 # Transform our roots list into a set.
1589 1589 descendants = set(roots)
1590 1590 # Also, keep the original roots so we can filter out roots that aren't
1591 1591 # 'real' roots (i.e. are descended from other roots).
1592 1592 roots = descendants.copy()
1593 1593 # Our topologically sorted list of output nodes.
1594 1594 orderedout = []
1595 1595 # Don't start at nullid since we don't want nullid in our output list,
1596 1596 # and if nullid shows up in descendants, empty parents will look like
1597 1597 # they're descendants.
1598 1598 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1599 1599 n = self.node(r)
1600 1600 isdescendant = False
1601 1601 if lowestrev == nullrev: # Everybody is a descendant of nullid
1602 1602 isdescendant = True
1603 1603 elif n in descendants:
1604 1604 # n is already a descendant
1605 1605 isdescendant = True
1606 1606 # This check only needs to be done here because all the roots
1607 1607 # will start being marked as descendants before the loop.
1608 1608 if n in roots:
1609 1609 # If n was a root, check if it's a 'real' root.
1610 1610 p = tuple(self.parents(n))
1611 1611 # If any of its parents are descendants, it's not a root.
1612 1612 if (p[0] in descendants) or (p[1] in descendants):
1613 1613 roots.remove(n)
1614 1614 else:
1615 1615 p = tuple(self.parents(n))
1616 1616 # A node is a descendant if either of its parents are
1617 1617 # descendants. (We seeded the descendants set with the roots
1618 1618 # up there, remember?)
1619 1619 if (p[0] in descendants) or (p[1] in descendants):
1620 1620 descendants.add(n)
1621 1621 isdescendant = True
1622 1622 if isdescendant and ((ancestors is None) or (n in ancestors)):
1623 1623 # Only include nodes that are both descendants and ancestors.
1624 1624 orderedout.append(n)
1625 1625 if (ancestors is not None) and (n in heads):
1626 1626 # We're trying to figure out which heads are reachable
1627 1627 # from roots.
1628 1628 # Mark this head as having been reached
1629 1629 heads[n] = True
1630 1630 elif ancestors is None:
1631 1631 # Otherwise, we're trying to discover the heads.
1632 1632 # Assume this is a head because if it isn't, the next step
1633 1633 # will eventually remove it.
1634 1634 heads[n] = True
1635 1635 # But, obviously its parents aren't.
1636 1636 for p in self.parents(n):
1637 1637 heads.pop(p, None)
1638 1638 heads = [head for head, flag in heads.items() if flag]
1639 1639 roots = list(roots)
1640 1640 assert orderedout
1641 1641 assert roots
1642 1642 assert heads
1643 1643 return (orderedout, roots, heads)
1644 1644
1645 1645 def headrevs(self, revs=None):
1646 1646 if revs is None:
1647 1647 try:
1648 1648 return self.index.headrevs()
1649 1649 except AttributeError:
1650 1650 return self._headrevs()
1651 1651 if rustdagop is not None and self.index.rust_ext_compat:
1652 1652 return rustdagop.headrevs(self.index, revs)
1653 1653 return dagop.headrevs(revs, self._uncheckedparentrevs)
1654 1654
1655 1655 def computephases(self, roots):
1656 1656 return self.index.computephasesmapsets(roots)
1657 1657
1658 1658 def _headrevs(self):
1659 1659 count = len(self)
1660 1660 if not count:
1661 1661 return [nullrev]
1662 1662 # we won't iterate over filtered revs, so nobody is a head at the start
1663 1663 ishead = [0] * (count + 1)
1664 1664 index = self.index
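# single pass from root to tip: each rev claims headship when visited,
# then clears the claim for its parents (entries 5 and 6 of an index
# tuple are the two parent revisions)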
1665 1665 for r in self:
1666 1666 ishead[r] = 1 # I may be a head
1667 1667 e = index[r]
1668 1668 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1669 1669 return [r for r, val in enumerate(ishead) if val]
1670 1670
1671 1671 def heads(self, start=None, stop=None):
1672 1672 """return the list of all nodes that have no children
1673 1673
1674 1674 if start is specified, only heads that are descendants of
1675 1675 start will be returned
1676 1676 if stop is specified, it will consider all the revs from stop
1677 1677 as if they had no children
1678 1678 """
1679 1679 if start is None and stop is None:
1680 1680 if not len(self):
1681 1681 return [self.nullid]
1682 1682 return [self.node(r) for r in self.headrevs()]
1683 1683
1684 1684 if start is None:
1685 1685 start = nullrev
1686 1686 else:
1687 1687 start = self.rev(start)
1688 1688
1689 1689 stoprevs = {self.rev(n) for n in stop or []}
1690 1690
1691 1691 revs = dagop.headrevssubset(
1692 1692 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1693 1693 )
1694 1694
1695 1695 return [self.node(rev) for rev in revs]
1696 1696
1697 1697 def children(self, node):
1698 1698 """find the children of a given node"""
1699 1699 c = []
1700 1700 p = self.rev(node)
1701 1701 for r in self.revs(start=p + 1):
1702 1702 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1703 1703 if prevs:
1704 1704 for pr in prevs:
1705 1705 if pr == p:
1706 1706 c.append(self.node(r))
1707 1707 elif p == nullrev:
1708 1708 c.append(self.node(r))
1709 1709 return c
1710 1710
1711 1711 def commonancestorsheads(self, a, b):
1712 1712 """calculate all the heads of the common ancestors of nodes a and b"""
1713 1713 a, b = self.rev(a), self.rev(b)
1714 1714 ancs = self._commonancestorsheads(a, b)
1715 1715 return pycompat.maplist(self.node, ancs)
1716 1716
1717 1717 def _commonancestorsheads(self, *revs):
1718 1718 """calculate all the heads of the common ancestors of revs"""
1719 1719 try:
1720 1720 ancs = self.index.commonancestorsheads(*revs)
1721 1721 except (AttributeError, OverflowError): # C implementation failed
1722 1722 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1723 1723 return ancs
1724 1724
1725 1725 def isancestor(self, a, b):
1726 1726 """return True if node a is an ancestor of node b
1727 1727
1728 1728 A revision is considered an ancestor of itself."""
1729 1729 a, b = self.rev(a), self.rev(b)
1730 1730 return self.isancestorrev(a, b)
1731 1731
1732 1732 def isancestorrev(self, a, b):
1733 1733 """return True if revision a is an ancestor of revision b
1734 1734
1735 1735 A revision is considered an ancestor of itself.
1736 1736
1737 1737 The implementation of this is trivial but the use of
1738 1738 reachableroots is not."""
1739 1739 if a == nullrev:
1740 1740 return True
1741 1741 elif a == b:
1742 1742 return True
1743 1743 elif a > b:
1744 1744 return False
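# at this point a < b; reachableroots with roots=[a] and heads=[b]
# returns a non-empty set exactly when walking b's ancestry (bounded
# below by a) reaches a itself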
1745 1745 return bool(self.reachableroots(a, [b], [a], includepath=False))
1746 1746
1747 1747 def reachableroots(self, minroot, heads, roots, includepath=False):
1748 1748 """return (heads(::(<roots> and <roots>::<heads>)))
1749 1749
1750 1750 If includepath is True, return (<roots>::<heads>)."""
1751 1751 try:
1752 1752 return self.index.reachableroots2(
1753 1753 minroot, heads, roots, includepath
1754 1754 )
1755 1755 except AttributeError:
1756 1756 return dagop._reachablerootspure(
1757 1757 self.parentrevs, minroot, roots, heads, includepath
1758 1758 )
1759 1759
1760 1760 def ancestor(self, a, b):
1761 1761 """calculate the "best" common ancestor of nodes a and b"""
1762 1762
1763 1763 a, b = self.rev(a), self.rev(b)
1764 1764 try:
1765 1765 ancs = self.index.ancestors(a, b)
1766 1766 except (AttributeError, OverflowError):
1767 1767 ancs = ancestor.ancestors(self.parentrevs, a, b)
1768 1768 if ancs:
1769 1769 # choose a consistent winner when there's a tie
1770 1770 return min(map(self.node, ancs))
1771 1771 return self.nullid
1772 1772
1773 1773 def _match(self, id):
1774 1774 if isinstance(id, int):
1775 1775 # rev
1776 1776 return self.node(id)
1777 1777 if len(id) == self.nodeconstants.nodelen:
1778 1778 # possibly a binary node
1779 1779 # odds of a binary node being all hex in ASCII are 1 in 10**25
1780 1780 try:
1781 1781 node = id
1782 1782 self.rev(node) # quick search the index
1783 1783 return node
1784 1784 except error.LookupError:
1785 1785 pass # may be partial hex id
1786 1786 try:
1787 1787 # str(rev)
1788 1788 rev = int(id)
1789 1789 if b"%d" % rev != id:
1790 1790 raise ValueError
1791 1791 if rev < 0:
1792 1792 rev = len(self) + rev
1793 1793 if rev < 0 or rev >= len(self):
1794 1794 raise ValueError
1795 1795 return self.node(rev)
1796 1796 except (ValueError, OverflowError):
1797 1797 pass
1798 1798 if len(id) == 2 * self.nodeconstants.nodelen:
1799 1799 try:
1800 1800 # a full hex nodeid?
1801 1801 node = bin(id)
1802 1802 self.rev(node)
1803 1803 return node
1804 1804 except (binascii.Error, error.LookupError):
1805 1805 pass
1806 1806
1807 1807 def _partialmatch(self, id):
1808 1808 # we don't care about wdirfilenodeids as they should always be full hashes
1809 1809 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1810 1810 ambiguous = False
1811 1811 try:
1812 1812 partial = self.index.partialmatch(id)
1813 1813 if partial and self.hasnode(partial):
1814 1814 if maybewdir:
1815 1815 # single 'ff...' match in radix tree, ambiguous with wdir
1816 1816 ambiguous = True
1817 1817 else:
1818 1818 return partial
1819 1819 elif maybewdir:
1820 1820 # no 'ff...' match in radix tree, wdir identified
1821 1821 raise error.WdirUnsupported
1822 1822 else:
1823 1823 return None
1824 1824 except error.RevlogError:
1825 1825 # parsers.c radix tree lookup gave multiple matches
1826 1826 # fast path: for unfiltered changelog, radix tree is accurate
1827 1827 if not getattr(self, 'filteredrevs', None):
1828 1828 ambiguous = True
1829 1829 # fall through to slow path that filters hidden revisions
1830 1830 except (AttributeError, ValueError):
1831 1831 # we are pure python, or key is not hex
1832 1832 pass
1833 1833 if ambiguous:
1834 1834 raise error.AmbiguousPrefixLookupError(
1835 1835 id, self.display_id, _(b'ambiguous identifier')
1836 1836 )
1837 1837
1838 1838 if id in self._pcache:
1839 1839 return self._pcache[id]
1840 1840
1841 1841 if len(id) <= 40:
1842 1842 # hex(node)[:...]
1843 1843 l = len(id) // 2 * 2 # grab an even number of digits
1844 1844 try:
1845 1845 # we're dropping the last digit, so let's check that it's hex,
1846 1846 # to avoid the expensive computation below if it's not
1847 1847 if len(id) % 2 > 0:
1848 1848 if not (id[-1] in hexdigits):
1849 1849 return None
1850 1850 prefix = bin(id[:l])
1851 1851 except binascii.Error:
1852 1852 pass
1853 1853 else:
1854 1854 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1855 1855 nl = [
1856 1856 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1857 1857 ]
1858 1858 if self.nodeconstants.nullhex.startswith(id):
1859 1859 nl.append(self.nullid)
1860 1860 if len(nl) > 0:
1861 1861 if len(nl) == 1 and not maybewdir:
1862 1862 self._pcache[id] = nl[0]
1863 1863 return nl[0]
1864 1864 raise error.AmbiguousPrefixLookupError(
1865 1865 id, self.display_id, _(b'ambiguous identifier')
1866 1866 )
1867 1867 if maybewdir:
1868 1868 raise error.WdirUnsupported
1869 1869 return None
1870 1870
1871 1871 def lookup(self, id):
1872 1872 """locate a node based on:
1873 1873 - revision number or str(revision number)
1874 1874 - nodeid or subset of hex nodeid
1875 1875 """
1876 1876 n = self._match(id)
1877 1877 if n is not None:
1878 1878 return n
1879 1879 n = self._partialmatch(id)
1880 1880 if n:
1881 1881 return n
1882 1882
1883 1883 raise error.LookupError(id, self.display_id, _(b'no match found'))
1884 1884
1885 1885 def shortest(self, node, minlength=1):
1886 1886 """Find the shortest unambiguous prefix that matches node."""
1887 1887
1888 1888 def isvalid(prefix):
1889 1889 try:
1890 1890 matchednode = self._partialmatch(prefix)
1891 1891 except error.AmbiguousPrefixLookupError:
1892 1892 return False
1893 1893 except error.WdirUnsupported:
1894 1894 # single 'ff...' match
1895 1895 return True
1896 1896 if matchednode is None:
1897 1897 raise error.LookupError(node, self.display_id, _(b'no node'))
1898 1898 return True
1899 1899
1900 1900 def maybewdir(prefix):
1901 1901 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1902 1902
1903 1903 hexnode = hex(node)
1904 1904
1905 1905 def disambiguate(hexnode, minlength):
1906 1906 """Disambiguate against wdirid."""
1907 1907 for length in range(minlength, len(hexnode) + 1):
1908 1908 prefix = hexnode[:length]
1909 1909 if not maybewdir(prefix):
1910 1910 return prefix
1911 1911
1912 1912 if not getattr(self, 'filteredrevs', None):
1913 1913 try:
1914 1914 length = max(self.index.shortest(node), minlength)
1915 1915 return disambiguate(hexnode, length)
1916 1916 except error.RevlogError:
1917 1917 if node != self.nodeconstants.wdirid:
1918 1918 raise error.LookupError(
1919 1919 node, self.display_id, _(b'no node')
1920 1920 )
1921 1921 except AttributeError:
1922 1922 # Fall through to pure code
1923 1923 pass
1924 1924
1925 1925 if node == self.nodeconstants.wdirid:
1926 1926 for length in range(minlength, len(hexnode) + 1):
1927 1927 prefix = hexnode[:length]
1928 1928 if isvalid(prefix):
1929 1929 return prefix
1930 1930
1931 1931 for length in range(minlength, len(hexnode) + 1):
1932 1932 prefix = hexnode[:length]
1933 1933 if isvalid(prefix):
1934 1934 return disambiguate(hexnode, length)
1935 1935
1936 1936 def cmp(self, node, text):
1937 1937 """compare text with a given file revision
1938 1938
1939 1939 returns True if text is different from what is stored.
1940 1940 """
1941 1941 p1, p2 = self.parents(node)
1942 1942 return storageutil.hashrevisionsha1(text, p1, p2) != node
1943 1943
1944 1944 def _getsegmentforrevs(self, startrev, endrev):
1945 1945 """Obtain a segment of raw data corresponding to a range of revisions.
1946 1946
1947 1947 Accepts the start and end revisions.
1950 1950
1951 1951 Requests for data may be satisfied by a cache.
1952 1952
1953 1953 Returns a 2-tuple of (offset, data) for the requested range of
1954 1954 revisions. Offset is the integer offset from the beginning of the
1955 1955 revlog and data is a str or buffer of the raw byte data.
1956 1956
1957 1957 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1958 1958 to determine where each revision's data begins and ends.
1959 1959 """
1960 1960 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1961 1961 # (functions are expensive).
1962 1962 index = self.index
1963 1963 istart = index[startrev]
1964 1964 start = int(istart[0] >> 16)
1965 1965 if startrev == endrev:
1966 1966 end = start + istart[1]
1967 1967 else:
1968 1968 iend = index[endrev]
1969 1969 end = int(iend[0] >> 16) + iend[1]
1970 1970
1971 1971 if self._inline:
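# inline revlogs interleave index entries and data chunks in the .i
# file, so the data of revision r is preceded by r + 1 index entries;
# shift both offsets accordingly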
1972 1972 start += (startrev + 1) * self.index.entry_size
1973 1973 end += (endrev + 1) * self.index.entry_size
1974 1974 length = end - start
1975 1975
1976 1976 return start, self._segmentfile.read_chunk(start, length)
1977 1977
1978 1978 def _chunk(self, rev):
1979 1979 """Obtain a single decompressed chunk for a revision.
1980 1980
1981 1981 Accepts an integer revision.
1984 1984
1985 1985 Returns a str holding uncompressed data for the requested revision.
1986 1986 """
1987 1987 compression_mode = self.index[rev][10]
1988 1988 data = self._getsegmentforrevs(rev, rev)[1]
1989 1989 if compression_mode == COMP_MODE_PLAIN:
1990 1990 return data
1991 1991 elif compression_mode == COMP_MODE_DEFAULT:
1992 1992 return self._decompressor(data)
1993 1993 elif compression_mode == COMP_MODE_INLINE:
1994 1994 return self.decompress(data)
1995 1995 else:
1996 1996 msg = b'unknown compression mode %d'
1997 1997 msg %= compression_mode
1998 1998 raise error.RevlogError(msg)
1999 1999
2000 2000 def _chunks(self, revs, targetsize=None):
2001 2001 """Obtain decompressed chunks for the specified revisions.
2002 2002
2003 2003 Accepts an iterable of numeric revisions that are assumed to be in
2004 2004 ascending order.
2007 2007
2008 2008 This function is similar to calling ``self._chunk()`` multiple times,
2009 2009 but is faster.
2010 2010
2011 2011 Returns a list with decompressed data for each requested revision.
2012 2012 """
2013 2013 if not revs:
2014 2014 return []
2015 2015 start = self.start
2016 2016 length = self.length
2017 2017 inline = self._inline
2018 2018 iosize = self.index.entry_size
2019 2019 buffer = util.buffer
2020 2020
2021 2021 l = []
2022 2022 ladd = l.append
2023 2023
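# with sparse-read enabled, split the revisions into slices of chunks
# that sit close together on disk, so each slice can be fetched in one
# read without dragging in large unrelated gaps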
2024 2024 if not self.data_config.with_sparse_read:
2025 2025 slicedchunks = (revs,)
2026 2026 else:
2027 2027 slicedchunks = deltautil.slicechunk(
2028 2028 self, revs, targetsize=targetsize
2029 2029 )
2030 2030
2031 2031 for revschunk in slicedchunks:
2032 2032 firstrev = revschunk[0]
2033 2033 # Skip trailing revisions with empty diff
2034 2034 for lastrev in revschunk[::-1]:
2035 2035 if length(lastrev) != 0:
2036 2036 break
2037 2037
2038 2038 try:
2039 2039 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2040 2040 except OverflowError:
2041 2041 # issue4215 - we can't cache a run of chunks greater than
2042 2042 # 2G on Windows
2043 2043 return [self._chunk(rev) for rev in revschunk]
2044 2044
2045 2045 decomp = self.decompress
2046 2046 # self._decompressor might be None, but will not be used in that case
2047 2047 def_decomp = self._decompressor
2048 2048 for rev in revschunk:
2049 2049 chunkstart = start(rev)
2050 2050 if inline:
2051 2051 chunkstart += (rev + 1) * iosize
2052 2052 chunklength = length(rev)
2053 2053 comp_mode = self.index[rev][10]
2054 2054 c = buffer(data, chunkstart - offset, chunklength)
2055 2055 if comp_mode == COMP_MODE_PLAIN:
2056 2056 ladd(c)
2057 2057 elif comp_mode == COMP_MODE_INLINE:
2058 2058 ladd(decomp(c))
2059 2059 elif comp_mode == COMP_MODE_DEFAULT:
2060 2060 ladd(def_decomp(c))
2061 2061 else:
2062 2062 msg = b'unknown compression mode %d'
2063 2063 msg %= comp_mode
2064 2064 raise error.RevlogError(msg)
2065 2065
2066 2066 return l
2067 2067
2068 2068 def deltaparent(self, rev):
2069 2069 """return deltaparent of the given revision"""
2070 2070 base = self.index[rev][3]
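# entry 3 of an index tuple is the stored delta base; base == rev is
# the convention for a full snapshot. Without general-delta, the
# on-disk delta is always against the previous revision instead.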
2071 2071 if base == rev:
2072 2072 return nullrev
2073 2073 elif self.delta_config.general_delta:
2074 2074 return base
2075 2075 else:
2076 2076 return rev - 1
2077 2077
2078 2078 def issnapshot(self, rev):
2079 2079 """tells whether rev is a snapshot"""
2080 2080 if not self.delta_config.sparse_revlog:
2081 2081 return self.deltaparent(rev) == nullrev
2082 2082 elif hasattr(self.index, 'issnapshot'):
2083 2083 # directly assign the method so later calls skip the hasattr test and attribute lookup
2084 2084 self.issnapshot = self.index.issnapshot
2085 2085 return self.issnapshot(rev)
2086 2086 if rev == nullrev:
2087 2087 return True
2088 2088 entry = self.index[rev]
2089 2089 base = entry[3]
2090 2090 if base == rev:
2091 2091 return True
2092 2092 if base == nullrev:
2093 2093 return True
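# a delta against a parent (reached directly or through empty
# revisions) is a regular delta, not a snapshot; only a delta built on
# another snapshot keeps the snapshot property (checked recursively
# below)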
2094 2094 p1 = entry[5]
2095 2095 while self.length(p1) == 0:
2096 2096 b = self.deltaparent(p1)
2097 2097 if b == p1:
2098 2098 break
2099 2099 p1 = b
2100 2100 p2 = entry[6]
2101 2101 while self.length(p2) == 0:
2102 2102 b = self.deltaparent(p2)
2103 2103 if b == p2:
2104 2104 break
2105 2105 p2 = b
2106 2106 if base == p1 or base == p2:
2107 2107 return False
2108 2108 return self.issnapshot(base)
2109 2109
2110 2110 def snapshotdepth(self, rev):
2111 2111 """number of snapshot in the chain before this one"""
2112 2112 if not self.issnapshot(rev):
2113 2113 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
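# for a snapshot, every link of its delta chain is itself a snapshot,
# so the chain length minus the revision itself gives the nesting depth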
2114 2114 return len(self._deltachain(rev)[0]) - 1
2115 2115
2116 2116 def revdiff(self, rev1, rev2):
2117 2117 """return or calculate a delta between two revisions
2118 2118
2119 2119 The delta calculated is in binary form and is intended to be written to
2120 2120 revlog data directly. So this function needs raw revision data.
2121 2121 """
2122 2122 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2123 2123 return bytes(self._chunk(rev2))
2124 2124
2125 2125 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2126 2126
2127 2127 def revision(self, nodeorrev):
2128 2128 """return an uncompressed revision of a given node or revision
2129 2129 number.
2130 2130 """
2131 2131 return self._revisiondata(nodeorrev)
2132 2132
2133 2133 def sidedata(self, nodeorrev):
2134 2134 """a map of extra data related to the changeset but not part of the hash
2135 2135
2136 2136 This function currently returns a dictionary. However, a more advanced
2137 2137 mapping object will likely be used in the future for more
2138 2138 efficient/lazy code.
2139 2139 """
2140 2140 # deal with <nodeorrev> argument type
2141 2141 if isinstance(nodeorrev, int):
2142 2142 rev = nodeorrev
2143 2143 else:
2144 2144 rev = self.rev(nodeorrev)
2145 2145 return self._sidedata(rev)
2146 2146
2147 2147 def _revisiondata(self, nodeorrev, raw=False):
2148 2148 # deal with <nodeorrev> argument type
2149 2149 if isinstance(nodeorrev, int):
2150 2150 rev = nodeorrev
2151 2151 node = self.node(rev)
2152 2152 else:
2153 2153 node = nodeorrev
2154 2154 rev = None
2155 2155
2156 2156 # fast path the special `nullid` rev
2157 2157 if node == self.nullid:
2158 2158 return b""
2159 2159
2160 2160 # ``rawtext`` is the text as stored inside the revlog. Might be the
2161 2161 # revision or might need to be processed to retrieve the revision.
2162 2162 rev, rawtext, validated = self._rawtext(node, rev)
2163 2163
2164 2164 if raw and validated:
2165 2165 # if we don't want to process the raw text and that raw
2166 2166 # text is cached, we can exit early.
2167 2167 return rawtext
2168 2168 if rev is None:
2169 2169 rev = self.rev(node)
2170 2170 # the revlog's flag for this revision
2171 2171 # (usually alter its state or content)
2172 2172 flags = self.flags(rev)
2173 2173
2174 2174 if validated and flags == REVIDX_DEFAULT_FLAGS:
2175 2175 # no extra flags set, no flag processor runs, text = rawtext
2176 2176 return rawtext
2177 2177
2178 2178 if raw:
2179 2179 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2180 2180 text = rawtext
2181 2181 else:
2182 2182 r = flagutil.processflagsread(self, rawtext, flags)
2183 2183 text, validatehash = r
2184 2184 if validatehash:
2185 2185 self.checkhash(text, node, rev=rev)
2186 2186 if not validated:
2187 2187 self._revisioncache = (node, rev, rawtext)
2188 2188
2189 2189 return text
2190 2190
2191 2191 def _rawtext(self, node, rev):
2192 2192 """return the possibly unvalidated rawtext for a revision
2193 2193
2194 2194 returns (rev, rawtext, validated)
2195 2195 """
2196 2196
2197 2197 # revision in the cache (could be useful to apply delta)
2198 2198 cachedrev = None
2199 2199 # An intermediate text to apply deltas to
2200 2200 basetext = None
2201 2201
2202 2202 # Check if we have the entry in cache
2203 2203 # The cache entry looks like (node, rev, rawtext)
2204 2204 if self._revisioncache:
2205 2205 if self._revisioncache[0] == node:
2206 2206 return (rev, self._revisioncache[2], True)
2207 2207 cachedrev = self._revisioncache[1]
2208 2208
2209 2209 if rev is None:
2210 2210 rev = self.rev(node)
2211 2211
2212 2212 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
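# if the chain walk stopped on the cached revision, the remaining
# deltas can be applied on top of the cached rawtext instead of
# rebuilding the text from the chain base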
2213 2213 if stopped:
2214 2214 basetext = self._revisioncache[2]
2215 2215
2216 2216 # drop cache to save memory, the caller is expected to
2217 2217 # update self._revisioncache after validating the text
2218 2218 self._revisioncache = None
2219 2219
2220 2220 targetsize = None
2221 2221 rawsize = self.index[rev][2]
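# apparently a heuristic: cap how much data a sliced read may fetch at
# roughly four times the expected text size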
2222 2222 if 0 <= rawsize:
2223 2223 targetsize = 4 * rawsize
2224 2224
2225 2225 bins = self._chunks(chain, targetsize=targetsize)
2226 2226 if basetext is None:
2227 2227 basetext = bytes(bins[0])
2228 2228 bins = bins[1:]
2229 2229
2230 2230 rawtext = mdiff.patches(basetext, bins)
2231 2231 del basetext # let us have a chance to free memory early
2232 2232 return (rev, rawtext, False)
2233 2233
2234 2234 def _sidedata(self, rev):
2235 2235 """Return the sidedata for a given revision number."""
2236 2236 index_entry = self.index[rev]
2237 2237 sidedata_offset = index_entry[8]
2238 2238 sidedata_size = index_entry[9]
2239 2239
2240 2240 if self._inline:
2241 2241 sidedata_offset += self.index.entry_size * (1 + rev)
2242 2242 if sidedata_size == 0:
2243 2243 return {}
2244 2244
2245 2245 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2246 2246 filename = self._sidedatafile
2247 2247 end = self._docket.sidedata_end
2248 2248 offset = sidedata_offset
2249 2249 length = sidedata_size
2250 2250 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2251 2251 raise error.RevlogError(m)
2252 2252
2253 2253 comp_segment = self._segmentfile_sidedata.read_chunk(
2254 2254 sidedata_offset, sidedata_size
2255 2255 )
2256 2256
2257 2257 comp = self.index[rev][11]
2258 2258 if comp == COMP_MODE_PLAIN:
2259 2259 segment = comp_segment
2260 2260 elif comp == COMP_MODE_DEFAULT:
2261 2261 segment = self._decompressor(comp_segment)
2262 2262 elif comp == COMP_MODE_INLINE:
2263 2263 segment = self.decompress(comp_segment)
2264 2264 else:
2265 2265 msg = b'unknown compression mode %d'
2266 2266 msg %= comp
2267 2267 raise error.RevlogError(msg)
2268 2268
2269 2269 sidedata = sidedatautil.deserialize_sidedata(segment)
2270 2270 return sidedata
2271 2271
2272 2272 def rawdata(self, nodeorrev):
2273 2273 """return an uncompressed raw data of a given node or revision number."""
2274 2274 return self._revisiondata(nodeorrev, raw=True)
2275 2275
2276 2276 def hash(self, text, p1, p2):
2277 2277 """Compute a node hash.
2278 2278
2279 2279 Available as a function so that subclasses can replace the hash
2280 2280 as needed.
2281 2281 """
2282 2282 return storageutil.hashrevisionsha1(text, p1, p2)
2283 2283
2284 2284 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2285 2285 """Check node hash integrity.
2286 2286
2287 2287 Available as a function so that subclasses can extend hash mismatch
2288 2288 behaviors as needed.
2289 2289 """
2290 2290 try:
2291 2291 if p1 is None and p2 is None:
2292 2292 p1, p2 = self.parents(node)
2293 2293 if node != self.hash(text, p1, p2):
2294 2294 # Clear the revision cache on hash failure. The revision cache
2295 2295 # only stores the raw revision and clearing the cache does have
2296 2296 # the side-effect that we won't have a cache hit when the raw
2297 2297 # revision data is accessed. But this case should be rare and
2298 2298 # it is extra work to teach the cache about the hash
2299 2299 # verification state.
2300 2300 if self._revisioncache and self._revisioncache[0] == node:
2301 2301 self._revisioncache = None
2302 2302
2303 2303 revornode = rev
2304 2304 if revornode is None:
2305 2305 revornode = templatefilters.short(hex(node))
2306 2306 raise error.RevlogError(
2307 2307 _(b"integrity check failed on %s:%s")
2308 2308 % (self.display_id, pycompat.bytestr(revornode))
2309 2309 )
2310 2310 except error.RevlogError:
2311 2311 if self.feature_config.censorable and storageutil.iscensoredtext(
2312 2312 text
2313 2313 ):
2314 2314 raise error.CensoredNodeError(self.display_id, node, text)
2315 2315 raise
2316 2316
2317 2317 @property
2318 2318 def _split_index_file(self):
2319 2319 """the path where to expect the index of an ongoing splitting operation
2320 2320
2321 2321 The file will only exist if a splitting operation is in progress, but
2322 2322 it is always expected at the same location."""
2323 2323 parts = self.radix.split(b'/')
2324 2324 if len(parts) > 1:
2325 2325 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
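# e.g. a radix of b'data/foo' yields b'data-s/foo.i'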
2326 2326 head = parts[0] + b'-s'
2327 2327 mids = parts[1:-1]
2328 2328 tail = parts[-1] + b'.i'
2329 2329 pieces = [head] + mids + [tail]
2330 2330 return b'/'.join(pieces)
2331 2331 else:
2332 2332 # the revlog is stored at the root of the store (changelog or
2333 2333 # manifest), no risk of collision.
2334 2334 return self.radix + b'.i.s'
2335 2335
2336 2336 def _enforceinlinesize(self, tr, side_write=True):
2337 2337 """Check if the revlog is too big for inline and convert if so.
2338 2338
2339 2339 This should be called after revisions are added to the revlog. If the
2340 2340 revlog has grown too large to be an inline revlog, it will convert it
2341 2341 to use multiple index and data files.
2342 2342 """
2343 2343 tiprev = len(self) - 1
2344 2344 total_size = self.start(tiprev) + self.length(tiprev)
2345 2345 if not self._inline or total_size < _maxinline:
2346 2346 return
2347 2347
2348 2348 troffset = tr.findoffset(self._indexfile)
2349 2349 if troffset is None:
2350 2350 raise error.RevlogError(
2351 2351 _(b"%s not found in the transaction") % self._indexfile
2352 2352 )
2353 2353 if troffset:
2354 2354 tr.addbackup(self._indexfile, for_offset=True)
2355 2355 tr.add(self._datafile, 0)
2356 2356
2357 2357 existing_handles = False
2358 2358 if self._writinghandles is not None:
2359 2359 existing_handles = True
2360 2360 fp = self._writinghandles[0]
2361 2361 fp.flush()
2362 2362 fp.close()
2363 2363 # We can't use the cached file handle after close(). So prevent
2364 2364 # its usage.
2365 2365 self._writinghandles = None
2366 2366 self._segmentfile.writing_handle = None
2367 2367 # No need to deal with the sidedata writing handle, as it is only
2368 2368 # relevant for revlog-v2, which is never inline and therefore never
2369 2369 # reaches this code
2370 2370 if side_write:
2371 2371 old_index_file_path = self._indexfile
2372 2372 new_index_file_path = self._split_index_file
2373 2373 opener = self.opener
2374 2374 weak_self = weakref.ref(self)
2375 2375
2376 2376 # the "split" index replaces the real index when the transaction is finalized
2377 2377 def finalize_callback(tr):
2378 2378 opener.rename(
2379 2379 new_index_file_path,
2380 2380 old_index_file_path,
2381 2381 checkambig=True,
2382 2382 )
2383 2383 maybe_self = weak_self()
2384 2384 if maybe_self is not None:
2385 2385 maybe_self._indexfile = old_index_file_path
2386 2386
2387 2387 def abort_callback(tr):
2388 2388 maybe_self = weak_self()
2389 2389 if maybe_self is not None:
2390 2390 maybe_self._indexfile = old_index_file_path
2391 2391
2392 2392 tr.registertmp(new_index_file_path)
2393 2393 if self.target[1] is not None:
2394 2394 callback_id = b'000-revlog-split-%d-%s' % self.target
2395 2395 else:
2396 2396 callback_id = b'000-revlog-split-%d' % self.target[0]
2397 2397 tr.addfinalize(callback_id, finalize_callback)
2398 2398 tr.addabort(callback_id, abort_callback)
2399 2399
2400 2400 new_dfh = self._datafp(b'w+')
2401 2401 new_dfh.truncate(0) # drop any potentially existing data
2402 2402 try:
2403 2403 with self.reading():
2404 2404 for r in self:
2405 2405 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2406 2406 new_dfh.flush()
2407 2407
2408 2408 if side_write:
2409 2409 self._indexfile = new_index_file_path
2410 2410 with self.__index_new_fp() as fp:
2411 2411 self._format_flags &= ~FLAG_INLINE_DATA
2412 2412 self._inline = False
2413 2413 for i in self:
2414 2414 e = self.index.entry_binary(i)
2415 2415 if i == 0 and self._docket is None:
2416 2416 header = self._format_flags | self._format_version
2417 2417 header = self.index.pack_header(header)
2418 2418 e = header + e
2419 2419 fp.write(e)
2420 2420 if self._docket is not None:
2421 2421 self._docket.index_end = fp.tell()
2422 2422
2423 2423 # If we don't use side-write, the temp file replaces the real
2424 2424 # index when we exit the context manager
2425 2425
2426 2426 nodemaputil.setup_persistent_nodemap(tr, self)
2427 2427 self._segmentfile = randomaccessfile.randomaccessfile(
2428 2428 self.opener,
2429 2429 self._datafile,
2430 2430 self.data_config.chunk_cache_size,
2431 2431 )
2432 2432
2433 2433 if existing_handles:
2434 2434 # switched from inline to conventional reopen the index
2435 2435 ifh = self.__index_write_fp()
2436 2436 self._writinghandles = (ifh, new_dfh, None)
2437 2437 self._segmentfile.writing_handle = new_dfh
2438 2438 new_dfh = None
2439 2439 # No need to deal with the sidedata writing handle, as it is only
2440 2440 # relevant for revlog-v2, which is never inline and therefore never
2441 2441 # reaches this code
2442 2442 finally:
2443 2443 if new_dfh is not None:
2444 2444 new_dfh.close()
2445 2445
2446 2446 def _nodeduplicatecallback(self, transaction, node):
2447 2447 """called when trying to add a node already stored."""
2448 2448
2449 2449 @contextlib.contextmanager
2450 2450 def reading(self):
2451 2451 """Context manager that keeps data and sidedata files open for reading"""
2452 2452 if len(self.index) == 0:
2453 2453 yield # nothing to be read
2454 2454 else:
2455 2455 with self._segmentfile.reading():
2456 2456 with self._segmentfile_sidedata.reading():
2457 2457 yield
2458 2458
2459 2459 @contextlib.contextmanager
2460 2460 def _writing(self, transaction):
2461 2461 if self._trypending:
2462 2462 msg = b'try to write in a `trypending` revlog: %s'
2463 2463 msg %= self.display_id
2464 2464 raise error.ProgrammingError(msg)
2465 2465 if self._writinghandles is not None:
2466 2466 yield
2467 2467 else:
2468 2468 ifh = dfh = sdfh = None
2469 2469 try:
2470 2470 r = len(self)
2471 2471 # opening the data file.
2472 2472 dsize = 0
2473 2473 if r:
2474 2474 dsize = self.end(r - 1)
2475 2475 dfh = None
2476 2476 if not self._inline:
2477 2477 try:
2478 2478 dfh = self._datafp(b"r+")
2479 2479 if self._docket is None:
2480 2480 dfh.seek(0, os.SEEK_END)
2481 2481 else:
2482 2482 dfh.seek(self._docket.data_end, os.SEEK_SET)
2483 2483 except FileNotFoundError:
2484 2484 dfh = self._datafp(b"w+")
2485 2485 transaction.add(self._datafile, dsize)
2486 2486 if self._sidedatafile is not None:
2487 2487 # revlog-v2 is never inline; help Pytype see that dfh is set
2488 2488 assert dfh is not None
2489 2489 try:
2490 2490 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2491 2491 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2492 2492 except FileNotFoundError:
2493 2493 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2494 2494 transaction.add(
2495 2495 self._sidedatafile, self._docket.sidedata_end
2496 2496 )
2497 2497
2498 2498 # opening the index file.
2499 2499 isize = r * self.index.entry_size
2500 2500 ifh = self.__index_write_fp()
2501 2501 if self._inline:
2502 2502 transaction.add(self._indexfile, dsize + isize)
2503 2503 else:
2504 2504 transaction.add(self._indexfile, isize)
2505 2505 # exposing all file handle for writing.
2506 2506 self._writinghandles = (ifh, dfh, sdfh)
2507 2507 self._segmentfile.writing_handle = ifh if self._inline else dfh
2508 2508 self._segmentfile_sidedata.writing_handle = sdfh
2509 2509 yield
2510 2510 if self._docket is not None:
2511 2511 self._write_docket(transaction)
2512 2512 finally:
2513 2513 self._writinghandles = None
2514 2514 self._segmentfile.writing_handle = None
2515 2515 self._segmentfile_sidedata.writing_handle = None
2516 2516 if dfh is not None:
2517 2517 dfh.close()
2518 2518 if sdfh is not None:
2519 2519 sdfh.close()
2520 2520 # close the index file last to avoid exposing entries that refer to
2521 2521 # potentially unflushed data content.
2522 2522 if ifh is not None:
2523 2523 ifh.close()
2524 2524
2525 2525 def _write_docket(self, transaction):
2526 2526 """write the current docket on disk
2527 2527
2528 2528 Exists as a method to help the changelog implement its transaction logic.
2529 2529
2530 2530 We could also imagine using the same transaction logic for all revlogs,
2531 2531 since dockets are cheap."""
2532 2532 self._docket.write(transaction)
2533 2533
2534 2534 def addrevision(
2535 2535 self,
2536 2536 text,
2537 2537 transaction,
2538 2538 link,
2539 2539 p1,
2540 2540 p2,
2541 2541 cachedelta=None,
2542 2542 node=None,
2543 2543 flags=REVIDX_DEFAULT_FLAGS,
2544 2544 deltacomputer=None,
2545 2545 sidedata=None,
2546 2546 ):
2547 2547 """add a revision to the log
2548 2548
2549 2549 text - the revision data to add
2550 2550 transaction - the transaction object used for rollback
2551 2551 link - the linkrev data to add
2552 2552 p1, p2 - the parent nodeids of the revision
2553 2553 cachedelta - an optional precomputed delta
2554 2554 node - nodeid of revision; typically node is not specified, and it is
2555 2555 computed by default as hash(text, p1, p2), however subclasses might
2556 2556 use different hashing method (and override checkhash() in such case)
2557 2557 flags - the known flags to set on the revision
2558 2558 deltacomputer - an optional deltacomputer instance shared between
2559 2559 multiple calls
2560 2560 """
2561 2561 if link == nullrev:
2562 2562 raise error.RevlogError(
2563 2563 _(b"attempted to add linkrev -1 to %s") % self.display_id
2564 2564 )
2565 2565
2566 2566 if sidedata is None:
2567 2567 sidedata = {}
2568 2568 elif sidedata and not self.feature_config.has_side_data:
2569 2569 raise error.ProgrammingError(
2570 2570 _(b"trying to add sidedata to a revlog who don't support them")
2571 2571 )
2572 2572
2573 2573 if flags:
2574 2574 node = node or self.hash(text, p1, p2)
2575 2575
2576 2576 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2577 2577
2578 2578 # If the flag processor modifies the revision data, ignore any provided
2579 2579 # cachedelta.
2580 2580 if rawtext != text:
2581 2581 cachedelta = None
2582 2582
2583 2583 if len(rawtext) > _maxentrysize:
2584 2584 raise error.RevlogError(
2585 2585 _(
2586 2586 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2587 2587 )
2588 2588 % (self.display_id, len(rawtext))
2589 2589 )
2590 2590
2591 2591 node = node or self.hash(rawtext, p1, p2)
2592 2592 rev = self.index.get_rev(node)
2593 2593 if rev is not None:
2594 2594 return rev
2595 2595
2596 2596 if validatehash:
2597 2597 self.checkhash(rawtext, node, p1=p1, p2=p2)
2598 2598
2599 2599 return self.addrawrevision(
2600 2600 rawtext,
2601 2601 transaction,
2602 2602 link,
2603 2603 p1,
2604 2604 p2,
2605 2605 node,
2606 2606 flags,
2607 2607 cachedelta=cachedelta,
2608 2608 deltacomputer=deltacomputer,
2609 2609 sidedata=sidedata,
2610 2610 )
2611 2611
2612 2612 def addrawrevision(
2613 2613 self,
2614 2614 rawtext,
2615 2615 transaction,
2616 2616 link,
2617 2617 p1,
2618 2618 p2,
2619 2619 node,
2620 2620 flags,
2621 2621 cachedelta=None,
2622 2622 deltacomputer=None,
2623 2623 sidedata=None,
2624 2624 ):
2625 2625 """add a raw revision with known flags, node and parents
2626 2626 useful when reusing a revision not stored in this revlog (ex: received
2627 2627 over wire, or read from an external bundle).
2628 2628 """
2629 2629 with self._writing(transaction):
2630 2630 return self._addrevision(
2631 2631 node,
2632 2632 rawtext,
2633 2633 transaction,
2634 2634 link,
2635 2635 p1,
2636 2636 p2,
2637 2637 flags,
2638 2638 cachedelta,
2639 2639 deltacomputer=deltacomputer,
2640 2640 sidedata=sidedata,
2641 2641 )
2642 2642
2643 2643 def compress(self, data):
2644 2644 """Generate a possibly-compressed representation of data."""
2645 2645 if not data:
2646 2646 return b'', data
2647 2647
2648 2648 compressed = self._compressor.compress(data)
2649 2649
2650 2650 if compressed:
2651 2651 # The revlog compressor added the header in the returned data.
2652 2652 return b'', compressed
2653 2653
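# data kept uncompressed must not be confusable with a compression
# header: decompress() already treats a leading b'\0' as plain data,
# so only other payloads need the explicit b'u' marker (zlib chunks,
# for instance, start with b'x' and need no marker at all)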
2654 2654 if data[0:1] == b'\0':
2655 2655 return b'', data
2656 2656 return b'u', data
2657 2657
2658 2658 def decompress(self, data):
2659 2659 """Decompress a revlog chunk.
2660 2660
2661 2661 The chunk is expected to begin with a header identifying the
2662 2662 format type so it can be routed to an appropriate decompressor.
2663 2663 """
2664 2664 if not data:
2665 2665 return data
2666 2666
2667 2667 # Revlogs are read much more frequently than they are written and many
2668 2668 # chunks only take microseconds to decompress, so performance is
2669 2669 # important here.
2670 2670 #
2671 2671 # We can make a few assumptions about revlogs:
2672 2672 #
2673 2673 # 1) the majority of chunks will be compressed (as opposed to inline
2674 2674 # raw data).
2675 2675 # 2) decompressing *any* data will likely be at least 10x slower than
2676 2676 # returning raw inline data.
2677 2677 # 3) we want to prioritize common and officially supported compression
2678 2678 # engines
2679 2679 #
2680 2680 # It follows that we want to optimize for "decompress compressed data
2681 2681 # when encoded with common and officially supported compression engines"
2682 2682 # case over "raw data" and "data encoded by less common or non-official
2683 2683 # compression engines." That is why we have the inline lookup first
2684 2684 # followed by the compengines lookup.
2685 2685 #
2686 2686 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2687 2687 # compressed chunks. And this matters for changelog and manifest reads.
2688 2688 t = data[0:1]
2689 2689
2690 2690 if t == b'x':
2691 2691 try:
2692 2692 return _zlibdecompress(data)
2693 2693 except zlib.error as e:
2694 2694 raise error.RevlogError(
2695 2695 _(b'revlog decompress error: %s')
2696 2696 % stringutil.forcebytestr(e)
2697 2697 )
2698 2698 # '\0' is more common than 'u' so it goes first.
2699 2699 elif t == b'\0':
2700 2700 return data
2701 2701 elif t == b'u':
2702 2702 return util.buffer(data, 1)
2703 2703
2704 2704 compressor = self._get_decompressor(t)
2705 2705
2706 2706 return compressor.decompress(data)
2707 2707
2708 2708 def _addrevision(
2709 2709 self,
2710 2710 node,
2711 2711 rawtext,
2712 2712 transaction,
2713 2713 link,
2714 2714 p1,
2715 2715 p2,
2716 2716 flags,
2717 2717 cachedelta,
2718 2718 alwayscache=False,
2719 2719 deltacomputer=None,
2720 2720 sidedata=None,
2721 2721 ):
2722 2722 """internal function to add revisions to the log
2723 2723
2724 2724 see addrevision for argument descriptions.
2725 2725
2726 2726 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2727 2727
2728 2728 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2729 2729 be used.
2730 2730
2731 2731 invariants:
2732 2732 - rawtext is optional (can be None); if not set, cachedelta must be set.
2733 2733 if both are set, they must correspond to each other.
2734 2734 """
2735 2735 if node == self.nullid:
2736 2736 raise error.RevlogError(
2737 2737 _(b"%s: attempt to add null revision") % self.display_id
2738 2738 )
2739 2739 if (
2740 2740 node == self.nodeconstants.wdirid
2741 2741 or node in self.nodeconstants.wdirfilenodeids
2742 2742 ):
2743 2743 raise error.RevlogError(
2744 2744 _(b"%s: attempt to add wdir revision") % self.display_id
2745 2745 )
2746 2746 if self._writinghandles is None:
2747 2747 msg = b'adding revision outside `revlog._writing` context'
2748 2748 raise error.ProgrammingError(msg)
2749 2749
2750 2750 btext = [rawtext]
2751 2751
2752 2752 curr = len(self)
2753 2753 prev = curr - 1
2754 2754
2755 2755 offset = self._get_data_offset(prev)
2756 2756
2757 2757 if self._concurrencychecker:
2758 2758 ifh, dfh, sdfh = self._writinghandles
2759 2759 # XXX no checking for the sidedata file
2760 2760 if self._inline:
2761 2761 # offset is "as if" it were in the .d file, so we need to add on
2762 2762 # the size of the entry metadata.
2763 2763 self._concurrencychecker(
2764 2764 ifh, self._indexfile, offset + curr * self.index.entry_size
2765 2765 )
2766 2766 else:
2767 2767 # Entries in the .i are a consistent size.
2768 2768 self._concurrencychecker(
2769 2769 ifh, self._indexfile, curr * self.index.entry_size
2770 2770 )
2771 2771 self._concurrencychecker(dfh, self._datafile, offset)
2772 2772
2773 2773 p1r, p2r = self.rev(p1), self.rev(p2)
2774 2774
2775 2775 # full versions are inserted when the needed deltas
2776 2776 # become comparable to the uncompressed text
2777 2777 if rawtext is None:
2778 2778 # need rawtext size, before changed by flag processors, which is
2779 2779 # the non-raw size. use revlog explicitly to avoid filelog's extra
2780 2780 # logic that might remove metadata size.
2781 2781 textlen = mdiff.patchedsize(
2782 2782 revlog.size(self, cachedelta[0]), cachedelta[1]
2783 2783 )
2784 2784 else:
2785 2785 textlen = len(rawtext)
2786 2786
2787 2787 if deltacomputer is None:
2788 2788 write_debug = None
2789 2789 if self.delta_config.debug_delta:
2790 2790 write_debug = transaction._report
2791 2791 deltacomputer = deltautil.deltacomputer(
2792 2792 self, write_debug=write_debug
2793 2793 )
2794 2794
2795 2795 if cachedelta is not None and len(cachedelta) == 2:
2796 2796 # If the cached delta has no information about how it should be
2797 2797 # reused, add the default reuse instruction according to the
2798 2798 # revlog's configuration.
2799 2799 if (
2800 2800 self.delta_config.general_delta
2801 2801 and self.delta_config.lazy_delta_base
2802 2802 ):
2803 2803 delta_base_reuse = DELTA_BASE_REUSE_TRY
2804 2804 else:
2805 2805 delta_base_reuse = DELTA_BASE_REUSE_NO
2806 2806 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2807 2807
2808 2808 revinfo = revlogutils.revisioninfo(
2809 2809 node,
2810 2810 p1,
2811 2811 p2,
2812 2812 btext,
2813 2813 textlen,
2814 2814 cachedelta,
2815 2815 flags,
2816 2816 )
2817 2817
2818 2818 deltainfo = deltacomputer.finddeltainfo(revinfo)
2819 2819
2820 2820 compression_mode = COMP_MODE_INLINE
2821 2821 if self._docket is not None:
2822 2822 default_comp = self._docket.default_compression_header
2823 2823 r = deltautil.delta_compression(default_comp, deltainfo)
2824 2824 compression_mode, deltainfo = r
2825 2825
2826 2826 sidedata_compression_mode = COMP_MODE_INLINE
2827 2827 if sidedata and self.feature_config.has_side_data:
2828 2828 sidedata_compression_mode = COMP_MODE_PLAIN
2829 2829 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2830 2830 sidedata_offset = self._docket.sidedata_end
2831 2831 h, comp_sidedata = self.compress(serialized_sidedata)
2832 2832 if (
2833 2833 h != b'u'
2834 2834 and comp_sidedata[0:1] != b'\0'
2835 2835 and len(comp_sidedata) < len(serialized_sidedata)
2836 2836 ):
2837 2837 assert not h
2838 2838 if (
2839 2839 comp_sidedata[0:1]
2840 2840 == self._docket.default_compression_header
2841 2841 ):
2842 2842 sidedata_compression_mode = COMP_MODE_DEFAULT
2843 2843 serialized_sidedata = comp_sidedata
2844 2844 else:
2845 2845 sidedata_compression_mode = COMP_MODE_INLINE
2846 2846 serialized_sidedata = comp_sidedata
2847 2847 else:
2848 2848 serialized_sidedata = b""
2849 2849 # Don't store the offset if the sidedata is empty; that way we can
2850 2850 # easily detect empty sidedata, and it will be no different from
2851 2851 # sidedata we add manually.
2852 2852 sidedata_offset = 0
2853 2853
2854 2854 rank = RANK_UNKNOWN
2855 2855 if self.feature_config.compute_rank:
2856 2856 if (p1r, p2r) == (nullrev, nullrev):
2857 2857 rank = 1
2858 2858 elif p1r != nullrev and p2r == nullrev:
2859 2859 rank = 1 + self.fast_rank(p1r)
2860 2860 elif p1r == nullrev and p2r != nullrev:
2861 2861 rank = 1 + self.fast_rank(p2r)
2862 2862 else: # merge node
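# rank is the size of ancestors(rev) including rev itself; for a merge
# this is rank(pmax) plus the ancestors of pmin missing from pmax's
# ancestor set, plus one for the merge itself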
2863 2863 if rustdagop is not None and self.index.rust_ext_compat:
2864 2864 rank = rustdagop.rank(self.index, p1r, p2r)
2865 2865 else:
2866 2866 pmin, pmax = sorted((p1r, p2r))
2867 2867 rank = 1 + self.fast_rank(pmax)
2868 2868 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2869 2869
2870 2870 e = revlogutils.entry(
2871 2871 flags=flags,
2872 2872 data_offset=offset,
2873 2873 data_compressed_length=deltainfo.deltalen,
2874 2874 data_uncompressed_length=textlen,
2875 2875 data_compression_mode=compression_mode,
2876 2876 data_delta_base=deltainfo.base,
2877 2877 link_rev=link,
2878 2878 parent_rev_1=p1r,
2879 2879 parent_rev_2=p2r,
2880 2880 node_id=node,
2881 2881 sidedata_offset=sidedata_offset,
2882 2882 sidedata_compressed_length=len(serialized_sidedata),
2883 2883 sidedata_compression_mode=sidedata_compression_mode,
2884 2884 rank=rank,
2885 2885 )
2886 2886
2887 2887 self.index.append(e)
2888 2888 entry = self.index.entry_binary(curr)
2889 2889 if curr == 0 and self._docket is None:
2890 2890 header = self._format_flags | self._format_version
2891 2891 header = self.index.pack_header(header)
2892 2892 entry = header + entry
2893 2893 self._writeentry(
2894 2894 transaction,
2895 2895 entry,
2896 2896 deltainfo.data,
2897 2897 link,
2898 2898 offset,
2899 2899 serialized_sidedata,
2900 2900 sidedata_offset,
2901 2901 )
2902 2902
2903 2903 rawtext = btext[0]
2904 2904
2905 2905 if alwayscache and rawtext is None:
2906 2906 rawtext = deltacomputer.buildtext(revinfo)
2907 2907
2908 2908 if type(rawtext) == bytes: # only accept immutable objects
2909 2909 self._revisioncache = (node, curr, rawtext)
2910 2910 self._chainbasecache[curr] = deltainfo.chainbase
2911 2911 return curr
2912 2912
2913 2913 def _get_data_offset(self, prev):
2914 2914 """Returns the current offset in the (in-transaction) data file.
2915 2915 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2916 2916 file to store that information: since sidedata can be rewritten to the
2917 2917 end of the data file within a transaction, you can have cases where, for
2918 2918 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2919 2919 to `n - 1`'s sidedata being written after `n`'s data.
2920 2920
2921 2921 TODO cache this in a docket file before getting out of experimental."""
2922 2922 if self._docket is None:
2923 2923 return self.end(prev)
2924 2924 else:
2925 2925 return self._docket.data_end
2926 2926
2927 2927 def _writeentry(
2928 2928 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2929 2929 ):
2930 2930 # Files opened in a+ mode have inconsistent behavior on various
2931 2931 # platforms. Windows requires that a file positioning call be made
2932 2932 # when the file handle transitions between reads and writes. See
2933 2933 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2934 2934 # platforms, Python or the platform itself can be buggy. Some versions
2935 2935 # of Solaris have been observed to not append at the end of the file
2936 2936 # if the file was seeked to before the end. See issue4943 for more.
2937 2937 #
2938 2938 # We work around this issue by inserting a seek() before writing.
2939 2939 # Note: This is likely not necessary on Python 3. However, because
2940 2940 # the file handle is reused for reads and may be seeked there, we need
2941 2941 # to be careful before changing this.
2942 2942 if self._writinghandles is None:
2943 2943 msg = b'adding revision outside `revlog._writing` context'
2944 2944 raise error.ProgrammingError(msg)
2945 2945 ifh, dfh, sdfh = self._writinghandles
2946 2946 if self._docket is None:
2947 2947 ifh.seek(0, os.SEEK_END)
2948 2948 else:
2949 2949 ifh.seek(self._docket.index_end, os.SEEK_SET)
2950 2950 if dfh:
2951 2951 if self._docket is None:
2952 2952 dfh.seek(0, os.SEEK_END)
2953 2953 else:
2954 2954 dfh.seek(self._docket.data_end, os.SEEK_SET)
2955 2955 if sdfh:
2956 2956 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2957 2957
2958 2958 curr = len(self) - 1
2959 2959 if not self._inline:
2960 2960 transaction.add(self._datafile, offset)
2961 2961 if self._sidedatafile:
2962 2962 transaction.add(self._sidedatafile, sidedata_offset)
2963 2963 transaction.add(self._indexfile, curr * len(entry))
2964 2964 if data[0]:
2965 2965 dfh.write(data[0])
2966 2966 dfh.write(data[1])
2967 2967 if sidedata:
2968 2968 sdfh.write(sidedata)
2969 2969 ifh.write(entry)
2970 2970 else:
2971 2971 offset += curr * self.index.entry_size
2972 2972 transaction.add(self._indexfile, offset)
2973 2973 ifh.write(entry)
2974 2974 ifh.write(data[0])
2975 2975 ifh.write(data[1])
2976 2976 assert not sidedata
2977 2977 self._enforceinlinesize(transaction)
2978 2978 if self._docket is not None:
2979 2979 # revlog-v2 always has 3 writing handles, help Pytype
2980 2980 wh1 = self._writinghandles[0]
2981 2981 wh2 = self._writinghandles[1]
2982 2982 wh3 = self._writinghandles[2]
2983 2983 assert wh1 is not None
2984 2984 assert wh2 is not None
2985 2985 assert wh3 is not None
2986 2986 self._docket.index_end = wh1.tell()
2987 2987 self._docket.data_end = wh2.tell()
2988 2988 self._docket.sidedata_end = wh3.tell()
2989 2989
2990 2990 nodemaputil.setup_persistent_nodemap(transaction, self)
2991 2991
2992 2992 def addgroup(
2993 2993 self,
2994 2994 deltas,
2995 2995 linkmapper,
2996 2996 transaction,
2997 2997 alwayscache=False,
2998 2998 addrevisioncb=None,
2999 2999 duplicaterevisioncb=None,
3000 3000 debug_info=None,
3001 3001 delta_base_reuse_policy=None,
3002 3002 ):
3003 3003 """
3004 3004 add a delta group
3005 3005
3006 3006 given a set of deltas, add them to the revision log. the
3007 3007 first delta is against its parent, which should be in our
3008 3008 log, the rest are against the previous delta.
3009 3009
3010 3010 If ``addrevisioncb`` is defined, it will be called with arguments of
3011 3011 this revlog and the node that was added.
3012 3012 """
3013 3013
3014 3014 if self._adding_group:
3015 3015 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3016 3016
3017 3017 # read the default delta-base reuse policy from revlog config if the
3018 3018 # group did not specify one.
3019 3019 if delta_base_reuse_policy is None:
3020 3020 if (
3021 3021 self.delta_config.general_delta
3022 3022 and self.delta_config.lazy_delta_base
3023 3023 ):
3024 3024 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3025 3025 else:
3026 3026 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3027 3027
3028 3028 self._adding_group = True
3029 3029 empty = True
3030 3030 try:
3031 3031 with self._writing(transaction):
3032 3032 write_debug = None
3033 3033 if self.delta_config.debug_delta:
3034 3034 write_debug = transaction._report
3035 3035 deltacomputer = deltautil.deltacomputer(
3036 3036 self,
3037 3037 write_debug=write_debug,
3038 3038 debug_info=debug_info,
3039 3039 )
3040 3040 # loop through our set of deltas
3041 3041 for data in deltas:
3042 3042 (
3043 3043 node,
3044 3044 p1,
3045 3045 p2,
3046 3046 linknode,
3047 3047 deltabase,
3048 3048 delta,
3049 3049 flags,
3050 3050 sidedata,
3051 3051 ) = data
3052 3052 link = linkmapper(linknode)
3053 3053 flags = flags or REVIDX_DEFAULT_FLAGS
3054 3054
3055 3055 rev = self.index.get_rev(node)
3056 3056 if rev is not None:
3057 3057 # this can happen if two branches make the same change
3058 3058 self._nodeduplicatecallback(transaction, rev)
3059 3059 if duplicaterevisioncb:
3060 3060 duplicaterevisioncb(self, rev)
3061 3061 empty = False
3062 3062 continue
3063 3063
3064 3064 for p in (p1, p2):
3065 3065 if not self.index.has_node(p):
3066 3066 raise error.LookupError(
3067 3067 p, self.radix, _(b'unknown parent')
3068 3068 )
3069 3069
3070 3070 if not self.index.has_node(deltabase):
3071 3071 raise error.LookupError(
3072 3072 deltabase, self.display_id, _(b'unknown delta base')
3073 3073 )
3074 3074
3075 3075 baserev = self.rev(deltabase)
3076 3076
3077 3077 if baserev != nullrev and self.iscensored(baserev):
3078 3078 # if base is censored, delta must be full replacement in a
3079 3079 # single patch operation
3080 3080 hlen = struct.calcsize(b">lll")
3081 3081 oldlen = self.rawsize(baserev)
3082 3082 newlen = len(delta) - hlen
3083 3083 if delta[:hlen] != mdiff.replacediffheader(
3084 3084 oldlen, newlen
3085 3085 ):
3086 3086 raise error.CensoredBaseError(
3087 3087 self.display_id, self.node(baserev)
3088 3088 )
3089 3089
3090 3090 if not flags and self._peek_iscensored(baserev, delta):
3091 3091 flags |= REVIDX_ISCENSORED
3092 3092
3093 3093 # We assume consumers of addrevisioncb will want to retrieve
3094 3094 # the added revision, which will require a call to
3095 3095 # revision(). revision() will fast path if there is a cache
3096 3096 # hit. So, we tell _addrevision() to always cache in this case.
3097 3097 # We're only using addgroup() in the context of changegroup
3098 3098 # generation so the revision data can always be handled as raw
3099 3099 # by the flagprocessor.
3100 3100 rev = self._addrevision(
3101 3101 node,
3102 3102 None,
3103 3103 transaction,
3104 3104 link,
3105 3105 p1,
3106 3106 p2,
3107 3107 flags,
3108 3108 (baserev, delta, delta_base_reuse_policy),
3109 3109 alwayscache=alwayscache,
3110 3110 deltacomputer=deltacomputer,
3111 3111 sidedata=sidedata,
3112 3112 )
3113 3113
3114 3114 if addrevisioncb:
3115 3115 addrevisioncb(self, rev)
3116 3116 empty = False
3117 3117 finally:
3118 3118 self._adding_group = False
3119 3119 return not empty
3120 3120
3121 3121 def iscensored(self, rev):
3122 3122 """Check if a file revision is censored."""
3123 3123 if not self.feature_config.censorable:
3124 3124 return False
3125 3125
3126 3126 return self.flags(rev) & REVIDX_ISCENSORED
3127 3127
3128 3128 def _peek_iscensored(self, baserev, delta):
3129 3129 """Quickly check if a delta produces a censored revision."""
3130 3130 if not self.feature_config.censorable:
3131 3131 return False
3132 3132
3133 3133 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3134 3134
3135 3135 def getstrippoint(self, minlink):
3136 3136 """find the minimum rev that must be stripped to strip the linkrev
3137 3137
3138 3138 Returns a tuple containing the minimum rev and a set of all revs that
3139 3139 have linkrevs that will be broken by this strip.
3140 3140 """
3141 3141 return storageutil.resolvestripinfo(
3142 3142 minlink,
3143 3143 len(self) - 1,
3144 3144 self.headrevs(),
3145 3145 self.linkrev,
3146 3146 self.parentrevs,
3147 3147 )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)
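
        # For an inline revlog, index entries and revision data are
        # interleaved in the same file, so (assuming that layout) the
        # truncation point combines the retained data size with ``rev``
        # fixed-size index entries, as computed above.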

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it
            # is not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
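
    # A minimal usage sketch (``rl`` is a hypothetical revlog); a healthy
    # revlog reports no stray bytes in either file:
    #
    #   dd, di = rl.checksize()
    #   assert (dd, di) == (0, 0), 'revlog files have unexpected sizes'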

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )
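
    # A minimal usage sketch (``rl``, ``nodes``, and ``process`` are
    # hypothetical); each yielded object describes one revision and, with
    # revisiondata=True, carries either a delta or the full revision text:
    #
    #   for d in rl.emitrevisions(nodes, nodesorder=b'storage',
    #                             revisiondata=True):
    #       process(d.node, d.basenode, d.delta or d.revision)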

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the destination could choose a better delta base, it will do so. This
        means that if you are converting a non-generaldelta revlog to a
        generaldelta revlog, deltas will be recomputed if the delta's parent
        isn't a parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When ``None``, the destination revlog's existing
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
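        # A minimal usage sketch (``src``, ``dst``, and an open transaction
        # ``tr`` are hypothetical), forcing every delta to be recomputed in
        # the destination:
        #
        #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
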
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading():
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
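
            # Index entry layout, as assumed by the accesses above:
            # entry[0] packs offset and flags (hence the 0xFFFF mask),
            # entry[4] is the linkrev, entry[5]/entry[6] are parent revs,
            # and entry[7] is the node id, so index[entry[5]][7] resolves
            # p1's node.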

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]
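
                    # run_sidedata_helpers is assumed to return a pair of
                    # flag masks (flags to add, flags to remove), which the
                    # update above applies in that order.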

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
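
    # A minimal usage sketch (``rl`` is a hypothetical revlog); only the
    # requested keys are present in the returned dict:
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   total = info[b'storedsize']  # bytes on disk across all revlog files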

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )
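
                # entry_update fields, in the order consumed by
                # index.replace_sidedata_info below: sidedata offset,
                # sidedata length, new offset/flags field, and the sidedata
                # compression mode.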

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)