revlog: remove legacy usage of `_checkambig`...
marmoute
r51941:59c6f997 default
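The change replaces the remaining reads of the legacy `_checkambig` compatibility property in the index-writing helpers with direct access to `self.data_config.check_ambig`. A minimal sketch of the pattern, using the names from the diff below:

    # before
    f = self.opener(self._indexfile, mode=b"r+", checkambig=self._checkambig)
    # after
    f = self.opener(
        self._indexfile,
        mode=b"r+",
        checkambig=self.data_config.check_ambig,
    )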
@@ -1,3714 +1,3718 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # bare references to all the names below prevent pyflakes "unused import" warnings
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
286 286 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help keep each object
314 314 self-contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups by chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
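    # Note (illustrative, not part of this change): the 32-bit header word
    # unpacked above stores the format flags in its high 16 bits and the
    # revlog version in its low 16 bits; e.g. REVLOGV1 | FLAG_INLINE_DATA
    # describes an inline version-1 revlog.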
398 398
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must be reliably set by normal code, but
425 425 that test, debug, or performance measurement code might not set it to
426 426 an accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598 """process options (from above/config) to setup associated default revlog mode
599 599
600 600 These values might be affected when actually reading on disk information.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * newversionflags:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608 minimal index size at which mmap starts to be used
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 opts = self.opener.options
614 614
615 615 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
616 616 new_header = CHANGELOGV2
617 617 compute_rank = opts.get(b'changelogv2.compute-rank', True)
618 618 self.feature_config.compute_rank = compute_rank
619 619 elif b'revlogv2' in opts:
620 620 new_header = REVLOGV2
621 621 elif b'revlogv1' in opts:
622 622 new_header = REVLOGV1 | FLAG_INLINE_DATA
623 623 if b'generaldelta' in opts:
624 624 new_header |= FLAG_GENERALDELTA
625 625 elif b'revlogv0' in self.opener.options:
626 626 new_header = REVLOGV0
627 627 else:
628 628 new_header = REVLOG_DEFAULT_VERSION
629 629
630 630 mmapindexthreshold = None
631 631 if self._mmaplargeindex:
632 632 mmapindexthreshold = self.data_config.mmap_index_threshold
633 633 if self.feature_config.enable_ellipsis:
634 634 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
635 635
636 636 # revlog v0 doesn't have flag processors
637 637 for flag, processor in opts.get(b'flagprocessors', {}).items():
638 638 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
639 639
640 640 chunk_cache_size = self.data_config.chunk_cache_size
641 641 if chunk_cache_size <= 0:
642 642 raise error.RevlogError(
643 643 _(b'revlog chunk cache size %r is not greater than 0')
644 644 % chunk_cache_size
645 645 )
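            # `n & (n - 1)` clears the lowest set bit, so it is zero exactly
            # when at most one bit is set (n == 0 or n is a power of two);
            # non-positive sizes were already rejected above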
646 646 elif chunk_cache_size & (chunk_cache_size - 1):
647 647 raise error.RevlogError(
648 648 _(b'revlog chunk cache size %r is not a power of 2')
649 649 % chunk_cache_size
650 650 )
651 651 force_nodemap = opts.get(b'devel-force-nodemap', False)
652 652 return new_header, mmapindexthreshold, force_nodemap
653 653
654 654 def _get_data(self, filepath, mmap_threshold, size=None):
655 655 """return a file content with or without mmap
656 656
657 657 If the file is missing return the empty string"""
658 658 try:
659 659 with self.opener(filepath) as fp:
660 660 if mmap_threshold is not None:
661 661 file_size = self.opener.fstat(fp).st_size
662 662 if file_size >= mmap_threshold:
663 663 if size is not None:
664 664 # avoid potential mmap crash
665 665 size = min(file_size, size)
666 666 # TODO: should .close() to release resources without
667 667 # relying on Python GC
668 668 if size is None:
669 669 return util.buffer(util.mmapread(fp))
670 670 else:
671 671 return util.buffer(util.mmapread(fp, size))
672 672 if size is None:
673 673 return fp.read()
674 674 else:
675 675 return fp.read(size)
676 676 except FileNotFoundError:
677 677 return b''
678 678
679 679 def get_streams(self, max_linkrev, force_inline=False):
680 680 """return a list of streams that represent this revlog
681 681
682 682 This is used by stream-clone to do bytes to bytes copies of a repository.
683 683
684 684 This streams data for all revisions that refer to a changelog revision up
685 685 to `max_linkrev`.
686 686
687 687 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
688 688
689 689 It returns a list of three-tuples:
690 690
691 691 [
692 692 (filename, bytes_stream, stream_size),
693 693 …
694 694 ]
695 695 """
696 696 n = len(self)
697 697 index = self.index
698 698 while n > 0:
699 699 linkrev = index[n - 1][4]
700 700 if linkrev < max_linkrev:
701 701 break
702 702 # note: this loop will rarely go through multiple iterations, since
703 703 # it only traverses commits created during the current streaming
704 704 # pull operation.
705 705 #
706 706 # If this becomes a problem, using a binary search should cap the
707 707 # runtime of this.
708 708 n = n - 1
709 709 if n == 0:
710 710 # no data to send
711 711 return []
712 712 index_size = n * index.entry_size
713 713 data_size = self.end(n - 1)
714 714
715 715 # XXX we might have been split (or stripped) since the object
716 716 # initialization. We need to close this race too, by having a way to
717 717 # pre-open the files we feed to the revlog and never closing them before
718 718 # we are done streaming.
719 719
720 720 if self._inline:
721 721
722 722 def get_stream():
723 723 with self._indexfp() as fp:
724 724 yield None
725 725 size = index_size + data_size
726 726 if size <= 65536:
727 727 yield fp.read(size)
728 728 else:
729 729 yield from util.filechunkiter(fp, limit=size)
730 730
731 731 inline_stream = get_stream()
732 732 next(inline_stream)
733 733 return [
734 734 (self._indexfile, inline_stream, index_size + data_size),
735 735 ]
736 736 elif force_inline:
737 737
738 738 def get_stream():
739 739 with self.reading():
740 740 yield None
741 741
742 742 for rev in range(n):
743 743 idx = self.index.entry_binary(rev)
744 744 if rev == 0 and self._docket is None:
745 745 # re-inject the inline flag
746 746 header = self._format_flags
747 747 header |= self._format_version
748 748 header |= FLAG_INLINE_DATA
749 749 header = self.index.pack_header(header)
750 750 idx = header + idx
751 751 yield idx
752 752 yield self._getsegmentforrevs(rev, rev)[1]
753 753
754 754 inline_stream = get_stream()
755 755 next(inline_stream)
756 756 return [
757 757 (self._indexfile, inline_stream, index_size + data_size),
758 758 ]
759 759 else:
760 760
761 761 def get_index_stream():
762 762 with self._indexfp() as fp:
763 763 yield None
764 764 if index_size <= 65536:
765 765 yield fp.read(index_size)
766 766 else:
767 767 yield from util.filechunkiter(fp, limit=index_size)
768 768
769 769 def get_data_stream():
770 770 with self._datafp() as fp:
771 771 yield None
772 772 if data_size <= 65536:
773 773 yield fp.read(data_size)
774 774 else:
775 775 yield from util.filechunkiter(fp, limit=data_size)
776 776
777 777 index_stream = get_index_stream()
778 778 next(index_stream)
779 779 data_stream = get_data_stream()
780 780 next(data_stream)
781 781 return [
782 782 (self._datafile, data_stream, data_size),
783 783 (self._indexfile, index_stream, index_size),
784 784 ]
785 785
786 786 def _loadindex(self, docket=None):
787 787
788 788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
789 789
790 790 if self.postfix is not None:
791 791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
792 792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
793 793 entry_point = b'%s.i.a' % self.radix
794 794 elif self._try_split and self.opener.exists(self._split_index_file):
795 795 entry_point = self._split_index_file
796 796 else:
797 797 entry_point = b'%s.i' % self.radix
798 798
799 799 if docket is not None:
800 800 self._docket = docket
801 801 self._docket_file = entry_point
802 802 else:
803 803 self._initempty = True
804 804 entry_data = self._get_data(entry_point, mmapindexthreshold)
805 805 if len(entry_data) > 0:
806 806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
807 807 self._initempty = False
808 808 else:
809 809 header = new_header
810 810
811 811 self._format_flags = header & ~0xFFFF
812 812 self._format_version = header & 0xFFFF
813 813
814 814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
815 815 if supported_flags is None:
816 816 msg = _(b'unknown version (%d) in revlog %s')
817 817 msg %= (self._format_version, self.display_id)
818 818 raise error.RevlogError(msg)
819 819 elif self._format_flags & ~supported_flags:
820 820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
821 821 display_flag = self._format_flags >> 16
822 822 msg %= (display_flag, self._format_version, self.display_id)
823 823 raise error.RevlogError(msg)
824 824
825 825 features = FEATURES_BY_VERSION[self._format_version]
826 826 self._inline = features[b'inline'](self._format_flags)
827 827 self.delta_config.general_delta = features[b'generaldelta'](
828 828 self._format_flags
829 829 )
830 830 self.feature_config.has_side_data = features[b'sidedata']
831 831
832 832 if not features[b'docket']:
833 833 self._indexfile = entry_point
834 834 index_data = entry_data
835 835 else:
836 836 self._docket_file = entry_point
837 837 if self._initempty:
838 838 self._docket = docketutil.default_docket(self, header)
839 839 else:
840 840 self._docket = docketutil.parse_docket(
841 841 self, entry_data, use_pending=self._trypending
842 842 )
843 843
844 844 if self._docket is not None:
845 845 self._indexfile = self._docket.index_filepath()
846 846 index_data = b''
847 847 index_size = self._docket.index_end
848 848 if index_size > 0:
849 849 index_data = self._get_data(
850 850 self._indexfile, mmapindexthreshold, size=index_size
851 851 )
852 852 if len(index_data) < index_size:
853 853 msg = _(b'too few index data for %s: got %d, expected %d')
854 854 msg %= (self.display_id, len(index_data), index_size)
855 855 raise error.RevlogError(msg)
856 856
857 857 self._inline = False
858 858 # generaldelta implied by version 2 revlogs.
859 859 self.delta_config.general_delta = True
860 860 # the logic for persistent nodemap will be dealt with within the
861 861 # main docket, so disable it for now.
862 862 self._nodemap_file = None
863 863
864 864 if self._docket is not None:
865 865 self._datafile = self._docket.data_filepath()
866 866 self._sidedatafile = self._docket.sidedata_filepath()
867 867 elif self.postfix is None:
868 868 self._datafile = b'%s.d' % self.radix
869 869 else:
870 870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
871 871
872 872 self.nodeconstants = sha1nodeconstants
873 873 self.nullid = self.nodeconstants.nullid
874 874
875 875 # sparse-revlog can't be on without general-delta (issue6056)
876 876 if not self.delta_config.general_delta:
877 877 self.delta_config.sparse_revlog = False
878 878
879 879 self._storedeltachains = True
880 880
881 881 devel_nodemap = (
882 882 self._nodemap_file
883 883 and force_nodemap
884 884 and parse_index_v1_nodemap is not None
885 885 )
886 886
887 887 use_rust_index = False
888 888 if rustrevlog is not None:
889 889 if self._nodemap_file is not None:
890 890 use_rust_index = True
891 891 else:
892 892 use_rust_index = self.opener.options.get(b'rust.index')
893 893
894 894 self._parse_index = parse_index_v1
895 895 if self._format_version == REVLOGV0:
896 896 self._parse_index = revlogv0.parse_index_v0
897 897 elif self._format_version == REVLOGV2:
898 898 self._parse_index = parse_index_v2
899 899 elif self._format_version == CHANGELOGV2:
900 900 self._parse_index = parse_index_cl_v2
901 901 elif devel_nodemap:
902 902 self._parse_index = parse_index_v1_nodemap
903 903 elif use_rust_index:
904 904 self._parse_index = parse_index_v1_mixed
905 905 try:
906 906 d = self._parse_index(index_data, self._inline)
907 907 index, chunkcache = d
908 908 use_nodemap = (
909 909 not self._inline
910 910 and self._nodemap_file is not None
911 911 and hasattr(index, 'update_nodemap_data')
912 912 )
913 913 if use_nodemap:
914 914 nodemap_data = nodemaputil.persisted_data(self)
915 915 if nodemap_data is not None:
916 916 docket = nodemap_data[0]
917 917 if (
918 918 len(d[0]) > docket.tip_rev
919 919 and d[0][docket.tip_rev][7] == docket.tip_node
920 920 ):
921 921 # no changelog tampering
922 922 self._nodemap_docket = docket
923 923 index.update_nodemap_data(*nodemap_data)
924 924 except (ValueError, IndexError):
925 925 raise error.RevlogError(
926 926 _(b"index %s is corrupted") % self.display_id
927 927 )
928 928 self.index = index
929 929 self._segmentfile = randomaccessfile.randomaccessfile(
930 930 self.opener,
931 931 (self._indexfile if self._inline else self._datafile),
932 932 self._chunkcachesize,
933 933 chunkcache,
934 934 )
935 935 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
936 936 self.opener,
937 937 self._sidedatafile,
938 938 self._chunkcachesize,
939 939 )
940 940 # revnum -> (chain-length, sum-delta-length)
941 941 self._chaininfocache = util.lrucachedict(500)
942 942 # revlog header -> revlog compressor
943 943 self._decompressors = {}
944 944
945 945 def get_revlog(self):
946 946 """simple function to mirror API of other not-really-revlog API"""
947 947 return self
948 948
949 949 @util.propertycache
950 950 def revlog_kind(self):
951 951 return self.target[0]
952 952
953 953 @util.propertycache
954 954 def display_id(self):
955 955 """The public facing "ID" of the revlog that we use in message"""
956 956 if self.revlog_kind == KIND_FILELOG:
957 957 # Reference the file without the "data/" prefix, so it is familiar
958 958 # to the user.
959 959 return self.target[1]
960 960 else:
961 961 return self.radix
962 962
963 963 def _get_decompressor(self, t):
964 964 try:
965 965 compressor = self._decompressors[t]
966 966 except KeyError:
967 967 try:
968 968 engine = util.compengines.forrevlogheader(t)
969 969 compressor = engine.revlogcompressor(self._compengineopts)
970 970 self._decompressors[t] = compressor
971 971 except KeyError:
972 972 raise error.RevlogError(
973 973 _(b'unknown compression type %s') % binascii.hexlify(t)
974 974 )
975 975 return compressor
976 976
977 977 @util.propertycache
978 978 def _compressor(self):
979 979 engine = util.compengines[self._compengine]
980 980 return engine.revlogcompressor(self._compengineopts)
981 981
982 982 @util.propertycache
983 983 def _decompressor(self):
984 984 """the default decompressor"""
985 985 if self._docket is None:
986 986 return None
987 987 t = self._docket.default_compression_header
988 988 c = self._get_decompressor(t)
989 989 return c.decompress
990 990
991 991 def _indexfp(self):
992 992 """file object for the revlog's index file"""
993 993 return self.opener(self._indexfile, mode=b"r")
994 994
995 995 def __index_write_fp(self):
996 996 # You should not use this directly; use `_writing` instead
997 997 try:
998 998 f = self.opener(
999 self._indexfile, mode=b"r+", checkambig=self._checkambig
999 self._indexfile,
1000 mode=b"r+",
1001 checkambig=self.data_config.check_ambig,
1000 1002 )
1001 1003 if self._docket is None:
1002 1004 f.seek(0, os.SEEK_END)
1003 1005 else:
1004 1006 f.seek(self._docket.index_end, os.SEEK_SET)
1005 1007 return f
1006 1008 except FileNotFoundError:
1007 1009 return self.opener(
1008 self._indexfile, mode=b"w+", checkambig=self._checkambig
1010 self._indexfile,
1011 mode=b"w+",
1012 checkambig=self.data_config.check_ambig,
1009 1013 )
1010 1014
1011 1015 def __index_new_fp(self):
1012 1016 # You should not use this unless you are upgrading from inline revlog
1013 1017 return self.opener(
1014 1018 self._indexfile,
1015 1019 mode=b"w",
1016 checkambig=self._checkambig,
1020 checkambig=self.data_config.check_ambig,
1017 1021 atomictemp=True,
1018 1022 )
1019 1023
1020 1024 def _datafp(self, mode=b'r'):
1021 1025 """file object for the revlog's data file"""
1022 1026 return self.opener(self._datafile, mode=mode)
1023 1027
1024 1028 @contextlib.contextmanager
1025 1029 def _sidedatareadfp(self):
1026 1030 """file object suitable to read sidedata"""
1027 1031 if self._writinghandles:
1028 1032 yield self._writinghandles[2]
1029 1033 else:
1030 1034 with self.opener(self._sidedatafile) as fp:
1031 1035 yield fp
1032 1036
1033 1037 def tiprev(self):
1034 1038 return len(self.index) - 1
1035 1039
1036 1040 def tip(self):
1037 1041 return self.node(self.tiprev())
1038 1042
1039 1043 def __contains__(self, rev):
1040 1044 return 0 <= rev < len(self)
1041 1045
1042 1046 def __len__(self):
1043 1047 return len(self.index)
1044 1048
1045 1049 def __iter__(self):
1046 1050 return iter(range(len(self)))
1047 1051
1048 1052 def revs(self, start=0, stop=None):
1049 1053 """iterate over all rev in this revlog (from start to stop)"""
1050 1054 return storageutil.iterrevs(len(self), start=start, stop=stop)
1051 1055
1052 1056 def hasnode(self, node):
1053 1057 try:
1054 1058 self.rev(node)
1055 1059 return True
1056 1060 except KeyError:
1057 1061 return False
1058 1062
1059 1063 def _candelta(self, baserev, rev):
1060 1064 """whether two revisions (baserev, rev) can be delta-ed or not"""
1061 1065 # Disable delta if either rev requires a content-changing flag
1062 1066 # processor (ex. LFS). This is because such flag processor can alter
1063 1067 # the rawtext content that the delta will be based on, and two clients
1064 1068 # could have a same revlog node with different flags (i.e. different
1065 1069 # rawtext contents) and the delta could be incompatible.
1066 1070 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1067 1071 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1068 1072 ):
1069 1073 return False
1070 1074 return True
1071 1075
1072 1076 def update_caches(self, transaction):
1073 1077 """update on disk cache
1074 1078
1075 1079 If a transaction is passed, the update may be delayed to transaction
1076 1080 commit."""
1077 1081 if self._nodemap_file is not None:
1078 1082 if transaction is None:
1079 1083 nodemaputil.update_persistent_nodemap(self)
1080 1084 else:
1081 1085 nodemaputil.setup_persistent_nodemap(transaction, self)
1082 1086
1083 1087 def clearcaches(self):
1084 1088 """Clear in-memory caches"""
1085 1089 self._revisioncache = None
1086 1090 self._chainbasecache.clear()
1087 1091 self._segmentfile.clear_cache()
1088 1092 self._segmentfile_sidedata.clear_cache()
1089 1093 self._pcache = {}
1090 1094 self._nodemap_docket = None
1091 1095 self.index.clearcaches()
1092 1096 # The python code is the one responsible for validating the docket, so we
1093 1097 # end up having to refresh it here.
1094 1098 use_nodemap = (
1095 1099 not self._inline
1096 1100 and self._nodemap_file is not None
1097 1101 and hasattr(self.index, 'update_nodemap_data')
1098 1102 )
1099 1103 if use_nodemap:
1100 1104 nodemap_data = nodemaputil.persisted_data(self)
1101 1105 if nodemap_data is not None:
1102 1106 self._nodemap_docket = nodemap_data[0]
1103 1107 self.index.update_nodemap_data(*nodemap_data)
1104 1108
1105 1109 def rev(self, node):
1106 1110 """return the revision number associated with a <nodeid>"""
1107 1111 try:
1108 1112 return self.index.rev(node)
1109 1113 except TypeError:
1110 1114 raise
1111 1115 except error.RevlogError:
1112 1116 # parsers.c radix tree lookup failed
1113 1117 if (
1114 1118 node == self.nodeconstants.wdirid
1115 1119 or node in self.nodeconstants.wdirfilenodeids
1116 1120 ):
1117 1121 raise error.WdirUnsupported
1118 1122 raise error.LookupError(node, self.display_id, _(b'no node'))
1119 1123
1120 1124 # Accessors for index entries.
1121 1125
1122 1126 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1123 1127 # are flags.
1124 1128 def start(self, rev):
1125 1129 return int(self.index[rev][0] >> 16)
1126 1130
1127 1131 def sidedata_cut_off(self, rev):
1128 1132 sd_cut_off = self.index[rev][8]
1129 1133 if sd_cut_off != 0:
1130 1134 return sd_cut_off
1131 1135 # This is some annoying dance, because entries without sidedata
1132 1136 # currently use 0 as their offset. (instead of previous-offset +
1133 1137 # previous-size)
1134 1138 #
1135 1139 # We should reconsider this sidedata → 0 sidedata_offset policy.
1136 1140 # In the meantime, we need this.
1137 1141 while 0 <= rev:
1138 1142 e = self.index[rev]
1139 1143 if e[9] != 0:
1140 1144 return e[8] + e[9]
1141 1145 rev -= 1
1142 1146 return 0
1143 1147
1144 1148 def flags(self, rev):
1145 1149 return self.index[rev][0] & 0xFFFF
1146 1150
1147 1151 def length(self, rev):
1148 1152 return self.index[rev][1]
1149 1153
1150 1154 def sidedata_length(self, rev):
1151 1155 if not self.hassidedata:
1152 1156 return 0
1153 1157 return self.index[rev][9]
1154 1158
1155 1159 def rawsize(self, rev):
1156 1160 """return the length of the uncompressed text for a given revision"""
1157 1161 l = self.index[rev][2]
1158 1162 if l >= 0:
1159 1163 return l
1160 1164
1161 1165 t = self.rawdata(rev)
1162 1166 return len(t)
1163 1167
1164 1168 def size(self, rev):
1165 1169 """length of non-raw text (processed by a "read" flag processor)"""
1166 1170 # fast path: if no "read" flag processor could change the content,
1167 1171 # size is rawsize. note: ELLIPSIS is known to not change the content.
1168 1172 flags = self.flags(rev)
1169 1173 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1170 1174 return self.rawsize(rev)
1171 1175
1172 1176 return len(self.revision(rev))
1173 1177
1174 1178 def fast_rank(self, rev):
1175 1179 """Return the rank of a revision if already known, or None otherwise.
1176 1180
1177 1181 The rank of a revision is the size of the sub-graph it defines as a
1178 1182 head. Equivalently, the rank of a revision `r` is the size of the set
1179 1183 `ancestors(r)`, `r` included.
1180 1184
1181 1185 This method returns the rank retrieved from the revlog in constant
1182 1186 time. It makes no attempt at computing unknown values for versions of
1183 1187 the revlog which do not persist the rank.
1184 1188 """
1185 1189 rank = self.index[rev][ENTRY_RANK]
1186 1190 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1187 1191 return None
1188 1192 if rev == nullrev:
1189 1193 return 0 # convention
1190 1194 return rank
1191 1195
1192 1196 def chainbase(self, rev):
1193 1197 base = self._chainbasecache.get(rev)
1194 1198 if base is not None:
1195 1199 return base
1196 1200
1197 1201 index = self.index
1198 1202 iterrev = rev
1199 1203 base = index[iterrev][3]
1200 1204 while base != iterrev:
1201 1205 iterrev = base
1202 1206 base = index[iterrev][3]
1203 1207
1204 1208 self._chainbasecache[rev] = base
1205 1209 return base
1206 1210
1207 1211 def linkrev(self, rev):
1208 1212 return self.index[rev][4]
1209 1213
1210 1214 def parentrevs(self, rev):
1211 1215 try:
1212 1216 entry = self.index[rev]
1213 1217 except IndexError:
1214 1218 if rev == wdirrev:
1215 1219 raise error.WdirUnsupported
1216 1220 raise
1217 1221
1218 1222 if self.canonical_parent_order and entry[5] == nullrev:
1219 1223 return entry[6], entry[5]
1220 1224 else:
1221 1225 return entry[5], entry[6]
1222 1226
1223 1227 # fast parentrevs(rev) where rev isn't filtered
1224 1228 _uncheckedparentrevs = parentrevs
1225 1229
1226 1230 def node(self, rev):
1227 1231 try:
1228 1232 return self.index[rev][7]
1229 1233 except IndexError:
1230 1234 if rev == wdirrev:
1231 1235 raise error.WdirUnsupported
1232 1236 raise
1233 1237
1234 1238 # Derived from index values.
1235 1239
1236 1240 def end(self, rev):
1237 1241 return self.start(rev) + self.length(rev)
1238 1242
1239 1243 def parents(self, node):
1240 1244 i = self.index
1241 1245 d = i[self.rev(node)]
1242 1246 # inline node() to avoid function call overhead
1243 1247 if self.canonical_parent_order and d[5] == self.nullid:
1244 1248 return i[d[6]][7], i[d[5]][7]
1245 1249 else:
1246 1250 return i[d[5]][7], i[d[6]][7]
1247 1251
1248 1252 def chainlen(self, rev):
1249 1253 return self._chaininfo(rev)[0]
1250 1254
1251 1255 def _chaininfo(self, rev):
1252 1256 chaininfocache = self._chaininfocache
1253 1257 if rev in chaininfocache:
1254 1258 return chaininfocache[rev]
1255 1259 index = self.index
1256 1260 generaldelta = self.delta_config.general_delta
1257 1261 iterrev = rev
1258 1262 e = index[iterrev]
1259 1263 clen = 0
1260 1264 compresseddeltalen = 0
1261 1265 while iterrev != e[3]:
1262 1266 clen += 1
1263 1267 compresseddeltalen += e[1]
1264 1268 if generaldelta:
1265 1269 iterrev = e[3]
1266 1270 else:
1267 1271 iterrev -= 1
1268 1272 if iterrev in chaininfocache:
1269 1273 t = chaininfocache[iterrev]
1270 1274 clen += t[0]
1271 1275 compresseddeltalen += t[1]
1272 1276 break
1273 1277 e = index[iterrev]
1274 1278 else:
1275 1279 # Add text length of base since decompressing that also takes
1276 1280 # work. For cache hits the length is already included.
1277 1281 compresseddeltalen += e[1]
1278 1282 r = (clen, compresseddeltalen)
1279 1283 chaininfocache[rev] = r
1280 1284 return r
1281 1285
1282 1286 def _deltachain(self, rev, stoprev=None):
1283 1287 """Obtain the delta chain for a revision.
1284 1288
1285 1289 ``stoprev`` specifies a revision to stop at. If not specified, we
1286 1290 stop at the base of the chain.
1287 1291
1288 1292 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1289 1293 revs in ascending order and ``stopped`` is a bool indicating whether
1290 1294 ``stoprev`` was hit.
1291 1295 """
1292 1296 generaldelta = self.delta_config.general_delta
1293 1297 # Try C implementation.
1294 1298 try:
1295 1299 return self.index.deltachain(rev, stoprev, generaldelta)
1296 1300 except AttributeError:
1297 1301 pass
1298 1302
1299 1303 chain = []
1300 1304
1301 1305 # Alias to prevent attribute lookup in tight loop.
1302 1306 index = self.index
1303 1307
1304 1308 iterrev = rev
1305 1309 e = index[iterrev]
1306 1310 while iterrev != e[3] and iterrev != stoprev:
1307 1311 chain.append(iterrev)
1308 1312 if generaldelta:
1309 1313 iterrev = e[3]
1310 1314 else:
1311 1315 iterrev -= 1
1312 1316 e = index[iterrev]
1313 1317
1314 1318 if iterrev == stoprev:
1315 1319 stopped = True
1316 1320 else:
1317 1321 chain.append(iterrev)
1318 1322 stopped = False
1319 1323
1320 1324 chain.reverse()
1321 1325 return chain, stopped
1322 1326
1323 1327 def ancestors(self, revs, stoprev=0, inclusive=False):
1324 1328 """Generate the ancestors of 'revs' in reverse revision order.
1325 1329 Does not generate revs lower than stoprev.
1326 1330
1327 1331 See the documentation for ancestor.lazyancestors for more details."""
1328 1332
1329 1333 # first, make sure start revisions aren't filtered
1330 1334 revs = list(revs)
1331 1335 checkrev = self.node
1332 1336 for r in revs:
1333 1337 checkrev(r)
1334 1338 # and we're sure ancestors aren't filtered as well
1335 1339
1336 1340 if rustancestor is not None and self.index.rust_ext_compat:
1337 1341 lazyancestors = rustancestor.LazyAncestors
1338 1342 arg = self.index
1339 1343 else:
1340 1344 lazyancestors = ancestor.lazyancestors
1341 1345 arg = self._uncheckedparentrevs
1342 1346 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1343 1347
1344 1348 def descendants(self, revs):
1345 1349 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1346 1350
1347 1351 def findcommonmissing(self, common=None, heads=None):
1348 1352 """Return a tuple of the ancestors of common and the ancestors of heads
1349 1353 that are not ancestors of common. In revset terminology, we return the
1350 1354 tuple:
1351 1355
1352 1356 ::common, (::heads) - (::common)
1353 1357
1354 1358 The list is sorted by revision number, meaning it is
1355 1359 topologically sorted.
1356 1360
1357 1361 'heads' and 'common' are both lists of node IDs. If heads is
1358 1362 not supplied, uses all of the revlog's heads. If common is not
1359 1363 supplied, uses nullid."""
1360 1364 if common is None:
1361 1365 common = [self.nullid]
1362 1366 if heads is None:
1363 1367 heads = self.heads()
1364 1368
1365 1369 common = [self.rev(n) for n in common]
1366 1370 heads = [self.rev(n) for n in heads]
1367 1371
1368 1372 # we want the ancestors, but inclusive
1369 1373 class lazyset:
1370 1374 def __init__(self, lazyvalues):
1371 1375 self.addedvalues = set()
1372 1376 self.lazyvalues = lazyvalues
1373 1377
1374 1378 def __contains__(self, value):
1375 1379 return value in self.addedvalues or value in self.lazyvalues
1376 1380
1377 1381 def __iter__(self):
1378 1382 added = self.addedvalues
1379 1383 for r in added:
1380 1384 yield r
1381 1385 for r in self.lazyvalues:
1382 1386 if not r in added:
1383 1387 yield r
1384 1388
1385 1389 def add(self, value):
1386 1390 self.addedvalues.add(value)
1387 1391
1388 1392 def update(self, values):
1389 1393 self.addedvalues.update(values)
1390 1394
1391 1395 has = lazyset(self.ancestors(common))
1392 1396 has.add(nullrev)
1393 1397 has.update(common)
1394 1398
1395 1399 # take all ancestors from heads that aren't in has
1396 1400 missing = set()
1397 1401 visit = collections.deque(r for r in heads if r not in has)
1398 1402 while visit:
1399 1403 r = visit.popleft()
1400 1404 if r in missing:
1401 1405 continue
1402 1406 else:
1403 1407 missing.add(r)
1404 1408 for p in self.parentrevs(r):
1405 1409 if p not in has:
1406 1410 visit.append(p)
1407 1411 missing = list(missing)
1408 1412 missing.sort()
1409 1413 return has, [self.node(miss) for miss in missing]
1410 1414
1411 1415 def incrementalmissingrevs(self, common=None):
1412 1416 """Return an object that can be used to incrementally compute the
1413 1417 revision numbers of the ancestors of arbitrary sets that are not
1414 1418 ancestors of common. This is an ancestor.incrementalmissingancestors
1415 1419 object.
1416 1420
1417 1421 'common' is a list of revision numbers. If common is not supplied, uses
1418 1422 nullrev.
1419 1423 """
1420 1424 if common is None:
1421 1425 common = [nullrev]
1422 1426
1423 1427 if rustancestor is not None and self.index.rust_ext_compat:
1424 1428 return rustancestor.MissingAncestors(self.index, common)
1425 1429 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1426 1430
1427 1431 def findmissingrevs(self, common=None, heads=None):
1428 1432 """Return the revision numbers of the ancestors of heads that
1429 1433 are not ancestors of common.
1430 1434
1431 1435 More specifically, return a list of revision numbers corresponding to
1432 1436 nodes N such that every N satisfies the following constraints:
1433 1437
1434 1438 1. N is an ancestor of some node in 'heads'
1435 1439 2. N is not an ancestor of any node in 'common'
1436 1440
1437 1441 The list is sorted by revision number, meaning it is
1438 1442 topologically sorted.
1439 1443
1440 1444 'heads' and 'common' are both lists of revision numbers. If heads is
1441 1445 not supplied, uses all of the revlog's heads. If common is not
1442 1446 supplied, uses nullid."""
1443 1447 if common is None:
1444 1448 common = [nullrev]
1445 1449 if heads is None:
1446 1450 heads = self.headrevs()
1447 1451
1448 1452 inc = self.incrementalmissingrevs(common=common)
1449 1453 return inc.missingancestors(heads)
1450 1454
1451 1455 def findmissing(self, common=None, heads=None):
1452 1456 """Return the ancestors of heads that are not ancestors of common.
1453 1457
1454 1458 More specifically, return a list of nodes N such that every N
1455 1459 satisfies the following constraints:
1456 1460
1457 1461 1. N is an ancestor of some node in 'heads'
1458 1462 2. N is not an ancestor of any node in 'common'
1459 1463
1460 1464 The list is sorted by revision number, meaning it is
1461 1465 topologically sorted.
1462 1466
1463 1467 'heads' and 'common' are both lists of node IDs. If heads is
1464 1468 not supplied, uses all of the revlog's heads. If common is not
1465 1469 supplied, uses nullid."""
1466 1470 if common is None:
1467 1471 common = [self.nullid]
1468 1472 if heads is None:
1469 1473 heads = self.heads()
1470 1474
1471 1475 common = [self.rev(n) for n in common]
1472 1476 heads = [self.rev(n) for n in heads]
1473 1477
1474 1478 inc = self.incrementalmissingrevs(common=common)
1475 1479 return [self.node(r) for r in inc.missingancestors(heads)]
1476 1480
1477 1481 def nodesbetween(self, roots=None, heads=None):
1478 1482 """Return a topological path from 'roots' to 'heads'.
1479 1483
1480 1484 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1481 1485 topologically sorted list of all nodes N that satisfy both of
1482 1486 these constraints:
1483 1487
1484 1488 1. N is a descendant of some node in 'roots'
1485 1489 2. N is an ancestor of some node in 'heads'
1486 1490
1487 1491 Every node is considered to be both a descendant and an ancestor
1488 1492 of itself, so every reachable node in 'roots' and 'heads' will be
1489 1493 included in 'nodes'.
1490 1494
1491 1495 'outroots' is the list of reachable nodes in 'roots', i.e., the
1492 1496 subset of 'roots' that is returned in 'nodes'. Likewise,
1493 1497 'outheads' is the subset of 'heads' that is also in 'nodes'.
1494 1498
1495 1499 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1496 1500 unspecified, uses nullid as the only root. If 'heads' is
1497 1501 unspecified, uses list of all of the revlog's heads."""
1498 1502 nonodes = ([], [], [])
1499 1503 if roots is not None:
1500 1504 roots = list(roots)
1501 1505 if not roots:
1502 1506 return nonodes
1503 1507 lowestrev = min([self.rev(n) for n in roots])
1504 1508 else:
1505 1509 roots = [self.nullid] # Everybody's a descendant of nullid
1506 1510 lowestrev = nullrev
1507 1511 if (lowestrev == nullrev) and (heads is None):
1508 1512 # We want _all_ the nodes!
1509 1513 return (
1510 1514 [self.node(r) for r in self],
1511 1515 [self.nullid],
1512 1516 list(self.heads()),
1513 1517 )
1514 1518 if heads is None:
1515 1519 # All nodes are ancestors, so the latest ancestor is the last
1516 1520 # node.
1517 1521 highestrev = len(self) - 1
1518 1522 # Set ancestors to None to signal that every node is an ancestor.
1519 1523 ancestors = None
1520 1524 # Set heads to an empty dictionary for later discovery of heads
1521 1525 heads = {}
1522 1526 else:
1523 1527 heads = list(heads)
1524 1528 if not heads:
1525 1529 return nonodes
1526 1530 ancestors = set()
1527 1531 # Turn heads into a dictionary so we can remove 'fake' heads.
1528 1532 # Also, later we will be using it to filter out the heads we can't
1529 1533 # find from roots.
1530 1534 heads = dict.fromkeys(heads, False)
1531 1535 # Start at the top and keep marking parents until we're done.
1532 1536 nodestotag = set(heads)
1533 1537 # Remember where the top was so we can use it as a limit later.
1534 1538 highestrev = max([self.rev(n) for n in nodestotag])
1535 1539 while nodestotag:
1536 1540 # grab a node to tag
1537 1541 n = nodestotag.pop()
1538 1542 # Never tag nullid
1539 1543 if n == self.nullid:
1540 1544 continue
1541 1545 # A node's revision number represents its place in a
1542 1546 # topologically sorted list of nodes.
1543 1547 r = self.rev(n)
1544 1548 if r >= lowestrev:
1545 1549 if n not in ancestors:
1546 1550 # If we are possibly a descendant of one of the roots
1547 1551 # and we haven't already been marked as an ancestor
1548 1552 ancestors.add(n) # Mark as ancestor
1549 1553 # Add non-nullid parents to list of nodes to tag.
1550 1554 nodestotag.update(
1551 1555 [p for p in self.parents(n) if p != self.nullid]
1552 1556 )
1553 1557 elif n in heads: # We've seen it before, is it a fake head?
1554 1558 # So it is, real heads should not be the ancestors of
1555 1559 # any other heads.
1556 1560 heads.pop(n)
1557 1561 if not ancestors:
1558 1562 return nonodes
1559 1563 # Now that we have our set of ancestors, we want to remove any
1560 1564 # roots that are not ancestors.
1561 1565
1562 1566 # If one of the roots was nullid, everything is included anyway.
1563 1567 if lowestrev > nullrev:
1564 1568 # But, since we weren't, let's recompute the lowest rev to not
1565 1569 # include roots that aren't ancestors.
1566 1570
1567 1571 # Filter out roots that aren't ancestors of heads
1568 1572 roots = [root for root in roots if root in ancestors]
1569 1573 # Recompute the lowest revision
1570 1574 if roots:
1571 1575 lowestrev = min([self.rev(root) for root in roots])
1572 1576 else:
1573 1577 # No more roots? Return empty list
1574 1578 return nonodes
1575 1579 else:
1576 1580 # We are descending from nullid, and don't need to care about
1577 1581 # any other roots.
1578 1582 lowestrev = nullrev
1579 1583 roots = [self.nullid]
1580 1584 # Transform our roots list into a set.
1581 1585 descendants = set(roots)
1582 1586 # Also, keep the original roots so we can filter out roots that aren't
1583 1587 # 'real' roots (i.e. are descended from other roots).
1584 1588 roots = descendants.copy()
1585 1589 # Our topologically sorted list of output nodes.
1586 1590 orderedout = []
1587 1591 # Don't start at nullid since we don't want nullid in our output list,
1588 1592 # and if nullid shows up in descendants, empty parents will look like
1589 1593 # they're descendants.
1590 1594 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1591 1595 n = self.node(r)
1592 1596 isdescendant = False
1593 1597 if lowestrev == nullrev: # Everybody is a descendant of nullid
1594 1598 isdescendant = True
1595 1599 elif n in descendants:
1596 1600 # n is already a descendant
1597 1601 isdescendant = True
1598 1602 # This check only needs to be done here because all the roots
1599 1603 # will start being marked as descendants before the loop.
1600 1604 if n in roots:
1601 1605 # If n was a root, check if it's a 'real' root.
1602 1606 p = tuple(self.parents(n))
1603 1607 # If any of its parents are descendants, it's not a root.
1604 1608 if (p[0] in descendants) or (p[1] in descendants):
1605 1609 roots.remove(n)
1606 1610 else:
1607 1611 p = tuple(self.parents(n))
1608 1612 # A node is a descendant if either of its parents is a
1609 1613 # descendant. (We seeded the descendants set with the roots
1610 1614 # up there, remember?)
1611 1615 if (p[0] in descendants) or (p[1] in descendants):
1612 1616 descendants.add(n)
1613 1617 isdescendant = True
1614 1618 if isdescendant and ((ancestors is None) or (n in ancestors)):
1615 1619 # Only include nodes that are both descendants and ancestors.
1616 1620 orderedout.append(n)
1617 1621 if (ancestors is not None) and (n in heads):
1618 1622 # We're trying to figure out which heads are reachable
1619 1623 # from roots.
1620 1624 # Mark this head as having been reached
1621 1625 heads[n] = True
1622 1626 elif ancestors is None:
1623 1627 # Otherwise, we're trying to discover the heads.
1624 1628 # Assume this is a head because if it isn't, the next step
1625 1629 # will eventually remove it.
1626 1630 heads[n] = True
1627 1631 # But, obviously its parents aren't.
1628 1632 for p in self.parents(n):
1629 1633 heads.pop(p, None)
1630 1634 heads = [head for head, flag in heads.items() if flag]
1631 1635 roots = list(roots)
1632 1636 assert orderedout
1633 1637 assert roots
1634 1638 assert heads
1635 1639 return (orderedout, roots, heads)
1636 1640
1637 1641 def headrevs(self, revs=None):
1638 1642 if revs is None:
1639 1643 try:
1640 1644 return self.index.headrevs()
1641 1645 except AttributeError:
1642 1646 return self._headrevs()
1643 1647 if rustdagop is not None and self.index.rust_ext_compat:
1644 1648 return rustdagop.headrevs(self.index, revs)
1645 1649 return dagop.headrevs(revs, self._uncheckedparentrevs)
1646 1650
1647 1651 def computephases(self, roots):
1648 1652 return self.index.computephasesmapsets(roots)
1649 1653
1650 1654 def _headrevs(self):
1651 1655 count = len(self)
1652 1656 if not count:
1653 1657 return [nullrev]
1654 1658 # we won't iterate over filtered revs, so nobody is a head at start
1655 1659 ishead = [0] * (count + 1)
1656 1660 index = self.index
1657 1661 for r in self:
1658 1662 ishead[r] = 1 # I may be a head
1659 1663 e = index[r]
1660 1664 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1661 1665 return [r for r, val in enumerate(ishead) if val]
1662 1666
1663 1667 def heads(self, start=None, stop=None):
1664 1668 """return the list of all nodes that have no children
1665 1669
1666 1670 if start is specified, only heads that are descendants of
1667 1671 start will be returned
1668 1672 if stop is specified, it will consider all the revs from stop
1669 1673 as if they had no children
1670 1674 """
1671 1675 if start is None and stop is None:
1672 1676 if not len(self):
1673 1677 return [self.nullid]
1674 1678 return [self.node(r) for r in self.headrevs()]
1675 1679
1676 1680 if start is None:
1677 1681 start = nullrev
1678 1682 else:
1679 1683 start = self.rev(start)
1680 1684
1681 1685 stoprevs = {self.rev(n) for n in stop or []}
1682 1686
1683 1687 revs = dagop.headrevssubset(
1684 1688 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1685 1689 )
1686 1690
1687 1691 return [self.node(rev) for rev in revs]
1688 1692
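# Illustrative usage sketch (added for clarity, not part of the original
# source). Assuming ``rl`` is an already-open revlog instance:
#
#     all_heads = rl.heads()            # every node without children
#     sub_heads = rl.heads(start=node)  # heads that descend from ``node``
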
1689 1693 def children(self, node):
1690 1694 """find the children of a given node"""
1691 1695 c = []
1692 1696 p = self.rev(node)
1693 1697 for r in self.revs(start=p + 1):
1694 1698 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1695 1699 if prevs:
1696 1700 for pr in prevs:
1697 1701 if pr == p:
1698 1702 c.append(self.node(r))
1699 1703 elif p == nullrev:
1700 1704 c.append(self.node(r))
1701 1705 return c
1702 1706
1703 1707 def commonancestorsheads(self, a, b):
1704 1708 """calculate all the heads of the common ancestors of nodes a and b"""
1705 1709 a, b = self.rev(a), self.rev(b)
1706 1710 ancs = self._commonancestorsheads(a, b)
1707 1711 return pycompat.maplist(self.node, ancs)
1708 1712
1709 1713 def _commonancestorsheads(self, *revs):
1710 1714 """calculate all the heads of the common ancestors of revs"""
1711 1715 try:
1712 1716 ancs = self.index.commonancestorsheads(*revs)
1713 1717 except (AttributeError, OverflowError): # C implementation failed
1714 1718 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1715 1719 return ancs
1716 1720
1717 1721 def isancestor(self, a, b):
1718 1722 """return True if node a is an ancestor of node b
1719 1723
1720 1724 A revision is considered an ancestor of itself."""
1721 1725 a, b = self.rev(a), self.rev(b)
1722 1726 return self.isancestorrev(a, b)
1723 1727
1724 1728 def isancestorrev(self, a, b):
1725 1729 """return True if revision a is an ancestor of revision b
1726 1730
1727 1731 A revision is considered an ancestor of itself.
1728 1732
1729 1733 The implementation of this is trivial but the use of
1730 1734 reachableroots is not."""
1731 1735 if a == nullrev:
1732 1736 return True
1733 1737 elif a == b:
1734 1738 return True
1735 1739 elif a > b:
1736 1740 return False
1737 1741 return bool(self.reachableroots(a, [b], [a], includepath=False))
1738 1742
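# Illustrative sketch (not from the original source): ``isancestor`` works on
# nodes while ``isancestorrev`` works on revision numbers, and both treat a
# revision as an ancestor of itself. ``rl``, ``a`` and ``b`` are assumed names.
#
#     rl.isancestorrev(a, a)                 # True
#     rl.isancestor(rl.node(a), rl.node(b))  # same as rl.isancestorrev(a, b)
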
1739 1743 def reachableroots(self, minroot, heads, roots, includepath=False):
1740 1744 """return (heads(::(<roots> and <roots>::<heads>)))
1741 1745
1742 1746 If includepath is True, return (<roots>::<heads>)."""
1743 1747 try:
1744 1748 return self.index.reachableroots2(
1745 1749 minroot, heads, roots, includepath
1746 1750 )
1747 1751 except AttributeError:
1748 1752 return dagop._reachablerootspure(
1749 1753 self.parentrevs, minroot, roots, heads, includepath
1750 1754 )
1751 1755
1752 1756 def ancestor(self, a, b):
1753 1757 """calculate the "best" common ancestor of nodes a and b"""
1754 1758
1755 1759 a, b = self.rev(a), self.rev(b)
1756 1760 try:
1757 1761 ancs = self.index.ancestors(a, b)
1758 1762 except (AttributeError, OverflowError):
1759 1763 ancs = ancestor.ancestors(self.parentrevs, a, b)
1760 1764 if ancs:
1761 1765 # choose a consistent winner when there's a tie
1762 1766 return min(map(self.node, ancs))
1763 1767 return self.nullid
1764 1768
1765 1769 def _match(self, id):
1766 1770 if isinstance(id, int):
1767 1771 # rev
1768 1772 return self.node(id)
1769 1773 if len(id) == self.nodeconstants.nodelen:
1770 1774 # possibly a binary node
1771 1775 # odds of a binary node being all hex in ASCII are 1 in 10**25
1772 1776 try:
1773 1777 node = id
1774 1778 self.rev(node) # quick search the index
1775 1779 return node
1776 1780 except error.LookupError:
1777 1781 pass # may be partial hex id
1778 1782 try:
1779 1783 # str(rev)
1780 1784 rev = int(id)
1781 1785 if b"%d" % rev != id:
1782 1786 raise ValueError
1783 1787 if rev < 0:
1784 1788 rev = len(self) + rev
1785 1789 if rev < 0 or rev >= len(self):
1786 1790 raise ValueError
1787 1791 return self.node(rev)
1788 1792 except (ValueError, OverflowError):
1789 1793 pass
1790 1794 if len(id) == 2 * self.nodeconstants.nodelen:
1791 1795 try:
1792 1796 # a full hex nodeid?
1793 1797 node = bin(id)
1794 1798 self.rev(node)
1795 1799 return node
1796 1800 except (binascii.Error, error.LookupError):
1797 1801 pass
1798 1802
1799 1803 def _partialmatch(self, id):
1800 1804 # we don't care about wdirfilenodeids as they should always be full hashes
1801 1805 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1802 1806 ambiguous = False
1803 1807 try:
1804 1808 partial = self.index.partialmatch(id)
1805 1809 if partial and self.hasnode(partial):
1806 1810 if maybewdir:
1807 1811 # single 'ff...' match in radix tree, ambiguous with wdir
1808 1812 ambiguous = True
1809 1813 else:
1810 1814 return partial
1811 1815 elif maybewdir:
1812 1816 # no 'ff...' match in radix tree, wdir identified
1813 1817 raise error.WdirUnsupported
1814 1818 else:
1815 1819 return None
1816 1820 except error.RevlogError:
1817 1821 # parsers.c radix tree lookup gave multiple matches
1818 1822 # fast path: for unfiltered changelog, radix tree is accurate
1819 1823 if not getattr(self, 'filteredrevs', None):
1820 1824 ambiguous = True
1821 1825 # fall through to slow path that filters hidden revisions
1822 1826 except (AttributeError, ValueError):
1823 1827 # we are pure python, or key is not hex
1824 1828 pass
1825 1829 if ambiguous:
1826 1830 raise error.AmbiguousPrefixLookupError(
1827 1831 id, self.display_id, _(b'ambiguous identifier')
1828 1832 )
1829 1833
1830 1834 if id in self._pcache:
1831 1835 return self._pcache[id]
1832 1836
1833 1837 if len(id) <= 40:
1834 1838 # hex(node)[:...]
1835 1839 l = len(id) // 2 * 2 # grab an even number of digits
1836 1840 try:
1837 1841 # we're dropping the last digit, so let's check that it's hex,
1838 1842 # to avoid the expensive computation below if it's not
1839 1843 if len(id) % 2 > 0:
1840 1844 if not (id[-1] in hexdigits):
1841 1845 return None
1842 1846 prefix = bin(id[:l])
1843 1847 except binascii.Error:
1844 1848 pass
1845 1849 else:
1846 1850 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1847 1851 nl = [
1848 1852 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1849 1853 ]
1850 1854 if self.nodeconstants.nullhex.startswith(id):
1851 1855 nl.append(self.nullid)
1852 1856 if len(nl) > 0:
1853 1857 if len(nl) == 1 and not maybewdir:
1854 1858 self._pcache[id] = nl[0]
1855 1859 return nl[0]
1856 1860 raise error.AmbiguousPrefixLookupError(
1857 1861 id, self.display_id, _(b'ambiguous identifier')
1858 1862 )
1859 1863 if maybewdir:
1860 1864 raise error.WdirUnsupported
1861 1865 return None
1862 1866
1863 1867 def lookup(self, id):
1864 1868 """locate a node based on:
1865 1869 - revision number or str(revision number)
1866 1870 - nodeid or subset of hex nodeid
1867 1871 """
1868 1872 n = self._match(id)
1869 1873 if n is not None:
1870 1874 return n
1871 1875 n = self._partialmatch(id)
1872 1876 if n:
1873 1877 return n
1874 1878
1875 1879 raise error.LookupError(id, self.display_id, _(b'no match found'))
1876 1880
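# Illustrative sketch (assumed usage, not part of the original source):
# ``lookup`` accepts several identifier forms and raises LookupError when
# nothing matches. ``rl`` is an assumed revlog instance.
#
#     rl.lookup(b'0')                  # revision number as a byte string
#     rl.lookup(rl.node(0))            # binary nodeid
#     rl.lookup(hex(rl.node(0))[:12])  # unambiguous hex prefix
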
1877 1881 def shortest(self, node, minlength=1):
1878 1882 """Find the shortest unambiguous prefix that matches node."""
1879 1883
1880 1884 def isvalid(prefix):
1881 1885 try:
1882 1886 matchednode = self._partialmatch(prefix)
1883 1887 except error.AmbiguousPrefixLookupError:
1884 1888 return False
1885 1889 except error.WdirUnsupported:
1886 1890 # single 'ff...' match
1887 1891 return True
1888 1892 if matchednode is None:
1889 1893 raise error.LookupError(node, self.display_id, _(b'no node'))
1890 1894 return True
1891 1895
1892 1896 def maybewdir(prefix):
1893 1897 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1894 1898
1895 1899 hexnode = hex(node)
1896 1900
1897 1901 def disambiguate(hexnode, minlength):
1898 1902 """Disambiguate against wdirid."""
1899 1903 for length in range(minlength, len(hexnode) + 1):
1900 1904 prefix = hexnode[:length]
1901 1905 if not maybewdir(prefix):
1902 1906 return prefix
1903 1907
1904 1908 if not getattr(self, 'filteredrevs', None):
1905 1909 try:
1906 1910 length = max(self.index.shortest(node), minlength)
1907 1911 return disambiguate(hexnode, length)
1908 1912 except error.RevlogError:
1909 1913 if node != self.nodeconstants.wdirid:
1910 1914 raise error.LookupError(
1911 1915 node, self.display_id, _(b'no node')
1912 1916 )
1913 1917 except AttributeError:
1914 1918 # Fall through to pure code
1915 1919 pass
1916 1920
1917 1921 if node == self.nodeconstants.wdirid:
1918 1922 for length in range(minlength, len(hexnode) + 1):
1919 1923 prefix = hexnode[:length]
1920 1924 if isvalid(prefix):
1921 1925 return prefix
1922 1926
1923 1927 for length in range(minlength, len(hexnode) + 1):
1924 1928 prefix = hexnode[:length]
1925 1929 if isvalid(prefix):
1926 1930 return disambiguate(hexnode, length)
1927 1931
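# Illustrative sketch (added for clarity): ``shortest`` is the counterpart of
# prefix lookup, returning the smallest hex prefix that still identifies
# ``node`` unambiguously, so the round trip below is expected to hold.
#
#     prefix = rl.shortest(node)
#     assert rl.lookup(prefix) == node
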
1928 1932 def cmp(self, node, text):
1929 1933 """compare text with a given file revision
1930 1934
1931 1935 returns True if text is different than what is stored.
1932 1936 """
1933 1937 p1, p2 = self.parents(node)
1934 1938 return storageutil.hashrevisionsha1(text, p1, p2) != node
1935 1939
1936 1940 def _getsegmentforrevs(self, startrev, endrev):
1937 1941 """Obtain a segment of raw data corresponding to a range of revisions.
1938 1942
1939 1943 Accepts the start and end revisions.
1942 1946
1943 1947 Requests for data may be satisfied by a cache.
1944 1948
1945 1949 Returns a 2-tuple of (offset, data) for the requested range of
1946 1950 revisions. Offset is the integer offset from the beginning of the
1947 1951 revlog and data is a str or buffer of the raw byte data.
1948 1952
1949 1953 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1950 1954 to determine where each revision's data begins and ends.
1951 1955 """
1952 1956 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1953 1957 # (functions are expensive).
1954 1958 index = self.index
1955 1959 istart = index[startrev]
1956 1960 start = int(istart[0] >> 16)
1957 1961 if startrev == endrev:
1958 1962 end = start + istart[1]
1959 1963 else:
1960 1964 iend = index[endrev]
1961 1965 end = int(iend[0] >> 16) + iend[1]
1962 1966
1963 1967 if self._inline:
1964 1968 start += (startrev + 1) * self.index.entry_size
1965 1969 end += (endrev + 1) * self.index.entry_size
1966 1970 length = end - start
1967 1971
1968 1972 return start, self._segmentfile.read_chunk(start, length)
1969 1973
1970 1974 def _chunk(self, rev):
1971 1975 """Obtain a single decompressed chunk for a revision.
1972 1976
1973 1977 Accepts an integer revision.
1976 1980
1977 1981 Returns a str holding uncompressed data for the requested revision.
1978 1982 """
1979 1983 compression_mode = self.index[rev][10]
1980 1984 data = self._getsegmentforrevs(rev, rev)[1]
1981 1985 if compression_mode == COMP_MODE_PLAIN:
1982 1986 return data
1983 1987 elif compression_mode == COMP_MODE_DEFAULT:
1984 1988 return self._decompressor(data)
1985 1989 elif compression_mode == COMP_MODE_INLINE:
1986 1990 return self.decompress(data)
1987 1991 else:
1988 1992 msg = b'unknown compression mode %d'
1989 1993 msg %= compression_mode
1990 1994 raise error.RevlogError(msg)
1991 1995
1992 1996 def _chunks(self, revs, targetsize=None):
1993 1997 """Obtain decompressed chunks for the specified revisions.
1994 1998
1995 1999 Accepts an iterable of numeric revisions that are assumed to be in
1996 2000 ascending order.
1999 2003
2000 2004 This function is similar to calling ``self._chunk()`` multiple times,
2001 2005 but is faster.
2002 2006
2003 2007 Returns a list with decompressed data for each requested revision.
2004 2008 """
2005 2009 if not revs:
2006 2010 return []
2007 2011 start = self.start
2008 2012 length = self.length
2009 2013 inline = self._inline
2010 2014 iosize = self.index.entry_size
2011 2015 buffer = util.buffer
2012 2016
2013 2017 l = []
2014 2018 ladd = l.append
2015 2019
2016 2020 if not self._withsparseread:
2017 2021 slicedchunks = (revs,)
2018 2022 else:
2019 2023 slicedchunks = deltautil.slicechunk(
2020 2024 self, revs, targetsize=targetsize
2021 2025 )
2022 2026
2023 2027 for revschunk in slicedchunks:
2024 2028 firstrev = revschunk[0]
2025 2029 # Skip trailing revisions with empty diff
2026 2030 for lastrev in revschunk[::-1]:
2027 2031 if length(lastrev) != 0:
2028 2032 break
2029 2033
2030 2034 try:
2031 2035 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2032 2036 except OverflowError:
2033 2037 # issue4215 - we can't cache a run of chunks greater than
2034 2038 # 2G on Windows
2035 2039 return [self._chunk(rev) for rev in revschunk]
2036 2040
2037 2041 decomp = self.decompress
2038 2042 # self._decompressor might be None, but will not be used in that case
2039 2043 def_decomp = self._decompressor
2040 2044 for rev in revschunk:
2041 2045 chunkstart = start(rev)
2042 2046 if inline:
2043 2047 chunkstart += (rev + 1) * iosize
2044 2048 chunklength = length(rev)
2045 2049 comp_mode = self.index[rev][10]
2046 2050 c = buffer(data, chunkstart - offset, chunklength)
2047 2051 if comp_mode == COMP_MODE_PLAIN:
2048 2052 ladd(c)
2049 2053 elif comp_mode == COMP_MODE_INLINE:
2050 2054 ladd(decomp(c))
2051 2055 elif comp_mode == COMP_MODE_DEFAULT:
2052 2056 ladd(def_decomp(c))
2053 2057 else:
2054 2058 msg = b'unknown compression mode %d'
2055 2059 msg %= comp_mode
2056 2060 raise error.RevlogError(msg)
2057 2061
2058 2062 return l
2059 2063
2060 2064 def deltaparent(self, rev):
2061 2065 """return deltaparent of the given revision"""
2062 2066 base = self.index[rev][3]
2063 2067 if base == rev:
2064 2068 return nullrev
2065 2069 elif self.delta_config.general_delta:
2066 2070 return base
2067 2071 else:
2068 2072 return rev - 1
2069 2073
2070 2074 def issnapshot(self, rev):
2071 2075 """tells whether rev is a snapshot"""
2072 2076 if not self._sparserevlog:
2073 2077 return self.deltaparent(rev) == nullrev
2074 2078 elif hasattr(self.index, 'issnapshot'):
2075 2079 # directly assign the method to cache the testing and access
2076 2080 self.issnapshot = self.index.issnapshot
2077 2081 return self.issnapshot(rev)
2078 2082 if rev == nullrev:
2079 2083 return True
2080 2084 entry = self.index[rev]
2081 2085 base = entry[3]
2082 2086 if base == rev:
2083 2087 return True
2084 2088 if base == nullrev:
2085 2089 return True
2086 2090 p1 = entry[5]
2087 2091 while self.length(p1) == 0:
2088 2092 b = self.deltaparent(p1)
2089 2093 if b == p1:
2090 2094 break
2091 2095 p1 = b
2092 2096 p2 = entry[6]
2093 2097 while self.length(p2) == 0:
2094 2098 b = self.deltaparent(p2)
2095 2099 if b == p2:
2096 2100 break
2097 2101 p2 = b
2098 2102 if base == p1 or base == p2:
2099 2103 return False
2100 2104 return self.issnapshot(base)
2101 2105
2102 2106 def snapshotdepth(self, rev):
2103 2107 """number of snapshots in the chain before this one"""
2104 2108 if not self.issnapshot(rev):
2105 2109 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2106 2110 return len(self._deltachain(rev)[0]) - 1
2107 2111
2108 2112 def revdiff(self, rev1, rev2):
2109 2113 """return or calculate a delta between two revisions
2110 2114
2111 2115 The delta calculated is in binary form and is intended to be written to
2112 2116 revlog data directly. So this function needs raw revision data.
2113 2117 """
2114 2118 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2115 2119 return bytes(self._chunk(rev2))
2116 2120
2117 2121 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2118 2122
2119 2123 def revision(self, nodeorrev):
2120 2124 """return an uncompressed revision of a given node or revision
2121 2125 number.
2122 2126 """
2123 2127 return self._revisiondata(nodeorrev)
2124 2128
2125 2129 def sidedata(self, nodeorrev):
2126 2130 """a map of extra data related to the changeset but not part of the hash
2127 2131
2128 2132 This function currently returns a dictionary. However, a more advanced
2129 2133 mapping object will likely be used in the future for more
2130 2134 efficient/lazy code.
2131 2135 """
2132 2136 # deal with <nodeorrev> argument type
2133 2137 if isinstance(nodeorrev, int):
2134 2138 rev = nodeorrev
2135 2139 else:
2136 2140 rev = self.rev(nodeorrev)
2137 2141 return self._sidedata(rev)
2138 2142
2139 2143 def _revisiondata(self, nodeorrev, raw=False):
2140 2144 # deal with <nodeorrev> argument type
2141 2145 if isinstance(nodeorrev, int):
2142 2146 rev = nodeorrev
2143 2147 node = self.node(rev)
2144 2148 else:
2145 2149 node = nodeorrev
2146 2150 rev = None
2147 2151
2148 2152 # fast path the special `nullid` rev
2149 2153 if node == self.nullid:
2150 2154 return b""
2151 2155
2152 2156 # ``rawtext`` is the text as stored inside the revlog. Might be the
2153 2157 # revision or might need to be processed to retrieve the revision.
2154 2158 rev, rawtext, validated = self._rawtext(node, rev)
2155 2159
2156 2160 if raw and validated:
2157 2161 # if we don't want to process the raw text and that raw
2158 2162 # text is cached, we can exit early.
2159 2163 return rawtext
2160 2164 if rev is None:
2161 2165 rev = self.rev(node)
2162 2166 # the revlog's flags for this revision
2163 2167 # (they usually alter its state or content)
2164 2168 flags = self.flags(rev)
2165 2169
2166 2170 if validated and flags == REVIDX_DEFAULT_FLAGS:
2167 2171 # no extra flags set, no flag processor runs, text = rawtext
2168 2172 return rawtext
2169 2173
2170 2174 if raw:
2171 2175 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2172 2176 text = rawtext
2173 2177 else:
2174 2178 r = flagutil.processflagsread(self, rawtext, flags)
2175 2179 text, validatehash = r
2176 2180 if validatehash:
2177 2181 self.checkhash(text, node, rev=rev)
2178 2182 if not validated:
2179 2183 self._revisioncache = (node, rev, rawtext)
2180 2184
2181 2185 return text
2182 2186
2183 2187 def _rawtext(self, node, rev):
2184 2188 """return the possibly unvalidated rawtext for a revision
2185 2189
2186 2190 returns (rev, rawtext, validated)
2187 2191 """
2188 2192
2189 2193 # revision in the cache (could be useful to apply delta)
2190 2194 cachedrev = None
2191 2195 # An intermediate text to apply deltas to
2192 2196 basetext = None
2193 2197
2194 2198 # Check if we have the entry in cache
2195 2199 # The cache entry looks like (node, rev, rawtext)
2196 2200 if self._revisioncache:
2197 2201 if self._revisioncache[0] == node:
2198 2202 return (rev, self._revisioncache[2], True)
2199 2203 cachedrev = self._revisioncache[1]
2200 2204
2201 2205 if rev is None:
2202 2206 rev = self.rev(node)
2203 2207
2204 2208 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2205 2209 if stopped:
2206 2210 basetext = self._revisioncache[2]
2207 2211
2208 2212 # drop cache to save memory, the caller is expected to
2209 2213 # update self._revisioncache after validating the text
2210 2214 self._revisioncache = None
2211 2215
2212 2216 targetsize = None
2213 2217 rawsize = self.index[rev][2]
2214 2218 if 0 <= rawsize:
2215 2219 targetsize = 4 * rawsize
2216 2220
2217 2221 bins = self._chunks(chain, targetsize=targetsize)
2218 2222 if basetext is None:
2219 2223 basetext = bytes(bins[0])
2220 2224 bins = bins[1:]
2221 2225
2222 2226 rawtext = mdiff.patches(basetext, bins)
2223 2227 del basetext # let us have a chance to free memory early
2224 2228 return (rev, rawtext, False)
2225 2229
2226 2230 def _sidedata(self, rev):
2227 2231 """Return the sidedata for a given revision number."""
2228 2232 index_entry = self.index[rev]
2229 2233 sidedata_offset = index_entry[8]
2230 2234 sidedata_size = index_entry[9]
2231 2235
2232 2236 if self._inline:
2233 2237 sidedata_offset += self.index.entry_size * (1 + rev)
2234 2238 if sidedata_size == 0:
2235 2239 return {}
2236 2240
2237 2241 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2238 2242 filename = self._sidedatafile
2239 2243 end = self._docket.sidedata_end
2240 2244 offset = sidedata_offset
2241 2245 length = sidedata_size
2242 2246 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2243 2247 raise error.RevlogError(m)
2244 2248
2245 2249 comp_segment = self._segmentfile_sidedata.read_chunk(
2246 2250 sidedata_offset, sidedata_size
2247 2251 )
2248 2252
2249 2253 comp = self.index[rev][11]
2250 2254 if comp == COMP_MODE_PLAIN:
2251 2255 segment = comp_segment
2252 2256 elif comp == COMP_MODE_DEFAULT:
2253 2257 segment = self._decompressor(comp_segment)
2254 2258 elif comp == COMP_MODE_INLINE:
2255 2259 segment = self.decompress(comp_segment)
2256 2260 else:
2257 2261 msg = b'unknown compression mode %d'
2258 2262 msg %= comp
2259 2263 raise error.RevlogError(msg)
2260 2264
2261 2265 sidedata = sidedatautil.deserialize_sidedata(segment)
2262 2266 return sidedata
2263 2267
2264 2268 def rawdata(self, nodeorrev):
2265 2269 """return the uncompressed raw data of a given node or revision number."""
2266 2270 return self._revisiondata(nodeorrev, raw=True)
2267 2271
2268 2272 def hash(self, text, p1, p2):
2269 2273 """Compute a node hash.
2270 2274
2271 2275 Available as a function so that subclasses can replace the hash
2272 2276 as needed.
2273 2277 """
2274 2278 return storageutil.hashrevisionsha1(text, p1, p2)
2275 2279
2276 2280 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2277 2281 """Check node hash integrity.
2278 2282
2279 2283 Available as a function so that subclasses can extend hash mismatch
2280 2284 behaviors as needed.
2281 2285 """
2282 2286 try:
2283 2287 if p1 is None and p2 is None:
2284 2288 p1, p2 = self.parents(node)
2285 2289 if node != self.hash(text, p1, p2):
2286 2290 # Clear the revision cache on hash failure. The revision cache
2287 2291 # only stores the raw revision and clearing the cache does have
2288 2292 # the side-effect that we won't have a cache hit when the raw
2289 2293 # revision data is accessed. But this case should be rare and
2290 2294 # it is extra work to teach the cache about the hash
2291 2295 # verification state.
2292 2296 if self._revisioncache and self._revisioncache[0] == node:
2293 2297 self._revisioncache = None
2294 2298
2295 2299 revornode = rev
2296 2300 if revornode is None:
2297 2301 revornode = templatefilters.short(hex(node))
2298 2302 raise error.RevlogError(
2299 2303 _(b"integrity check failed on %s:%s")
2300 2304 % (self.display_id, pycompat.bytestr(revornode))
2301 2305 )
2302 2306 except error.RevlogError:
2303 2307 if self._censorable and storageutil.iscensoredtext(text):
2304 2308 raise error.CensoredNodeError(self.display_id, node, text)
2305 2309 raise
2306 2310
2307 2311 @property
2308 2312 def _split_index_file(self):
2309 2313 """the path where the index of an ongoing splitting operation is expected
2310 2314
2311 2315 The file will only exist if a splitting operation is in progress, but
2312 2316 it is always expected at the same location."""
2313 2317 parts = self.radix.split(b'/')
2314 2318 if len(parts) > 1:
2315 2319 # adds a '-s' suffix to the `data/` or `meta/` base directory
2316 2320 head = parts[0] + b'-s'
2317 2321 mids = parts[1:-1]
2318 2322 tail = parts[-1] + b'.i'
2319 2323 pieces = [head] + mids + [tail]
2320 2324 return b'/'.join(pieces)
2321 2325 else:
2322 2326 # the revlog is stored at the root of the store (changelog or
2323 2327 # manifest), no risk of collision.
2324 2328 return self.radix + b'.i.s'
2325 2329
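# Worked example (added for clarity, not in the original source) of the path
# computed by the property above, using hypothetical radix values:
#
#     radix = b'data/foo/bar'  ->  b'data-s/foo/bar.i'
#     radix = b'00changelog'   ->  b'00changelog.i.s'
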
2326 2330 def _enforceinlinesize(self, tr, side_write=True):
2327 2331 """Check if the revlog is too big for inline and convert if so.
2328 2332
2329 2333 This should be called after revisions are added to the revlog. If the
2330 2334 revlog has grown too large to be an inline revlog, it will convert it
2331 2335 to use multiple index and data files.
2332 2336 """
2333 2337 tiprev = len(self) - 1
2334 2338 total_size = self.start(tiprev) + self.length(tiprev)
2335 2339 if not self._inline or total_size < _maxinline:
2336 2340 return
2337 2341
2338 2342 troffset = tr.findoffset(self._indexfile)
2339 2343 if troffset is None:
2340 2344 raise error.RevlogError(
2341 2345 _(b"%s not found in the transaction") % self._indexfile
2342 2346 )
2343 2347 if troffset:
2344 2348 tr.addbackup(self._indexfile, for_offset=True)
2345 2349 tr.add(self._datafile, 0)
2346 2350
2347 2351 existing_handles = False
2348 2352 if self._writinghandles is not None:
2349 2353 existing_handles = True
2350 2354 fp = self._writinghandles[0]
2351 2355 fp.flush()
2352 2356 fp.close()
2353 2357 # We can't use the cached file handle after close(). So prevent
2354 2358 # its usage.
2355 2359 self._writinghandles = None
2356 2360 self._segmentfile.writing_handle = None
2357 2361 # No need to deal with the sidedata writing handle as it is only
2358 2362 # relevant for revlog-v2, which is never inline and thus never reaches
2359 2363 # this code
2360 2364 if side_write:
2361 2365 old_index_file_path = self._indexfile
2362 2366 new_index_file_path = self._split_index_file
2363 2367 opener = self.opener
2364 2368 weak_self = weakref.ref(self)
2365 2369
2366 2370 # the "split" index replaces the real index when the transaction is finalized
2367 2371 def finalize_callback(tr):
2368 2372 opener.rename(
2369 2373 new_index_file_path,
2370 2374 old_index_file_path,
2371 2375 checkambig=True,
2372 2376 )
2373 2377 maybe_self = weak_self()
2374 2378 if maybe_self is not None:
2375 2379 maybe_self._indexfile = old_index_file_path
2376 2380
2377 2381 def abort_callback(tr):
2378 2382 maybe_self = weak_self()
2379 2383 if maybe_self is not None:
2380 2384 maybe_self._indexfile = old_index_file_path
2381 2385
2382 2386 tr.registertmp(new_index_file_path)
2383 2387 if self.target[1] is not None:
2384 2388 callback_id = b'000-revlog-split-%d-%s' % self.target
2385 2389 else:
2386 2390 callback_id = b'000-revlog-split-%d' % self.target[0]
2387 2391 tr.addfinalize(callback_id, finalize_callback)
2388 2392 tr.addabort(callback_id, abort_callback)
2389 2393
2390 2394 new_dfh = self._datafp(b'w+')
2391 2395 new_dfh.truncate(0) # drop any potentially existing data
2392 2396 try:
2393 2397 with self.reading():
2394 2398 for r in self:
2395 2399 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2396 2400 new_dfh.flush()
2397 2401
2398 2402 if side_write:
2399 2403 self._indexfile = new_index_file_path
2400 2404 with self.__index_new_fp() as fp:
2401 2405 self._format_flags &= ~FLAG_INLINE_DATA
2402 2406 self._inline = False
2403 2407 for i in self:
2404 2408 e = self.index.entry_binary(i)
2405 2409 if i == 0 and self._docket is None:
2406 2410 header = self._format_flags | self._format_version
2407 2411 header = self.index.pack_header(header)
2408 2412 e = header + e
2409 2413 fp.write(e)
2410 2414 if self._docket is not None:
2411 2415 self._docket.index_end = fp.tell()
2412 2416
2413 2417 # If we don't use side-write, the temp file replaces the real
2414 2418 # index when we exit the context manager
2415 2419
2416 2420 nodemaputil.setup_persistent_nodemap(tr, self)
2417 2421 self._segmentfile = randomaccessfile.randomaccessfile(
2418 2422 self.opener,
2419 2423 self._datafile,
2420 2424 self._chunkcachesize,
2421 2425 )
2422 2426
2423 2427 if existing_handles:
2424 2428 # switched from inline to conventional; reopen the index
2425 2429 ifh = self.__index_write_fp()
2426 2430 self._writinghandles = (ifh, new_dfh, None)
2427 2431 self._segmentfile.writing_handle = new_dfh
2428 2432 new_dfh = None
2429 2433 # No need to deal with the sidedata writing handle as it is only
2430 2434 # relevant for revlog-v2, which is never inline and thus never reaches
2431 2435 # this code
2432 2436 finally:
2433 2437 if new_dfh is not None:
2434 2438 new_dfh.close()
2435 2439
2436 2440 def _nodeduplicatecallback(self, transaction, node):
2437 2441 """called when trying to add a node already stored."""
2438 2442
2439 2443 @contextlib.contextmanager
2440 2444 def reading(self):
2441 2445 """Context manager that keeps data and sidedata files open for reading"""
2442 2446 if len(self.index) == 0:
2443 2447 yield # nothing to be read
2444 2448 else:
2445 2449 with self._segmentfile.reading():
2446 2450 with self._segmentfile_sidedata.reading():
2447 2451 yield
2448 2452
2449 2453 @contextlib.contextmanager
2450 2454 def _writing(self, transaction):
2451 2455 if self._trypending:
2452 2456 msg = b'try to write in a `trypending` revlog: %s'
2453 2457 msg %= self.display_id
2454 2458 raise error.ProgrammingError(msg)
2455 2459 if self._writinghandles is not None:
2456 2460 yield
2457 2461 else:
2458 2462 ifh = dfh = sdfh = None
2459 2463 try:
2460 2464 r = len(self)
2461 2465 # opening the data file.
2462 2466 dsize = 0
2463 2467 if r:
2464 2468 dsize = self.end(r - 1)
2465 2469 dfh = None
2466 2470 if not self._inline:
2467 2471 try:
2468 2472 dfh = self._datafp(b"r+")
2469 2473 if self._docket is None:
2470 2474 dfh.seek(0, os.SEEK_END)
2471 2475 else:
2472 2476 dfh.seek(self._docket.data_end, os.SEEK_SET)
2473 2477 except FileNotFoundError:
2474 2478 dfh = self._datafp(b"w+")
2475 2479 transaction.add(self._datafile, dsize)
2476 2480 if self._sidedatafile is not None:
2477 2481 # revlog-v2 does not inline, help Pytype
2478 2482 assert dfh is not None
2479 2483 try:
2480 2484 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2481 2485 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2482 2486 except FileNotFoundError:
2483 2487 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2484 2488 transaction.add(
2485 2489 self._sidedatafile, self._docket.sidedata_end
2486 2490 )
2487 2491
2488 2492 # opening the index file.
2489 2493 isize = r * self.index.entry_size
2490 2494 ifh = self.__index_write_fp()
2491 2495 if self._inline:
2492 2496 transaction.add(self._indexfile, dsize + isize)
2493 2497 else:
2494 2498 transaction.add(self._indexfile, isize)
2495 2499 # exposing all file handles for writing.
2496 2500 self._writinghandles = (ifh, dfh, sdfh)
2497 2501 self._segmentfile.writing_handle = ifh if self._inline else dfh
2498 2502 self._segmentfile_sidedata.writing_handle = sdfh
2499 2503 yield
2500 2504 if self._docket is not None:
2501 2505 self._write_docket(transaction)
2502 2506 finally:
2503 2507 self._writinghandles = None
2504 2508 self._segmentfile.writing_handle = None
2505 2509 self._segmentfile_sidedata.writing_handle = None
2506 2510 if dfh is not None:
2507 2511 dfh.close()
2508 2512 if sdfh is not None:
2509 2513 sdfh.close()
2510 2514 # closing the index file last to avoid exposing references to
2511 2515 # potentially unflushed data content.
2512 2516 if ifh is not None:
2513 2517 ifh.close()
2514 2518
2515 2519 def _write_docket(self, transaction):
2516 2520 """write the current docket on disk
2517 2521
2518 2522 Exists as a method to help the changelog implement its transaction logic.
2519 2523
2520 2524 We could also imagine using the same transaction logic for all revlogs,
2521 2525 since dockets are cheap."""
2522 2526 self._docket.write(transaction)
2523 2527
2524 2528 def addrevision(
2525 2529 self,
2526 2530 text,
2527 2531 transaction,
2528 2532 link,
2529 2533 p1,
2530 2534 p2,
2531 2535 cachedelta=None,
2532 2536 node=None,
2533 2537 flags=REVIDX_DEFAULT_FLAGS,
2534 2538 deltacomputer=None,
2535 2539 sidedata=None,
2536 2540 ):
2537 2541 """add a revision to the log
2538 2542
2539 2543 text - the revision data to add
2540 2544 transaction - the transaction object used for rollback
2541 2545 link - the linkrev data to add
2542 2546 p1, p2 - the parent nodeids of the revision
2543 2547 cachedelta - an optional precomputed delta
2544 2548 node - nodeid of revision; typically node is not specified, and it is
2545 2549 computed by default as hash(text, p1, p2); however, subclasses might
2546 2550 use a different hashing method (and override checkhash() in that case)
2547 2551 flags - the known flags to set on the revision
2548 2552 deltacomputer - an optional deltacomputer instance shared between
2549 2553 multiple calls
2550 2554 """
2551 2555 if link == nullrev:
2552 2556 raise error.RevlogError(
2553 2557 _(b"attempted to add linkrev -1 to %s") % self.display_id
2554 2558 )
2555 2559
2556 2560 if sidedata is None:
2557 2561 sidedata = {}
2558 2562 elif sidedata and not self.hassidedata:
2559 2563 raise error.ProgrammingError(
2560 2564 _(b"trying to add sidedata to a revlog that does not support them")
2561 2565 )
2562 2566
2563 2567 if flags:
2564 2568 node = node or self.hash(text, p1, p2)
2565 2569
2566 2570 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2567 2571
2568 2572 # If the flag processor modifies the revision data, ignore any provided
2569 2573 # cachedelta.
2570 2574 if rawtext != text:
2571 2575 cachedelta = None
2572 2576
2573 2577 if len(rawtext) > _maxentrysize:
2574 2578 raise error.RevlogError(
2575 2579 _(
2576 2580 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2577 2581 )
2578 2582 % (self.display_id, len(rawtext))
2579 2583 )
2580 2584
2581 2585 node = node or self.hash(rawtext, p1, p2)
2582 2586 rev = self.index.get_rev(node)
2583 2587 if rev is not None:
2584 2588 return rev
2585 2589
2586 2590 if validatehash:
2587 2591 self.checkhash(rawtext, node, p1=p1, p2=p2)
2588 2592
2589 2593 return self.addrawrevision(
2590 2594 rawtext,
2591 2595 transaction,
2592 2596 link,
2593 2597 p1,
2594 2598 p2,
2595 2599 node,
2596 2600 flags,
2597 2601 cachedelta=cachedelta,
2598 2602 deltacomputer=deltacomputer,
2599 2603 sidedata=sidedata,
2600 2604 )
2601 2605
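# Illustrative usage sketch (assumption, not original source): adding a full
# text revision. ``tr`` is assumed to be an open transaction obtained from
# the surrounding repository code, e.g. ``tr = repo.transaction(b'example')``.
#
#     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
#     node = rl.node(rev)
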
2602 2606 def addrawrevision(
2603 2607 self,
2604 2608 rawtext,
2605 2609 transaction,
2606 2610 link,
2607 2611 p1,
2608 2612 p2,
2609 2613 node,
2610 2614 flags,
2611 2615 cachedelta=None,
2612 2616 deltacomputer=None,
2613 2617 sidedata=None,
2614 2618 ):
2615 2619 """add a raw revision with known flags, node and parents
2616 2620 useful when reusing a revision not stored in this revlog (ex: received
2617 2621 over wire, or read from an external bundle).
2618 2622 """
2619 2623 with self._writing(transaction):
2620 2624 return self._addrevision(
2621 2625 node,
2622 2626 rawtext,
2623 2627 transaction,
2624 2628 link,
2625 2629 p1,
2626 2630 p2,
2627 2631 flags,
2628 2632 cachedelta,
2629 2633 deltacomputer=deltacomputer,
2630 2634 sidedata=sidedata,
2631 2635 )
2632 2636
2633 2637 def compress(self, data):
2634 2638 """Generate a possibly-compressed representation of data."""
2635 2639 if not data:
2636 2640 return b'', data
2637 2641
2638 2642 compressed = self._compressor.compress(data)
2639 2643
2640 2644 if compressed:
2641 2645 # The revlog compressor added the header in the returned data.
2642 2646 return b'', compressed
2643 2647
2644 2648 if data[0:1] == b'\0':
2645 2649 return b'', data
2646 2650 return b'u', data
2647 2651
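# Illustrative note (added, not in the original source): ``compress`` returns
# a (header, data) pair. An empty header means the payload already starts
# with its own marker ('x' for zlib, '\0' for plain data), while b'u' marks
# text stored uncompressed; ``decompress`` below routes on that first byte.
#
#     hdr, chunk = self.compress(b'some text')
#     # -> (b'u', b'some text') when compression does not pay off
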
2648 2652 def decompress(self, data):
2649 2653 """Decompress a revlog chunk.
2650 2654
2651 2655 The chunk is expected to begin with a header identifying the
2652 2656 format type so it can be routed to an appropriate decompressor.
2653 2657 """
2654 2658 if not data:
2655 2659 return data
2656 2660
2657 2661 # Revlogs are read much more frequently than they are written and many
2658 2662 # chunks only take microseconds to decompress, so performance is
2659 2663 # important here.
2660 2664 #
2661 2665 # We can make a few assumptions about revlogs:
2662 2666 #
2663 2667 # 1) the majority of chunks will be compressed (as opposed to inline
2664 2668 # raw data).
2665 2669 # 2) decompressing *any* data will likely be at least 10x slower than
2666 2670 # returning raw inline data.
2667 2671 # 3) we want to prioritize common and officially supported compression
2668 2672 # engines
2669 2673 #
2670 2674 # It follows that we want to optimize for "decompress compressed data
2671 2675 # when encoded with common and officially supported compression engines"
2672 2676 # case over "raw data" and "data encoded by less common or non-official
2673 2677 # compression engines." That is why we have the inline lookup first
2674 2678 # followed by the compengines lookup.
2675 2679 #
2676 2680 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2677 2681 # compressed chunks. And this matters for changelog and manifest reads.
2678 2682 t = data[0:1]
2679 2683
2680 2684 if t == b'x':
2681 2685 try:
2682 2686 return _zlibdecompress(data)
2683 2687 except zlib.error as e:
2684 2688 raise error.RevlogError(
2685 2689 _(b'revlog decompress error: %s')
2686 2690 % stringutil.forcebytestr(e)
2687 2691 )
2688 2692 # '\0' is more common than 'u' so it goes first.
2689 2693 elif t == b'\0':
2690 2694 return data
2691 2695 elif t == b'u':
2692 2696 return util.buffer(data, 1)
2693 2697
2694 2698 compressor = self._get_decompressor(t)
2695 2699
2696 2700 return compressor.decompress(data)
2697 2701
2698 2702 def _addrevision(
2699 2703 self,
2700 2704 node,
2701 2705 rawtext,
2702 2706 transaction,
2703 2707 link,
2704 2708 p1,
2705 2709 p2,
2706 2710 flags,
2707 2711 cachedelta,
2708 2712 alwayscache=False,
2709 2713 deltacomputer=None,
2710 2714 sidedata=None,
2711 2715 ):
2712 2716 """internal function to add revisions to the log
2713 2717
2714 2718 see addrevision for argument descriptions.
2715 2719
2716 2720 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2717 2721
2718 2722 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2719 2723 be used.
2720 2724
2721 2725 invariants:
2722 2726 - rawtext is optional (can be None); if not set, cachedelta must be set.
2723 2727 if both are set, they must correspond to each other.
2724 2728 """
2725 2729 if node == self.nullid:
2726 2730 raise error.RevlogError(
2727 2731 _(b"%s: attempt to add null revision") % self.display_id
2728 2732 )
2729 2733 if (
2730 2734 node == self.nodeconstants.wdirid
2731 2735 or node in self.nodeconstants.wdirfilenodeids
2732 2736 ):
2733 2737 raise error.RevlogError(
2734 2738 _(b"%s: attempt to add wdir revision") % self.display_id
2735 2739 )
2736 2740 if self._writinghandles is None:
2737 2741 msg = b'adding revision outside `revlog._writing` context'
2738 2742 raise error.ProgrammingError(msg)
2739 2743
2740 2744 btext = [rawtext]
2741 2745
2742 2746 curr = len(self)
2743 2747 prev = curr - 1
2744 2748
2745 2749 offset = self._get_data_offset(prev)
2746 2750
2747 2751 if self._concurrencychecker:
2748 2752 ifh, dfh, sdfh = self._writinghandles
2749 2753 # XXX no checking for the sidedata file
2750 2754 if self._inline:
2751 2755 # offset is "as if" it were in the .d file, so we need to add on
2752 2756 # the size of the entry metadata.
2753 2757 self._concurrencychecker(
2754 2758 ifh, self._indexfile, offset + curr * self.index.entry_size
2755 2759 )
2756 2760 else:
2757 2761 # Entries in the .i are a consistent size.
2758 2762 self._concurrencychecker(
2759 2763 ifh, self._indexfile, curr * self.index.entry_size
2760 2764 )
2761 2765 self._concurrencychecker(dfh, self._datafile, offset)
2762 2766
2763 2767 p1r, p2r = self.rev(p1), self.rev(p2)
2764 2768
2765 2769 # full versions are inserted when the needed deltas
2766 2770 # become comparable to the uncompressed text
2767 2771 if rawtext is None:
2768 2772 # need rawtext size, before changed by flag processors, which is
2769 2773 # the non-raw size. use revlog explicitly to avoid filelog's extra
2770 2774 # logic that might remove metadata size.
2771 2775 textlen = mdiff.patchedsize(
2772 2776 revlog.size(self, cachedelta[0]), cachedelta[1]
2773 2777 )
2774 2778 else:
2775 2779 textlen = len(rawtext)
2776 2780
2777 2781 if deltacomputer is None:
2778 2782 write_debug = None
2779 2783 if self._debug_delta:
2780 2784 write_debug = transaction._report
2781 2785 deltacomputer = deltautil.deltacomputer(
2782 2786 self, write_debug=write_debug
2783 2787 )
2784 2788
2785 2789 if cachedelta is not None and len(cachedelta) == 2:
2786 2790 # If the cached delta has no information about how it should be
2787 2791 # reused, add the default reuse instruction according to the
2788 2792 # revlog's configuration.
2789 2793 if (
2790 2794 self.delta_config.general_delta
2791 2795 and self.delta_config.lazy_delta_base
2792 2796 ):
2793 2797 delta_base_reuse = DELTA_BASE_REUSE_TRY
2794 2798 else:
2795 2799 delta_base_reuse = DELTA_BASE_REUSE_NO
2796 2800 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2797 2801
2798 2802 revinfo = revlogutils.revisioninfo(
2799 2803 node,
2800 2804 p1,
2801 2805 p2,
2802 2806 btext,
2803 2807 textlen,
2804 2808 cachedelta,
2805 2809 flags,
2806 2810 )
2807 2811
2808 2812 deltainfo = deltacomputer.finddeltainfo(revinfo)
2809 2813
2810 2814 compression_mode = COMP_MODE_INLINE
2811 2815 if self._docket is not None:
2812 2816 default_comp = self._docket.default_compression_header
2813 2817 r = deltautil.delta_compression(default_comp, deltainfo)
2814 2818 compression_mode, deltainfo = r
2815 2819
2816 2820 sidedata_compression_mode = COMP_MODE_INLINE
2817 2821 if sidedata and self.hassidedata:
2818 2822 sidedata_compression_mode = COMP_MODE_PLAIN
2819 2823 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2820 2824 sidedata_offset = self._docket.sidedata_end
2821 2825 h, comp_sidedata = self.compress(serialized_sidedata)
2822 2826 if (
2823 2827 h != b'u'
2824 2828 and comp_sidedata[0:1] != b'\0'
2825 2829 and len(comp_sidedata) < len(serialized_sidedata)
2826 2830 ):
2827 2831 assert not h
2828 2832 if (
2829 2833 comp_sidedata[0:1]
2830 2834 == self._docket.default_compression_header
2831 2835 ):
2832 2836 sidedata_compression_mode = COMP_MODE_DEFAULT
2833 2837 serialized_sidedata = comp_sidedata
2834 2838 else:
2835 2839 sidedata_compression_mode = COMP_MODE_INLINE
2836 2840 serialized_sidedata = comp_sidedata
2837 2841 else:
2838 2842 serialized_sidedata = b""
2839 2843 # Don't store the offset if the sidedata is empty; that way
2840 2844 # we can easily detect empty sidedata, and they will be no different
2841 2845 # from the ones we add manually.
2842 2846 sidedata_offset = 0
2843 2847
2844 2848 rank = RANK_UNKNOWN
2845 2849 if self._compute_rank:
2846 2850 if (p1r, p2r) == (nullrev, nullrev):
2847 2851 rank = 1
2848 2852 elif p1r != nullrev and p2r == nullrev:
2849 2853 rank = 1 + self.fast_rank(p1r)
2850 2854 elif p1r == nullrev and p2r != nullrev:
2851 2855 rank = 1 + self.fast_rank(p2r)
2852 2856 else: # merge node
2853 2857 if rustdagop is not None and self.index.rust_ext_compat:
2854 2858 rank = rustdagop.rank(self.index, p1r, p2r)
2855 2859 else:
2856 2860 pmin, pmax = sorted((p1r, p2r))
2857 2861 rank = 1 + self.fast_rank(pmax)
2858 2862 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2859 2863
2860 2864 e = revlogutils.entry(
2861 2865 flags=flags,
2862 2866 data_offset=offset,
2863 2867 data_compressed_length=deltainfo.deltalen,
2864 2868 data_uncompressed_length=textlen,
2865 2869 data_compression_mode=compression_mode,
2866 2870 data_delta_base=deltainfo.base,
2867 2871 link_rev=link,
2868 2872 parent_rev_1=p1r,
2869 2873 parent_rev_2=p2r,
2870 2874 node_id=node,
2871 2875 sidedata_offset=sidedata_offset,
2872 2876 sidedata_compressed_length=len(serialized_sidedata),
2873 2877 sidedata_compression_mode=sidedata_compression_mode,
2874 2878 rank=rank,
2875 2879 )
2876 2880
2877 2881 self.index.append(e)
2878 2882 entry = self.index.entry_binary(curr)
2879 2883 if curr == 0 and self._docket is None:
2880 2884 header = self._format_flags | self._format_version
2881 2885 header = self.index.pack_header(header)
2882 2886 entry = header + entry
2883 2887 self._writeentry(
2884 2888 transaction,
2885 2889 entry,
2886 2890 deltainfo.data,
2887 2891 link,
2888 2892 offset,
2889 2893 serialized_sidedata,
2890 2894 sidedata_offset,
2891 2895 )
2892 2896
2893 2897 rawtext = btext[0]
2894 2898
2895 2899 if alwayscache and rawtext is None:
2896 2900 rawtext = deltacomputer.buildtext(revinfo)
2897 2901
2898 2902 if type(rawtext) == bytes: # only accept immutable objects
2899 2903 self._revisioncache = (node, curr, rawtext)
2900 2904 self._chainbasecache[curr] = deltainfo.chainbase
2901 2905 return curr
2902 2906
2903 2907 def _get_data_offset(self, prev):
2904 2908 """Returns the current offset in the (in-transaction) data file.
2905 2909 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2906 2910 file to store that information: since sidedata can be rewritten to the
2907 2911 end of the data file within a transaction, you can have cases where, for
2908 2912 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2909 2913 to `n - 1`'s sidedata being written after `n`'s data.
2910 2914
2911 2915 TODO cache this in a docket file before getting out of experimental."""
2912 2916 if self._docket is None:
2913 2917 return self.end(prev)
2914 2918 else:
2915 2919 return self._docket.data_end
2916 2920
2917 2921 def _writeentry(
2918 2922 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2919 2923 ):
2920 2924 # Files opened in a+ mode have inconsistent behavior on various
2921 2925 # platforms. Windows requires that a file positioning call be made
2922 2926 # when the file handle transitions between reads and writes. See
2923 2927 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2924 2928 # platforms, Python or the platform itself can be buggy. Some versions
2925 2929 # of Solaris have been observed to not append at the end of the file
2926 2930 # if the file was seeked to before the end. See issue4943 for more.
2927 2931 #
2928 2932 # We work around this issue by inserting a seek() before writing.
2929 2933 # Note: This is likely not necessary on Python 3. However, because
2930 2934 # the file handle is reused for reads and may be seeked there, we need
2931 2935 # to be careful before changing this.
2932 2936 if self._writinghandles is None:
2933 2937 msg = b'adding revision outside `revlog._writing` context'
2934 2938 raise error.ProgrammingError(msg)
2935 2939 ifh, dfh, sdfh = self._writinghandles
2936 2940 if self._docket is None:
2937 2941 ifh.seek(0, os.SEEK_END)
2938 2942 else:
2939 2943 ifh.seek(self._docket.index_end, os.SEEK_SET)
2940 2944 if dfh:
2941 2945 if self._docket is None:
2942 2946 dfh.seek(0, os.SEEK_END)
2943 2947 else:
2944 2948 dfh.seek(self._docket.data_end, os.SEEK_SET)
2945 2949 if sdfh:
2946 2950 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2947 2951
2948 2952 curr = len(self) - 1
2949 2953 if not self._inline:
2950 2954 transaction.add(self._datafile, offset)
2951 2955 if self._sidedatafile:
2952 2956 transaction.add(self._sidedatafile, sidedata_offset)
2953 2957 transaction.add(self._indexfile, curr * len(entry))
2954 2958 if data[0]:
2955 2959 dfh.write(data[0])
2956 2960 dfh.write(data[1])
2957 2961 if sidedata:
2958 2962 sdfh.write(sidedata)
2959 2963 ifh.write(entry)
2960 2964 else:
2961 2965 offset += curr * self.index.entry_size
2962 2966 transaction.add(self._indexfile, offset)
2963 2967 ifh.write(entry)
2964 2968 ifh.write(data[0])
2965 2969 ifh.write(data[1])
2966 2970 assert not sidedata
2967 2971 self._enforceinlinesize(transaction)
2968 2972 if self._docket is not None:
2969 2973 # revlog-v2 always has 3 writing handles, help Pytype
2970 2974 wh1 = self._writinghandles[0]
2971 2975 wh2 = self._writinghandles[1]
2972 2976 wh3 = self._writinghandles[2]
2973 2977 assert wh1 is not None
2974 2978 assert wh2 is not None
2975 2979 assert wh3 is not None
2976 2980 self._docket.index_end = wh1.tell()
2977 2981 self._docket.data_end = wh2.tell()
2978 2982 self._docket.sidedata_end = wh3.tell()
2979 2983
2980 2984 nodemaputil.setup_persistent_nodemap(transaction, self)
2981 2985
2982 2986 def addgroup(
2983 2987 self,
2984 2988 deltas,
2985 2989 linkmapper,
2986 2990 transaction,
2987 2991 alwayscache=False,
2988 2992 addrevisioncb=None,
2989 2993 duplicaterevisioncb=None,
2990 2994 debug_info=None,
2991 2995 delta_base_reuse_policy=None,
2992 2996 ):
2993 2997 """
2994 2998 add a delta group
2995 2999
2996 3000 Given a set of deltas, add them to the revision log. The
2997 3001 first delta is against its parent, which should be in our
2998 3002 log, the rest are against the previous delta.
2999 3003
3000 3004 If ``addrevisioncb`` is defined, it will be called with arguments of
3001 3005 this revlog and the revision that was added.
3002 3006 """
3003 3007
3004 3008 if self._adding_group:
3005 3009 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3006 3010
3007 3011 # read the default delta-base reuse policy from revlog config if the
3008 3012 # group did not specify one.
3009 3013 if delta_base_reuse_policy is None:
3010 3014 if (
3011 3015 self.delta_config.general_delta
3012 3016 and self.delta_config.lazy_delta_base
3013 3017 ):
3014 3018 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3015 3019 else:
3016 3020 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3017 3021
3018 3022 self._adding_group = True
3019 3023 empty = True
3020 3024 try:
3021 3025 with self._writing(transaction):
3022 3026 write_debug = None
3023 3027 if self._debug_delta:
3024 3028 write_debug = transaction._report
3025 3029 deltacomputer = deltautil.deltacomputer(
3026 3030 self,
3027 3031 write_debug=write_debug,
3028 3032 debug_info=debug_info,
3029 3033 )
3030 3034 # loop through our set of deltas
3031 3035 for data in deltas:
3032 3036 (
3033 3037 node,
3034 3038 p1,
3035 3039 p2,
3036 3040 linknode,
3037 3041 deltabase,
3038 3042 delta,
3039 3043 flags,
3040 3044 sidedata,
3041 3045 ) = data
3042 3046 link = linkmapper(linknode)
3043 3047 flags = flags or REVIDX_DEFAULT_FLAGS
3044 3048
3045 3049 rev = self.index.get_rev(node)
3046 3050 if rev is not None:
3047 3051 # this can happen if two branches make the same change
3048 3052 self._nodeduplicatecallback(transaction, rev)
3049 3053 if duplicaterevisioncb:
3050 3054 duplicaterevisioncb(self, rev)
3051 3055 empty = False
3052 3056 continue
3053 3057
3054 3058 for p in (p1, p2):
3055 3059 if not self.index.has_node(p):
3056 3060 raise error.LookupError(
3057 3061 p, self.radix, _(b'unknown parent')
3058 3062 )
3059 3063
3060 3064 if not self.index.has_node(deltabase):
3061 3065 raise error.LookupError(
3062 3066 deltabase, self.display_id, _(b'unknown delta base')
3063 3067 )
3064 3068
3065 3069 baserev = self.rev(deltabase)
3066 3070
3067 3071 if baserev != nullrev and self.iscensored(baserev):
3068 3072 # if base is censored, delta must be full replacement in a
3069 3073 # single patch operation
3070 3074 hlen = struct.calcsize(b">lll")
3071 3075 oldlen = self.rawsize(baserev)
3072 3076 newlen = len(delta) - hlen
3073 3077 if delta[:hlen] != mdiff.replacediffheader(
3074 3078 oldlen, newlen
3075 3079 ):
3076 3080 raise error.CensoredBaseError(
3077 3081 self.display_id, self.node(baserev)
3078 3082 )
3079 3083
3080 3084 if not flags and self._peek_iscensored(baserev, delta):
3081 3085 flags |= REVIDX_ISCENSORED
3082 3086
3083 3087 # We assume consumers of addrevisioncb will want to retrieve
3084 3088 # the added revision, which will require a call to
3085 3089 # revision(). revision() will fast path if there is a cache
3086 3090 # hit. So, we tell _addrevision() to always cache in this case.
3087 3091 # We're only using addgroup() in the context of changegroup
3088 3092 # generation so the revision data can always be handled as raw
3089 3093 # by the flagprocessor.
3090 3094 rev = self._addrevision(
3091 3095 node,
3092 3096 None,
3093 3097 transaction,
3094 3098 link,
3095 3099 p1,
3096 3100 p2,
3097 3101 flags,
3098 3102 (baserev, delta, delta_base_reuse_policy),
3099 3103 alwayscache=alwayscache,
3100 3104 deltacomputer=deltacomputer,
3101 3105 sidedata=sidedata,
3102 3106 )
3103 3107
3104 3108 if addrevisioncb:
3105 3109 addrevisioncb(self, rev)
3106 3110 empty = False
3107 3111 finally:
3108 3112 self._adding_group = False
3109 3113 return not empty
3110 3114
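# Illustrative sketch (assumption, not original source): each item yielded by
# ``deltas`` is the 8-tuple unpacked above, and ``linkmapper`` translates a
# changelog node into the local linkrev, for instance:
#
#     def linkmapper(linknode):
#         return cl.rev(linknode)  # ``cl`` is an assumed changelog instance
#
#     rl.addgroup(deltas, linkmapper, tr)
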
3111 3115 def iscensored(self, rev):
3112 3116 """Check if a file revision is censored."""
3113 3117 if not self._censorable:
3114 3118 return False
3115 3119
3116 3120 return self.flags(rev) & REVIDX_ISCENSORED
3117 3121
3118 3122 def _peek_iscensored(self, baserev, delta):
3119 3123 """Quickly check if a delta produces a censored revision."""
3120 3124 if not self._censorable:
3121 3125 return False
3122 3126
3123 3127 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3124 3128
3125 3129 def getstrippoint(self, minlink):
3126 3130 """find the minimum rev that must be stripped to strip the linkrev
3127 3131
3128 3132 Returns a tuple containing the minimum rev and a set of all revs that
3129 3133 have linkrevs that will be broken by this strip.
3130 3134 """
3131 3135 return storageutil.resolvestripinfo(
3132 3136 minlink,
3133 3137 len(self) - 1,
3134 3138 self.headrevs(),
3135 3139 self.linkrev,
3136 3140 self.parentrevs,
3137 3141 )
3138 3142
3139 3143 def strip(self, minlink, transaction):
3140 3144 """truncate the revlog on the first revision with a linkrev >= minlink
3141 3145
3142 3146 This function is called when we're stripping revision minlink and
3143 3147 its descendants from the repository.
3144 3148
3145 3149 We have to remove all revisions with linkrev >= minlink, because
3146 3150 the equivalent changelog revisions will be renumbered after the
3147 3151 strip.
3148 3152
3149 3153 So we truncate the revlog on the first of these revisions, and
3150 3154 trust that the caller has saved the revisions that shouldn't be
3151 3155 removed and that it'll re-add them after this truncation.
3152 3156 """
3153 3157 if len(self) == 0:
3154 3158 return
3155 3159
3156 3160 rev, _ = self.getstrippoint(minlink)
3157 3161 if rev == len(self):
3158 3162 return
3159 3163
3160 3164 # first truncate the files on disk
3161 3165 data_end = self.start(rev)
3162 3166 if not self._inline:
3163 3167 transaction.add(self._datafile, data_end)
3164 3168 end = rev * self.index.entry_size
3165 3169 else:
3166 3170 end = data_end + (rev * self.index.entry_size)
3167 3171
3168 3172 if self._sidedatafile:
3169 3173 sidedata_end = self.sidedata_cut_off(rev)
3170 3174 transaction.add(self._sidedatafile, sidedata_end)
3171 3175
3172 3176 transaction.add(self._indexfile, end)
3173 3177 if self._docket is not None:
3174 3178 # XXX we could leverage the docket while stripping. However it is
3175 3179 # not powerful enough at the time of this comment
3176 3180 self._docket.index_end = end
3177 3181 self._docket.data_end = data_end
3178 3182 self._docket.sidedata_end = sidedata_end
3179 3183 self._docket.write(transaction, stripping=True)
3180 3184
3181 3185 # then reset internal state in memory to forget those revisions
3182 3186 self._revisioncache = None
3183 3187 self._chaininfocache = util.lrucachedict(500)
3184 3188 self._segmentfile.clear_cache()
3185 3189 self._segmentfile_sidedata.clear_cache()
3186 3190
3187 3191 del self.index[rev:-1]
3188 3192
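As a side note on the truncation arithmetic above, here is a minimal standalone sketch of just the offset computation; `truncation_offsets` is a hypothetical helper and the entry size and data offset are made-up values.

def truncation_offsets(rev, entry_size, rev_data_start, inline):
    # everything from revision `rev` onward is dropped
    data_end = rev_data_start
    if inline:
        # index entries and revision data live in one file
        index_end = data_end + rev * entry_size
    else:
        index_end = rev * entry_size
    return index_end, data_end

# keep revisions 0..9 of a non-inline revlog with 64-byte index entries
# whose revision 10 data would start at byte 4096 (hypothetical numbers)
assert truncation_offsets(10, 64, 4096, inline=False) == (640, 4096)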
3189 3193 def checksize(self):
3190 3194 """Check size of index and data files
3191 3195
3192 3196 return a (dd, di) tuple.
3193 3197 - dd: extra bytes for the "data" file
3194 3198 - di: extra bytes for the "index" file
3195 3199
3196 3200 A healthy revlog will return (0, 0).
3197 3201 """
3198 3202 expected = 0
3199 3203 if len(self):
3200 3204 expected = max(0, self.end(len(self) - 1))
3201 3205
3202 3206 try:
3203 3207 with self._datafp() as f:
3204 3208 f.seek(0, io.SEEK_END)
3205 3209 actual = f.tell()
3206 3210 dd = actual - expected
3207 3211 except FileNotFoundError:
3208 3212 dd = 0
3209 3213
3210 3214 try:
3211 3215 f = self.opener(self._indexfile)
3212 3216 f.seek(0, io.SEEK_END)
3213 3217 actual = f.tell()
3214 3218 f.close()
3215 3219 s = self.index.entry_size
3216 3220 i = max(0, actual // s)
3217 3221 di = actual - (i * s)
3218 3222 if self._inline:
3219 3223 databytes = 0
3220 3224 for r in self:
3221 3225 databytes += max(0, self.length(r))
3222 3226 dd = 0
3223 3227 di = actual - len(self) * s - databytes
3224 3228 except FileNotFoundError:
3225 3229 di = 0
3226 3230
3227 3231 return (dd, di)
3228 3232
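A standalone model of the `(dd, di)` computation above, using hypothetical on-disk sizes instead of real files; `toy_checksize` is not the real method, only the arithmetic it performs.

def toy_checksize(expected_data, actual_data, actual_index, entry_size):
    dd = actual_data - expected_data                  # surplus data bytes
    whole_entries = max(0, actual_index // entry_size)
    di = actual_index - whole_entries * entry_size    # trailing partial entry
    return dd, di

# a healthy revlog reports (0, 0)
assert toy_checksize(4096, 4096, 10 * 64, 64) == (0, 0)
# an interrupted write that left 5 junk bytes at the end of the index
assert toy_checksize(4096, 4096, 10 * 64 + 5, 64) == (0, 5)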
3229 3233 def files(self):
3230 3234 res = [self._indexfile]
3231 3235 if self._docket_file is None:
3232 3236 if not self._inline:
3233 3237 res.append(self._datafile)
3234 3238 else:
3235 3239 res.append(self._docket_file)
3236 3240 res.extend(self._docket.old_index_filepaths(include_empty=False))
3237 3241 if self._docket.data_end:
3238 3242 res.append(self._datafile)
3239 3243 res.extend(self._docket.old_data_filepaths(include_empty=False))
3240 3244 if self._docket.sidedata_end:
3241 3245 res.append(self._sidedatafile)
3242 3246 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3243 3247 return res
3244 3248
3245 3249 def emitrevisions(
3246 3250 self,
3247 3251 nodes,
3248 3252 nodesorder=None,
3249 3253 revisiondata=False,
3250 3254 assumehaveparentrevisions=False,
3251 3255 deltamode=repository.CG_DELTAMODE_STD,
3252 3256 sidedata_helpers=None,
3253 3257 debug_info=None,
3254 3258 ):
3255 3259 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3256 3260 raise error.ProgrammingError(
3257 3261 b'unhandled value for nodesorder: %s' % nodesorder
3258 3262 )
3259 3263
3260 3264 if nodesorder is None and not self.delta_config.general_delta:
3261 3265 nodesorder = b'storage'
3262 3266
3263 3267 if (
3264 3268 not self._storedeltachains
3265 3269 and deltamode != repository.CG_DELTAMODE_PREV
3266 3270 ):
3267 3271 deltamode = repository.CG_DELTAMODE_FULL
3268 3272
3269 3273 return storageutil.emitrevisions(
3270 3274 self,
3271 3275 nodes,
3272 3276 nodesorder,
3273 3277 revlogrevisiondelta,
3274 3278 deltaparentfn=self.deltaparent,
3275 3279 candeltafn=self._candelta,
3276 3280 rawsizefn=self.rawsize,
3277 3281 revdifffn=self.revdiff,
3278 3282 flagsfn=self.flags,
3279 3283 deltamode=deltamode,
3280 3284 revisiondata=revisiondata,
3281 3285 assumehaveparentrevisions=assumehaveparentrevisions,
3282 3286 sidedata_helpers=sidedata_helpers,
3283 3287 debug_info=debug_info,
3284 3288 )
3285 3289
3286 3290 DELTAREUSEALWAYS = b'always'
3287 3291 DELTAREUSESAMEREVS = b'samerevs'
3288 3292 DELTAREUSENEVER = b'never'
3289 3293
3290 3294 DELTAREUSEFULLADD = b'fulladd'
3291 3295
3292 3296 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3293 3297
3294 3298 def clone(
3295 3299 self,
3296 3300 tr,
3297 3301 destrevlog,
3298 3302 addrevisioncb=None,
3299 3303 deltareuse=DELTAREUSESAMEREVS,
3300 3304 forcedeltabothparents=None,
3301 3305 sidedata_helpers=None,
3302 3306 ):
3303 3307 """Copy this revlog to another, possibly with format changes.
3304 3308
3305 3309 The destination revlog will contain the same revisions and nodes.
3306 3310 However, it may not be bit-for-bit identical due to e.g. delta encoding
3307 3311 differences.
3308 3312
3309 3313 The ``deltareuse`` argument controls how deltas from the existing revlog
3310 3314 are preserved in the destination revlog. The argument can have the
3311 3315 following values:
3312 3316
3313 3317 DELTAREUSEALWAYS
3314 3318 Deltas will always be reused (if possible), even if the destination
3315 3319 revlog would not select the same revisions for the delta. This is the
3316 3320 fastest mode of operation.
3317 3321 DELTAREUSESAMEREVS
3318 3322 Deltas will be reused if the destination revlog would pick the same
3319 3323 revisions for the delta. This mode strikes a balance between speed
3320 3324 and optimization.
3321 3325 DELTAREUSENEVER
3322 3326 Deltas will never be reused. This is the slowest mode of execution.
3323 3327 This mode can be used to recompute deltas (e.g. if the diff/delta
3324 3328 algorithm changes).
3325 3329 DELTAREUSEFULLADD
3326 3330 Revisions will be re-added as if they were new content. This is
3327 3331 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3328 3332 e.g. large file detection and handling.
3329 3333
3330 3334 Delta computation can be slow, so the choice of delta reuse policy can
3331 3335 significantly affect run time.
3332 3336
3333 3337 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3334 3338 two extremes. Deltas will be reused if they are appropriate. But if the
3335 3339 delta could choose a better revision, it will do so. This means if you
3336 3340 are converting a non-generaldelta revlog to a generaldelta revlog,
3337 3341 deltas will be recomputed if the delta's parent isn't a parent of the
3338 3342 revision.
3339 3343
3340 3344 In addition to the delta policy, the ``forcedeltabothparents``
3341 3345 argument controls whether to force computing deltas against both parents
3342 3346 for merges. When it is None, the destination revlog's current setting is used.
3343 3347
3344 3348 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3345 3349 `sidedata_helpers`.
3346 3350 """
3347 3351 if deltareuse not in self.DELTAREUSEALL:
3348 3352 raise ValueError(
3349 3353 _(b'value for deltareuse invalid: %s') % deltareuse
3350 3354 )
3351 3355
3352 3356 if len(destrevlog):
3353 3357 raise ValueError(_(b'destination revlog is not empty'))
3354 3358
3355 3359 if getattr(self, 'filteredrevs', None):
3356 3360 raise ValueError(_(b'source revlog has filtered revisions'))
3357 3361 if getattr(destrevlog, 'filteredrevs', None):
3358 3362 raise ValueError(_(b'destination revlog has filtered revisions'))
3359 3363
3360 3364 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3361 3365 # if possible.
3362 3366 old_delta_config = destrevlog.delta_config
3363 3367 destrevlog.delta_config = destrevlog.delta_config.copy()
3364 3368
3365 3369 try:
3366 3370 if deltareuse == self.DELTAREUSEALWAYS:
3367 3371 destrevlog.delta_config.lazy_delta_base = True
3368 3372 destrevlog.delta_config.lazy_delta = True
3369 3373 elif deltareuse == self.DELTAREUSESAMEREVS:
3370 3374 destrevlog.delta_config.lazy_delta_base = False
3371 3375 destrevlog.delta_config.lazy_delta = True
3372 3376 elif deltareuse == self.DELTAREUSENEVER:
3373 3377 destrevlog.delta_config.lazy_delta_base = False
3374 3378 destrevlog.delta_config.lazy_delta = False
3375 3379
3376 3380 delta_both_parents = (
3377 3381 forcedeltabothparents or old_delta_config.delta_both_parents
3378 3382 )
3379 3383 destrevlog.delta_config.delta_both_parents = delta_both_parents
3380 3384
3381 3385 with self.reading():
3382 3386 self._clone(
3383 3387 tr,
3384 3388 destrevlog,
3385 3389 addrevisioncb,
3386 3390 deltareuse,
3387 3391 forcedeltabothparents,
3388 3392 sidedata_helpers,
3389 3393 )
3390 3394
3391 3395 finally:
3392 3396 destrevlog.delta_config = old_delta_config
3393 3397
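For reference, the three in-place policies map onto the destination's delta configuration exactly as set in the ``try`` block above (``DELTAREUSEFULLADD`` instead re-adds every revision through ``addrevision``). The sketch below uses a plain dict in place of the real config object; `DELTA_REUSE_CONFIG` and `apply_policy` are hypothetical names.

DELTA_REUSE_CONFIG = {
    b'always':   {'lazy_delta': True,  'lazy_delta_base': True},
    b'samerevs': {'lazy_delta': True,  'lazy_delta_base': False},
    b'never':    {'lazy_delta': False, 'lazy_delta_base': False},
}

def apply_policy(config, policy):
    """Return a copy of a plain {name: value} config with a policy applied."""
    new = dict(config)
    new.update(DELTA_REUSE_CONFIG[policy])
    return new

assert apply_policy({}, b'samerevs') == {
    'lazy_delta': True,
    'lazy_delta_base': False,
}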
3394 3398 def _clone(
3395 3399 self,
3396 3400 tr,
3397 3401 destrevlog,
3398 3402 addrevisioncb,
3399 3403 deltareuse,
3400 3404 forcedeltabothparents,
3401 3405 sidedata_helpers,
3402 3406 ):
3403 3407 """perform the core duty of `revlog.clone` after parameter processing"""
3404 3408 write_debug = None
3405 3409 if self._debug_delta:
3406 3410 write_debug = tr._report
3407 3411 deltacomputer = deltautil.deltacomputer(
3408 3412 destrevlog,
3409 3413 write_debug=write_debug,
3410 3414 )
3411 3415 index = self.index
3412 3416 for rev in self:
3413 3417 entry = index[rev]
3414 3418
3415 3419 # Some classes override linkrev to take filtered revs into
3416 3420 # account. Use raw entry from index.
3417 3421 flags = entry[0] & 0xFFFF
3418 3422 linkrev = entry[4]
3419 3423 p1 = index[entry[5]][7]
3420 3424 p2 = index[entry[6]][7]
3421 3425 node = entry[7]
3422 3426
3423 3427 # (Possibly) reuse the delta from the revlog if allowed and
3424 3428 # the revlog chunk is a delta.
3425 3429 cachedelta = None
3426 3430 rawtext = None
3427 3431 if deltareuse == self.DELTAREUSEFULLADD:
3428 3432 text = self._revisiondata(rev)
3429 3433 sidedata = self.sidedata(rev)
3430 3434
3431 3435 if sidedata_helpers is not None:
3432 3436 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3433 3437 self, sidedata_helpers, sidedata, rev
3434 3438 )
3435 3439 flags = flags | new_flags[0] & ~new_flags[1]
3436 3440
3437 3441 destrevlog.addrevision(
3438 3442 text,
3439 3443 tr,
3440 3444 linkrev,
3441 3445 p1,
3442 3446 p2,
3443 3447 cachedelta=cachedelta,
3444 3448 node=node,
3445 3449 flags=flags,
3446 3450 deltacomputer=deltacomputer,
3447 3451 sidedata=sidedata,
3448 3452 )
3449 3453 else:
3450 3454 if destrevlog._lazydelta:
3451 3455 dp = self.deltaparent(rev)
3452 3456 if dp != nullrev:
3453 3457 cachedelta = (dp, bytes(self._chunk(rev)))
3454 3458
3455 3459 sidedata = None
3456 3460 if not cachedelta:
3457 3461 rawtext = self._revisiondata(rev)
3458 3462 sidedata = self.sidedata(rev)
3459 3463 if sidedata is None:
3460 3464 sidedata = self.sidedata(rev)
3461 3465
3462 3466 if sidedata_helpers is not None:
3463 3467 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3464 3468 self, sidedata_helpers, sidedata, rev
3465 3469 )
3466 3470 flags = flags | new_flags[0] & ~new_flags[1]
3467 3471
3468 3472 with destrevlog._writing(tr):
3469 3473 destrevlog._addrevision(
3470 3474 node,
3471 3475 rawtext,
3472 3476 tr,
3473 3477 linkrev,
3474 3478 p1,
3475 3479 p2,
3476 3480 flags,
3477 3481 cachedelta,
3478 3482 deltacomputer=deltacomputer,
3479 3483 sidedata=sidedata,
3480 3484 )
3481 3485
3482 3486 if addrevisioncb:
3483 3487 addrevisioncb(self, rev, node)
3484 3488
3485 3489 def censorrevision(self, tr, censornode, tombstone=b''):
3486 3490 if self._format_version == REVLOGV0:
3487 3491 raise error.RevlogError(
3488 3492 _(b'cannot censor with version %d revlogs')
3489 3493 % self._format_version
3490 3494 )
3491 3495 elif self._format_version == REVLOGV1:
3492 3496 rewrite.v1_censor(self, tr, censornode, tombstone)
3493 3497 else:
3494 3498 rewrite.v2_censor(self, tr, censornode, tombstone)
3495 3499
3496 3500 def verifyintegrity(self, state):
3497 3501 """Verifies the integrity of the revlog.
3498 3502
3499 3503 Yields ``revlogproblem`` instances describing problems that are
3500 3504 found.
3501 3505 """
3502 3506 dd, di = self.checksize()
3503 3507 if dd:
3504 3508 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3505 3509 if di:
3506 3510 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3507 3511
3508 3512 version = self._format_version
3509 3513
3510 3514 # The verifier tells us what version revlog we should be.
3511 3515 if version != state[b'expectedversion']:
3512 3516 yield revlogproblem(
3513 3517 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3514 3518 % (self.display_id, version, state[b'expectedversion'])
3515 3519 )
3516 3520
3517 3521 state[b'skipread'] = set()
3518 3522 state[b'safe_renamed'] = set()
3519 3523
3520 3524 for rev in self:
3521 3525 node = self.node(rev)
3522 3526
3523 3527 # Verify contents. 4 cases to care about:
3524 3528 #
3525 3529 # common: the most common case
3526 3530 # rename: with a rename
3527 3531 # meta: file content starts with b'\1\n', the metadata
3528 3532 # header defined in filelog.py, but without a rename
3529 3533 # ext: content stored externally
3530 3534 #
3531 3535 # More formally, their differences are shown below:
3532 3536 #
3533 3537 # | common | rename | meta | ext
3534 3538 # -------------------------------------------------------
3535 3539 # flags() | 0 | 0 | 0 | not 0
3536 3540 # renamed() | False | True | False | ?
3537 3541 # rawtext[0:2]=='\1\n'| False | True | True | ?
3538 3542 #
3539 3543 # "rawtext" means the raw text stored in revlog data, which
3540 3544 # could be retrieved by "rawdata(rev)". "text"
3541 3545 # mentioned below is "revision(rev)".
3542 3546 #
3543 3547 # There are 3 different lengths stored physically:
3544 3548 # 1. L1: rawsize, stored in revlog index
3545 3549 # 2. L2: len(rawtext), stored in revlog data
3546 3550 # 3. L3: len(text), stored in revlog data if flags==0, or
3547 3551 # possibly somewhere else if flags!=0
3548 3552 #
3549 3553 # L1 should be equal to L2. L3 could be different from them.
3550 3554 # "text" may or may not affect commit hash depending on flag
3551 3555 # processors (see flagutil.addflagprocessor).
3552 3556 #
3553 3557 # | common | rename | meta | ext
3554 3558 # -------------------------------------------------
3555 3559 # rawsize() | L1 | L1 | L1 | L1
3556 3560 # size() | L1 | L2-LM | L1(*) | L1 (?)
3557 3561 # len(rawtext) | L2 | L2 | L2 | L2
3558 3562 # len(text) | L2 | L2 | L2 | L3
3559 3563 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3560 3564 #
3561 3565 # LM: length of metadata, depending on rawtext
3562 3566 # (*): not ideal, see comment in filelog.size
3563 3567 # (?): could be "- len(meta)" if the resolved content has
3564 3568 # rename metadata
3565 3569 #
3566 3570 # Checks needed to be done:
3567 3571 # 1. length check: L1 == L2, in all cases.
3568 3572 # 2. hash check: depending on flag processor, we may need to
3569 3573 # use either "text" (external), or "rawtext" (in revlog).
3570 3574
3571 3575 try:
3572 3576 skipflags = state.get(b'skipflags', 0)
3573 3577 if skipflags:
3574 3578 skipflags &= self.flags(rev)
3575 3579
3576 3580 _verify_revision(self, skipflags, state, node)
3577 3581
3578 3582 l1 = self.rawsize(rev)
3579 3583 l2 = len(self.rawdata(node))
3580 3584
3581 3585 if l1 != l2:
3582 3586 yield revlogproblem(
3583 3587 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3584 3588 node=node,
3585 3589 )
3586 3590
3587 3591 except error.CensoredNodeError:
3588 3592 if state[b'erroroncensored']:
3589 3593 yield revlogproblem(
3590 3594 error=_(b'censored file data'), node=node
3591 3595 )
3592 3596 state[b'skipread'].add(node)
3593 3597 except Exception as e:
3594 3598 yield revlogproblem(
3595 3599 error=_(b'unpacking %s: %s')
3596 3600 % (short(node), stringutil.forcebytestr(e)),
3597 3601 node=node,
3598 3602 )
3599 3603 state[b'skipread'].add(node)
3600 3604
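Check 1 above boils down to a single comparison between the rawsize recorded in the index (L1) and the length of the stored raw text (L2); a minimal sketch with a hypothetical helper name:

def toy_length_problem(l1_rawsize, l2_rawtext_len):
    # L1 (index rawsize) must match L2 (length of the stored rawtext)
    if l1_rawsize != l2_rawtext_len:
        return 'unpacked size is %d, %d expected' % (l2_rawtext_len, l1_rawsize)
    return None

assert toy_length_problem(10, 10) is None
assert toy_length_problem(10, 12) == 'unpacked size is 12, 10 expected'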
3601 3605 def storageinfo(
3602 3606 self,
3603 3607 exclusivefiles=False,
3604 3608 sharedfiles=False,
3605 3609 revisionscount=False,
3606 3610 trackedsize=False,
3607 3611 storedsize=False,
3608 3612 ):
3609 3613 d = {}
3610 3614
3611 3615 if exclusivefiles:
3612 3616 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3613 3617 if not self._inline:
3614 3618 d[b'exclusivefiles'].append((self.opener, self._datafile))
3615 3619
3616 3620 if sharedfiles:
3617 3621 d[b'sharedfiles'] = []
3618 3622
3619 3623 if revisionscount:
3620 3624 d[b'revisionscount'] = len(self)
3621 3625
3622 3626 if trackedsize:
3623 3627 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3624 3628
3625 3629 if storedsize:
3626 3630 d[b'storedsize'] = sum(
3627 3631 self.opener.stat(path).st_size for path in self.files()
3628 3632 )
3629 3633
3630 3634 return d
3631 3635
3632 3636 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3633 3637 if not self.hassidedata:
3634 3638 return
3635 3639 # revlog formats with sidedata support do not support inline
3636 3640 assert not self._inline
3637 3641 if not helpers[1] and not helpers[2]:
3638 3642 # Nothing to generate or remove
3639 3643 return
3640 3644
3641 3645 new_entries = []
3642 3646 # append the new sidedata
3643 3647 with self._writing(transaction):
3644 3648 ifh, dfh, sdfh = self._writinghandles
3645 3649 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3646 3650
3647 3651 current_offset = sdfh.tell()
3648 3652 for rev in range(startrev, endrev + 1):
3649 3653 entry = self.index[rev]
3650 3654 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3651 3655 store=self,
3652 3656 sidedata_helpers=helpers,
3653 3657 sidedata={},
3654 3658 rev=rev,
3655 3659 )
3656 3660
3657 3661 serialized_sidedata = sidedatautil.serialize_sidedata(
3658 3662 new_sidedata
3659 3663 )
3660 3664
3661 3665 sidedata_compression_mode = COMP_MODE_INLINE
3662 3666 if serialized_sidedata and self.hassidedata:
3663 3667 sidedata_compression_mode = COMP_MODE_PLAIN
3664 3668 h, comp_sidedata = self.compress(serialized_sidedata)
3665 3669 if (
3666 3670 h != b'u'
3667 3671 and comp_sidedata[0] != b'\0'
3668 3672 and len(comp_sidedata) < len(serialized_sidedata)
3669 3673 ):
3670 3674 assert not h
3671 3675 if (
3672 3676 comp_sidedata[0]
3673 3677 == self._docket.default_compression_header
3674 3678 ):
3675 3679 sidedata_compression_mode = COMP_MODE_DEFAULT
3676 3680 serialized_sidedata = comp_sidedata
3677 3681 else:
3678 3682 sidedata_compression_mode = COMP_MODE_INLINE
3679 3683 serialized_sidedata = comp_sidedata
3680 3684 if entry[8] != 0 or entry[9] != 0:
3681 3685 # rewriting entries that already have sidedata is not
3682 3686 # supported yet, because it introduces garbage data in the
3683 3687 # revlog.
3684 3688 msg = b"rewriting existing sidedata is not supported yet"
3685 3689 raise error.Abort(msg)
3686 3690
3687 3691 # Apply (potential) flags to add and to remove after running
3688 3692 # the sidedata helpers
3689 3693 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3690 3694 entry_update = (
3691 3695 current_offset,
3692 3696 len(serialized_sidedata),
3693 3697 new_offset_flags,
3694 3698 sidedata_compression_mode,
3695 3699 )
3696 3700
3697 3701 # the sidedata computation might have moved the file cursors around
3698 3702 sdfh.seek(current_offset, os.SEEK_SET)
3699 3703 sdfh.write(serialized_sidedata)
3700 3704 new_entries.append(entry_update)
3701 3705 current_offset += len(serialized_sidedata)
3702 3706 self._docket.sidedata_end = sdfh.tell()
3703 3707
3704 3708 # rewrite the new index entries
3705 3709 ifh.seek(startrev * self.index.entry_size)
3706 3710 for i, e in enumerate(new_entries):
3707 3711 rev = startrev + i
3708 3712 self.index.replace_sidedata_info(rev, *e)
3709 3713 packed = self.index.entry_binary(rev)
3710 3714 if rev == 0 and self._docket is None:
3711 3715 header = self._format_flags | self._format_version
3712 3716 header = self.index.pack_header(header)
3713 3717 packed = header + packed
3714 3718 ifh.write(packed)
@@ -1,875 +1,875 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 78 newrl.delta_config.general_delta = rl.delta_config.general_delta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 112 rawtext = rl._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 129 rl._loadindex()
130 130
131 131
132 132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 133 """censors a revision in a "version 2" revlog"""
134 134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 135 assert revlog._format_version != REVLOGV1, revlog._format_version
136 136
137 137 censor_revs = {revlog.rev(censornode)}
138 138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 139
140 140
141 141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 142 """rewrite a revlog to censor some of its content
143 143
144 144 General principle
145 145
146 146 We create new revlog files (index/data/sidedata) to copy the content of
147 147 the existing data without the censored data.
148 148
149 149 We need to recompute new delta for any revision that used the censored
150 150 revision as delta base. As the cumulative size of the new delta may be
151 151 large, we store them in a temporary file until they are stored in their
152 152 final destination.
153 153
154 154 All data before the censored data can be blindly copied. The rest needs
155 155 to be copied as we go and the associated index entry needs adjustement.
156 156 """
157 157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 158 assert revlog._format_version != REVLOGV1, revlog._format_version
159 159
160 160 old_index = revlog.index
161 161 docket = revlog._docket
162 162
163 163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 164
165 165 first_excl_rev = min(censor_revs)
166 166
167 167 first_excl_entry = revlog.index[first_excl_rev]
168 168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 171
172 172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 173 # rev → (new_base, data_start, data_end, compression_mode)
174 174 rewritten_entries = _precompute_rewritten_delta(
175 175 revlog,
176 176 old_index,
177 177 censor_revs,
178 178 tmp_storage,
179 179 )
180 180
181 181 all_files = _setup_new_files(
182 182 revlog,
183 183 index_cutoff,
184 184 data_cutoff,
185 185 sidedata_cutoff,
186 186 )
187 187
188 188 # we don't need to open the old index file since its content already
189 189 # exists in a usable form in `old_index`.
190 190 with all_files() as open_files:
191 191 (
192 192 old_data_file,
193 193 old_sidedata_file,
194 194 new_index_file,
195 195 new_data_file,
196 196 new_sidedata_file,
197 197 ) = open_files
198 198
199 199 # writing the censored revision
200 200
201 201 # Writing all subsequent revisions
202 202 for rev in range(first_excl_rev, len(old_index)):
203 203 if rev in censor_revs:
204 204 _rewrite_censor(
205 205 revlog,
206 206 old_index,
207 207 open_files,
208 208 rev,
209 209 tombstone,
210 210 )
211 211 else:
212 212 _rewrite_simple(
213 213 revlog,
214 214 old_index,
215 215 open_files,
216 216 rev,
217 217 rewritten_entries,
218 218 tmp_storage,
219 219 )
220 220 docket.write(transaction=None, stripping=True)
221 221
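The copy limits used above come from the first censored revision: everything strictly before it is copied verbatim, so the cut-offs are the byte offsets where that revision starts in each file. A standalone sketch of that computation with hypothetical values (`toy_cutoffs` is not a real helper):

def toy_cutoffs(first_excl_rev, entry_size, packed_data_offset):
    index_cutoff = entry_size * first_excl_rev
    # the data offset is stored in the high bits of the first index field
    data_cutoff = packed_data_offset >> 16
    return index_cutoff, data_cutoff

# 64-byte index entries, first censored revision is rev 3, its data
# starts at byte 4096 of the data file
assert toy_cutoffs(3, 64, 4096 << 16) == (192, 4096)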
222 222
223 223 def _precompute_rewritten_delta(
224 224 revlog,
225 225 old_index,
226 226 excluded_revs,
227 227 tmp_storage,
228 228 ):
229 229 """Compute new delta for revisions whose delta is based on revision that
230 230 will not survive as is.
231 231
232 232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 233 """
234 234 dc = deltas.deltacomputer(revlog)
235 235 rewritten_entries = {}
236 236 first_excl_rev = min(excluded_revs)
237 237 with revlog.reading():
238 238 for rev in range(first_excl_rev, len(old_index)):
239 239 if rev in excluded_revs:
240 240 # this revision will be preserved as is, so we don't need to
241 241 # consider recomputing a delta.
242 242 continue
243 243 entry = old_index[rev]
244 244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 245 continue
246 246 # This is a revision that uses the censored revision as the base
247 247 # for its delta. We need to compute a new delta for it.
248 248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 249 # this revision is empty, we can delta against nullrev
250 250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 251 else:
252 252
253 253 text = revlog.rawdata(rev)
254 254 info = revlogutils.revisioninfo(
255 255 node=entry[ENTRY_NODE_ID],
256 256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 258 btext=[text],
259 259 textlen=len(text),
260 260 cachedelta=None,
261 261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 262 )
263 263 d = dc.finddeltainfo(
264 264 info, excluded_bases=excluded_revs, target_rev=rev
265 265 )
266 266 default_comp = revlog._docket.default_compression_header
267 267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 268 # using `tell` is a bit lazy, but we are not here for speed
269 269 start = tmp_storage.tell()
270 270 tmp_storage.write(d.data[1])
271 271 end = tmp_storage.tell()
272 272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 273 return rewritten_entries
274 274
275 275
276 276 def _setup_new_files(
277 277 revlog,
278 278 index_cutoff,
279 279 data_cutoff,
280 280 sidedata_cutoff,
281 281 ):
282 282 """
283 283
284 284 return a context manager to open all the relevant files:
285 285 - old_data_file,
286 286 - old_sidedata_file,
287 287 - new_index_file,
288 288 - new_data_file,
289 289 - new_sidedata_file,
290 290
291 291 The old_index_file is not here because it is accessed through the
292 292 `old_index` object of the caller function.
293 293 """
294 294 docket = revlog._docket
295 295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298 298
299 299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302 302
303 303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 305 util.copyfile(
306 306 old_sidedata_filepath,
307 307 new_sidedata_filepath,
308 308 nb_bytes=sidedata_cutoff,
309 309 )
310 310 revlog.opener.register_file(docket.index_filepath())
311 311 revlog.opener.register_file(docket.data_filepath())
312 312 revlog.opener.register_file(docket.sidedata_filepath())
313 313
314 314 docket.index_end = index_cutoff
315 315 docket.data_end = data_cutoff
316 316 docket.sidedata_end = sidedata_cutoff
317 317
318 318 # reload the revlog internal information
319 319 revlog.clearcaches()
320 320 revlog._loadindex(docket=docket)
321 321
322 322 @contextlib.contextmanager
323 323 def all_files_opener():
324 324 # hide opening in a helper function to please check-code, black
325 325 # and various python versions at the same time
326 326 with open(old_data_filepath, 'rb') as old_data_file:
327 327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 328 with open(new_index_filepath, 'r+b') as new_index_file:
329 329 with open(new_data_filepath, 'r+b') as new_data_file:
330 330 with open(
331 331 new_sidedata_filepath, 'r+b'
332 332 ) as new_sidedata_file:
333 333 new_index_file.seek(0, os.SEEK_END)
334 334 assert new_index_file.tell() == index_cutoff
335 335 new_data_file.seek(0, os.SEEK_END)
336 336 assert new_data_file.tell() == data_cutoff
337 337 new_sidedata_file.seek(0, os.SEEK_END)
338 338 assert new_sidedata_file.tell() == sidedata_cutoff
339 339 yield (
340 340 old_data_file,
341 341 old_sidedata_file,
342 342 new_index_file,
343 343 new_data_file,
344 344 new_sidedata_file,
345 345 )
346 346
347 347 return all_files_opener
348 348
349 349
350 350 def _rewrite_simple(
351 351 revlog,
352 352 old_index,
353 353 all_files,
354 354 rev,
355 355 rewritten_entries,
356 356 tmp_storage,
357 357 ):
358 358 """append a normal revision to the index after the rewritten one(s)"""
359 359 (
360 360 old_data_file,
361 361 old_sidedata_file,
362 362 new_index_file,
363 363 new_data_file,
364 364 new_sidedata_file,
365 365 ) = all_files
366 366 entry = old_index[rev]
367 367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369 369
370 370 if rev not in rewritten_entries:
371 371 old_data_file.seek(old_data_offset)
372 372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 373 new_data = old_data_file.read(new_data_size)
374 374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 376 else:
377 377 (
378 378 data_delta_base,
379 379 start,
380 380 end,
381 381 d_comp_mode,
382 382 ) = rewritten_entries[rev]
383 383 new_data_size = end - start
384 384 tmp_storage.seek(start)
385 385 new_data = tmp_storage.read(new_data_size)
386 386
387 387 # It might be faster to group continuous read/write operations;
388 388 # however, this is censor, an operation that is not focused
389 389 # on stellar performance. So I have not written this
390 390 # optimisation yet.
391 391 new_data_offset = new_data_file.tell()
392 392 new_data_file.write(new_data)
393 393
394 394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 395 new_sidedata_offset = new_sidedata_file.tell()
396 396 if 0 < sidedata_size:
397 397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 398 old_sidedata_file.seek(old_sidedata_offset)
399 399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 400 new_sidedata_file.write(new_sidedata)
401 401
402 402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 404 assert data_delta_base <= rev, (data_delta_base, rev)
405 405
406 406 new_entry = revlogutils.entry(
407 407 flags=flags,
408 408 data_offset=new_data_offset,
409 409 data_compressed_length=new_data_size,
410 410 data_uncompressed_length=data_uncompressed_length,
411 411 data_delta_base=data_delta_base,
412 412 link_rev=entry[ENTRY_LINK_REV],
413 413 parent_rev_1=entry[ENTRY_PARENT_1],
414 414 parent_rev_2=entry[ENTRY_PARENT_2],
415 415 node_id=entry[ENTRY_NODE_ID],
416 416 sidedata_offset=new_sidedata_offset,
417 417 sidedata_compressed_length=sidedata_size,
418 418 data_compression_mode=d_comp_mode,
419 419 sidedata_compression_mode=sd_com_mode,
420 420 )
421 421 revlog.index.append(new_entry)
422 422 entry_bin = revlog.index.entry_binary(rev)
423 423 new_index_file.write(entry_bin)
424 424
425 425 revlog._docket.index_end = new_index_file.tell()
426 426 revlog._docket.data_end = new_data_file.tell()
427 427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428 428
429 429
430 430 def _rewrite_censor(
431 431 revlog,
432 432 old_index,
433 433 all_files,
434 434 rev,
435 435 tombstone,
436 436 ):
437 437 """rewrite and append a censored revision"""
438 438 (
439 439 old_data_file,
440 440 old_sidedata_file,
441 441 new_index_file,
442 442 new_data_file,
443 443 new_sidedata_file,
444 444 ) = all_files
445 445 entry = old_index[rev]
446 446
447 447 # XXX consider trying the default compression too
448 448 new_data_size = len(tombstone)
449 449 new_data_offset = new_data_file.tell()
450 450 new_data_file.write(tombstone)
451 451
452 452 # we are not adding any sidedata as they might leak info about the censored version
453 453
454 454 link_rev = entry[ENTRY_LINK_REV]
455 455
456 456 p1 = entry[ENTRY_PARENT_1]
457 457 p2 = entry[ENTRY_PARENT_2]
458 458
459 459 new_entry = revlogutils.entry(
460 460 flags=constants.REVIDX_ISCENSORED,
461 461 data_offset=new_data_offset,
462 462 data_compressed_length=new_data_size,
463 463 data_uncompressed_length=new_data_size,
464 464 data_delta_base=rev,
465 465 link_rev=link_rev,
466 466 parent_rev_1=p1,
467 467 parent_rev_2=p2,
468 468 node_id=entry[ENTRY_NODE_ID],
469 469 sidedata_offset=0,
470 470 sidedata_compressed_length=0,
471 471 data_compression_mode=COMP_MODE_PLAIN,
472 472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 473 )
474 474 revlog.index.append(new_entry)
475 475 entry_bin = revlog.index.entry_binary(rev)
476 476 new_index_file.write(entry_bin)
477 477 revlog._docket.index_end = new_index_file.tell()
478 478 revlog._docket.data_end = new_data_file.tell()
479 479
480 480
481 481 def _get_filename_from_filelog_index(path):
482 482 # Drop the extension and the `data/` prefix
483 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 484 if len(path_part) < 2:
485 485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 486 msg %= path
487 487 raise error.Abort(msg)
488 488
489 489 return path_part[1]
490 490
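A quick usage sketch for the helper above, with a hypothetical store path: the `.i` extension and the `data/` prefix are dropped.

path = b'data/foo/bar.txt.i'
assert path.rsplit(b'.', 1)[0].split(b'/', 1)[1] == b'foo/bar.txt'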
491 491
492 492 def _filelog_from_filename(repo, path):
493 493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494 494
495 495 from .. import filelog # avoid cycle
496 496
497 497 fl = filelog.filelog(repo.svfs, path)
498 498 return fl
499 499
500 500
501 501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 503 from ..pure import parsers # avoid cycle
504 504
505 505 if repo._currentlock(repo._lockref) is None:
506 506 # Let's be paranoid about it
507 507 msg = "repo needs to be locked to rewrite parents"
508 508 raise error.ProgrammingError(msg)
509 509
510 510 index_format = parsers.IndexObject.index_format
511 511 entry = rl.index[rev]
512 512 new_entry = list(entry)
513 513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 514 packed = index_format.pack(*new_entry[:8])
515 515 fp.seek(offset)
516 516 fp.write(packed)
517 517
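The fix itself is only a swap of the two parent fields of the index entry (positions 5 and 6) before the entry is repacked and written back at the same offset; a toy illustration with placeholder field names:

entry = ('flags', 'offset', 'clen', 'ulen', 'linkrev', 'p1', 'p2', 'node')
fixed = list(entry)
fixed[5], fixed[6] = entry[6], entry[5]
assert tuple(fixed) == (
    'flags', 'offset', 'clen', 'ulen', 'linkrev', 'p2', 'p1', 'node'
)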
518 518
519 519 def _reorder_filelog_parents(repo, fl, to_fix):
520 520 """
521 521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 522 new version to disk, overwriting the old one with a rename.
523 523 """
524 524 from ..pure import parsers # avoid cycle
525 525
526 526 ui = repo.ui
527 527 assert len(to_fix) > 0
528 528 rl = fl._revlog
529 529 if rl._format_version != constants.REVLOGV1:
530 530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 531 raise error.ProgrammingError(msg)
532 532
533 533 index_file = rl._indexfile
534 534 new_file_path = index_file + b'.tmp-parents-fix'
535 535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536 536
537 537 with ui.uninterruptible():
538 538 try:
539 539 util.copyfile(
540 540 rl.opener.join(index_file),
541 541 rl.opener.join(new_file_path),
542 checkambig=rl._checkambig,
542 checkambig=rl.data_config.check_ambig,
543 543 )
544 544
545 545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 546 if rl._inline:
547 547 index = parsers.InlinedIndexObject(fp.read())
548 548 for rev in fl.revs():
549 549 if rev in to_fix:
550 550 offset = index._calculate_index(rev)
551 551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 552 ui.write(repaired_msg % (rev, index_file))
553 553 else:
554 554 index_format = parsers.IndexObject.index_format
555 555 for rev in to_fix:
556 556 offset = rev * index_format.size
557 557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 558 ui.write(repaired_msg % (rev, index_file))
559 559
560 560 rl.opener.rename(new_file_path, index_file)
561 561 rl.clearcaches()
562 562 rl._loadindex()
563 563 finally:
564 564 util.tryunlink(new_file_path)
565 565
566 566
567 567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 568 full_text = lambda: fl._revlog.rawdata(filerev)
569 569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 570 return _is_revision_affected_inner(
571 571 full_text, parent_revs, filerev, metadata_cache
572 572 )
573 573
574 574
575 575 def _is_revision_affected_inner(
576 576 full_text,
577 577 parents_revs,
578 578 filerev,
579 579 metadata_cache=None,
580 580 ):
581 581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 582 special meaning compared to the reverse in the context of filelog-based
583 583 copytracing. issue6528 exists because new code assumed that parent ordering
584 584 didn't matter, so this detects if the revision contains metadata (since
585 585 it's only used for filelog-based copytracing) and its parents are in the
586 586 "wrong" order."""
587 587 try:
588 588 raw_text = full_text()
589 589 except error.CensoredNodeError:
590 590 # We don't care about censored nodes as they never carry metadata
591 591 return False
592 592
593 593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 595 if metadata_cache is not None:
596 596 metadata_cache[filerev] = has_meta
597 597 if has_meta:
598 598 (p1, p2) = parents_revs()
599 599 if p1 != nullrev and p2 == nullrev:
600 600 return True
601 601 return False
602 602
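A standalone restatement of the rule above, assuming `nullrev` is -1 as in Mercurial's node module; `toy_is_affected` is a hypothetical helper, not the function itself.

NULLREV = -1

def toy_is_affected(raw_text, p1, p2):
    # metadata marker present and parents in the suspicious order
    has_meta = bytes(raw_text[:2]) == b'\x01\n'
    return has_meta and p1 != NULLREV and p2 == NULLREV

# copy metadata plus the (real p1, null p2) ordering: suspect
assert toy_is_affected(b'\x01\ncopy: a\n\x01\ndata', p1=3, p2=NULLREV)
# no metadata header: never affected
assert not toy_is_affected(b'plain data', p1=3, p2=NULLREV)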
603 603
604 604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 605 rl = fl._revlog
606 606 is_censored = lambda: rl.iscensored(filerev)
607 607 delta_base = lambda: rl.deltaparent(filerev)
608 608 delta = lambda: rl._chunk(filerev)
609 609 full_text = lambda: rl.rawdata(filerev)
610 610 parent_revs = lambda: rl.parentrevs(filerev)
611 611 return _is_revision_affected_fast_inner(
612 612 is_censored,
613 613 delta_base,
614 614 delta,
615 615 full_text,
616 616 parent_revs,
617 617 filerev,
618 618 metadata_cache,
619 619 )
620 620
621 621
622 622 def _is_revision_affected_fast_inner(
623 623 is_censored,
624 624 delta_base,
625 625 delta,
626 626 full_text,
627 627 parent_revs,
628 628 filerev,
629 629 metadata_cache,
630 630 ):
631 631 """Optimization fast-path for `_is_revision_affected`.
632 632
633 633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 634 revision to check if its delta base has metadata, saving computation of the
635 635 full text by looking only at the current delta instead.
636 636
637 637 This optimization only works if the revisions are looked at in order."""
638 638
639 639 if is_censored():
640 640 # Censored revisions don't contain metadata, so they cannot be affected
641 641 metadata_cache[filerev] = False
642 642 return False
643 643
644 644 p1, p2 = parent_revs()
645 645 if p1 == nullrev or p2 != nullrev:
646 646 return False
647 647
648 648 delta_parent = delta_base()
649 649 parent_has_metadata = metadata_cache.get(delta_parent)
650 650 if parent_has_metadata is None:
651 651 return _is_revision_affected_inner(
652 652 full_text,
653 653 parent_revs,
654 654 filerev,
655 655 metadata_cache,
656 656 )
657 657
658 658 chunk = delta()
659 659 if not len(chunk):
660 660 # No diff for this revision
661 661 return parent_has_metadata
662 662
663 663 header_length = 12
664 664 if len(chunk) < header_length:
665 665 raise error.Abort(_(b"patch cannot be decoded"))
666 666
667 667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668 668
669 669 if start < 2: # len(b'\x01\n') == 2
670 670 # This delta does *something* to the metadata marker (if any).
671 671 # Check it the slow way
672 672 is_affected = _is_revision_affected_inner(
673 673 full_text,
674 674 parent_revs,
675 675 filerev,
676 676 metadata_cache,
677 677 )
678 678 return is_affected
679 679
680 680 # The diff did not remove or add the metadata header, it's then in the same
681 681 # situation as its parent
682 682 metadata_cache[filerev] = parent_has_metadata
683 683 return parent_has_metadata
684 684
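The fast path above only needs to peek at the first bdiff hunk header of the delta to decide whether the metadata marker could be touched; a standalone sketch with a hypothetical chunk and helper name:

import struct

def toy_delta_touches_metadata(chunk):
    header_length = 12
    if len(chunk) < header_length:
        raise ValueError("patch cannot be decoded")
    start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
    return start < 2  # len(b'\x01\n') == 2

# a hunk rewriting bytes 100..120 cannot add or remove the marker
chunk = struct.pack(b">lll", 100, 120, 4) + b'abcd'
assert not toy_delta_touches_metadata(chunk)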
685 685
686 686 def _from_report(ui, repo, context, from_report, dry_run):
687 687 """
688 688 Fix the revisions given in the `from_report` file, but still check if the
689 689 revisions are indeed affected to prevent an unfortunate cyclic situation
690 690 where we'd swap well-ordered parents again.
691 691
692 692 See the doc for `debug_fix_issue6528` for the format documentation.
693 693 """
694 694 ui.write(_(b"loading report file '%s'\n") % from_report)
695 695
696 696 with context(), open(from_report, mode='rb') as f:
697 697 for line in f.read().split(b'\n'):
698 698 if not line:
699 699 continue
700 700 filenodes, filename = line.split(b' ', 1)
701 701 fl = _filelog_from_filename(repo, filename)
702 702 to_fix = set(
703 703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 704 )
705 705 excluded = set()
706 706
707 707 for filerev in to_fix:
708 708 if _is_revision_affected(fl, filerev):
709 709 msg = b"found affected revision %d for filelog '%s'\n"
710 710 ui.warn(msg % (filerev, filename))
711 711 else:
712 712 msg = _(b"revision %s of file '%s' is not affected\n")
713 713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 714 ui.warn(msg)
715 715 excluded.add(filerev)
716 716
717 717 to_fix = to_fix - excluded
718 718 if not to_fix:
719 719 msg = _(b"no affected revisions were found for '%s'\n")
720 720 ui.write(msg % filename)
721 721 continue
722 722 if not dry_run:
723 723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724 724
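The report file parsed above is the one `repair_issue6528` writes at the end of this module: one line per filelog, with comma-separated hex filenodes, a space, then the path. A tiny round-trip sketch with shortened, hypothetical hex values (real filenodes are full hex node ids):

line = b"01ab,23cd data/foo.txt"
filenodes, filename = line.split(b' ', 1)
assert filenodes.split(b',') == [b'01ab', b'23cd']
assert filename == b'data/foo.txt'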
725 725
726 726 def filter_delta_issue6528(revlog, deltas_iter):
727 727 """filter incomind deltas to repaire issue 6528 on the fly"""
728 728 metadata_cache = {}
729 729
730 730 deltacomputer = deltas.deltacomputer(revlog)
731 731
732 732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 733 (
734 734 node,
735 735 p1_node,
736 736 p2_node,
737 737 linknode,
738 738 deltabase,
739 739 delta,
740 740 flags,
741 741 sidedata,
742 742 ) = d
743 743
744 744 if not revlog.index.has_node(deltabase):
745 745 raise error.LookupError(
746 746 deltabase, revlog.radix, _(b'unknown parent')
747 747 )
748 748 base_rev = revlog.rev(deltabase)
749 749 if not revlog.index.has_node(p1_node):
750 750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 751 p1_rev = revlog.rev(p1_node)
752 752 if not revlog.index.has_node(p2_node):
753 753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 754 p2_rev = revlog.rev(p2_node)
755 755
756 756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 757 delta_base = lambda: revlog.rev(delta_base)
758 758 delta_base = lambda: base_rev
759 759 parent_revs = lambda: (p1_rev, p2_rev)
760 760
761 761 def full_text():
762 762 # note: being able to reuse the full text computation in the
763 763 # underlying addrevision would be useful; however, this is a bit too
764 764 # intrusive for the "quick" issue6528 fix we are writing before the
765 765 # 5.8 release
766 766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767 767
768 768 revinfo = revlogutils.revisioninfo(
769 769 node,
770 770 p1_node,
771 771 p2_node,
772 772 [None],
773 773 textlen,
774 774 (base_rev, delta),
775 775 flags,
776 776 )
777 777 return deltacomputer.buildtext(revinfo)
778 778
779 779 is_affected = _is_revision_affected_fast_inner(
780 780 is_censored,
781 781 delta_base,
782 782 lambda: delta,
783 783 full_text,
784 784 parent_revs,
785 785 rev,
786 786 metadata_cache,
787 787 )
788 788 if is_affected:
789 789 d = (
790 790 node,
791 791 p2_node,
792 792 p1_node,
793 793 linknode,
794 794 deltabase,
795 795 delta,
796 796 flags,
797 797 sidedata,
798 798 )
799 799 yield d
800 800
801 801
802 802 def repair_issue6528(
803 803 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
804 804 ):
805 805 @contextlib.contextmanager
806 806 def context():
807 807 if dry_run or to_report: # No need for locking
808 808 yield
809 809 else:
810 810 with repo.wlock(), repo.lock():
811 811 yield
812 812
813 813 if from_report:
814 814 return _from_report(ui, repo, context, from_report, dry_run)
815 815
816 816 report_entries = []
817 817
818 818 with context():
819 819 files = list(
820 820 entry
821 821 for entry in repo.store.data_entries()
822 822 if entry.is_revlog and entry.is_filelog
823 823 )
824 824
825 825 progress = ui.makeprogress(
826 826 _(b"looking for affected revisions"),
827 827 unit=_(b"filelogs"),
828 828 total=len(files),
829 829 )
830 830 found_nothing = True
831 831
832 832 for entry in files:
833 833 progress.increment()
834 834 filename = entry.target_id
835 835 fl = _filelog_from_filename(repo, entry.target_id)
836 836
837 837 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
838 838 to_fix = set()
839 839 metadata_cache = {}
840 840 for filerev in fl.revs():
841 841 affected = _is_revision_affected_fast(
842 842 repo, fl, filerev, metadata_cache
843 843 )
844 844 if paranoid:
845 845 slow = _is_revision_affected(fl, filerev)
846 846 if slow != affected:
847 847 msg = _(b"paranoid check failed for '%s' at node %s")
848 848 node = binascii.hexlify(fl.node(filerev))
849 849 raise error.Abort(msg % (filename, node))
850 850 if affected:
851 851 msg = b"found affected revision %d for file '%s'\n"
852 852 ui.warn(msg % (filerev, filename))
853 853 found_nothing = False
854 854 if not dry_run:
855 855 if to_report:
856 856 to_fix.add(binascii.hexlify(fl.node(filerev)))
857 857 else:
858 858 to_fix.add(filerev)
859 859
860 860 if to_fix:
861 861 to_fix = sorted(to_fix)
862 862 if to_report:
863 863 report_entries.append((filename, to_fix))
864 864 else:
865 865 _reorder_filelog_parents(repo, fl, to_fix)
866 866
867 867 if found_nothing:
868 868 ui.write(_(b"no affected revisions were found\n"))
869 869
870 870 if to_report and report_entries:
871 871 with open(to_report, mode="wb") as f:
872 872 for path, to_fix in report_entries:
873 873 f.write(b"%s %s\n" % (b",".join(to_fix), path))
874 874
875 875 progress.complete()