revlog: remove the `_indexfp` method...
marmoute
r51974:5ffee3cf default
@@ -1,3800 +1,3796 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanket usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
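The three ellipsis functions above show the (read, write, raw) shape that every flag processor follows: read and write return a (text, validatehash) pair, while the raw processor only reports whether the rawtext can be hash-checked as-is. A minimal sketch of a custom processor built on that shape (the names are illustrative, not part of this change):

    def noop_read(rl, text):
        return text, False  # (text unchanged, do not validate hash)

    def noop_write(rl, text):
        return text, False

    def noop_raw(rl, text):
        return False  # rawtext is not safe to hash-check directly

    noop_processor = (noop_read, noop_write, noop_raw)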
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the implementation "fast" in "pure" python, because
167 167 # people using pure python don't really have performance considerations
168 168 # (and a wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
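A usage sketch, assuming only the attrs definitions above: the base _Config.copy() re-creates an instance from __dict__, which would share mutable values, so FeatureConfig.copy() also duplicates the options dict and lets the copies diverge independently:

    fc = FeatureConfig(compression_engine=b'zstd')
    fc2 = fc.copy()
    fc2.compression_engine_options[b'level'] = 3  # stays local to fc2
    assert b'level' not in fc.compression_engine_options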
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how big the index has to be to count as large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
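A worked example, assuming INDEX_HEADER is the usual 4-byte big-endian struct and the v1 constants REVLOGV1 == 1 and FLAG_INLINE_DATA == 1 << 16: the feature flags live in the high 16 bits of the header and the version in the low 16 bits.

    import struct

    header_bytes = struct.pack('>I', (1 << 16) | 1)  # FLAG_INLINE_DATA | REVLOGV1
    header = struct.unpack('>I', header_bytes)[0]
    assert header & ~0xFFFF == 1 << 16  # format flags: inline data
    assert header & 0xFFFF == 1         # format version: REVLOGV1
    # so revlog.is_inline_index(header_bytes) would return True here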
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must reliably be set by normal code, but
425 425 test, debug, or performance measurement code might not set it to an
426 426 accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 util.nouideprecwarn(
496 496 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
497 497 )
498 498 return self.delta_config.general_delta
499 499
500 500 @property
501 501 def _checkambig(self):
502 502 """temporary compatibility proxy"""
503 503 util.nouideprecwarn(
504 504 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
505 505 )
506 506 return self.data_config.check_ambig
507 507
508 508 @property
509 509 def _mmaplargeindex(self):
510 510 """temporary compatibility proxy"""
511 511 util.nouideprecwarn(
512 512 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
513 513 )
514 514 return self.data_config.mmap_large_index
515 515
516 516 @property
517 517 def _censorable(self):
518 518 """temporary compatibility proxy"""
519 519 util.nouideprecwarn(
520 520 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
521 521 )
522 522 return self.feature_config.censorable
523 523
524 524 @property
525 525 def _chunkcachesize(self):
526 526 """temporary compatibility proxy"""
527 527 util.nouideprecwarn(
528 528 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
529 529 )
530 530 return self.data_config.chunk_cache_size
531 531
532 532 @property
533 533 def _maxchainlen(self):
534 534 """temporary compatibility proxy"""
535 535 util.nouideprecwarn(
536 536 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
537 537 )
538 538 return self.delta_config.max_chain_len
539 539
540 540 @property
541 541 def _deltabothparents(self):
542 542 """temporary compatibility proxy"""
543 543 util.nouideprecwarn(
544 544 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
545 545 )
546 546 return self.delta_config.delta_both_parents
547 547
548 548 @property
549 549 def _candidate_group_chunk_size(self):
550 550 """temporary compatibility proxy"""
551 551 util.nouideprecwarn(
552 552 b"use revlog.delta_config.candidate_group_chunk_size",
553 553 b"6.6",
554 554 stacklevel=2,
555 555 )
556 556 return self.delta_config.candidate_group_chunk_size
557 557
558 558 @property
559 559 def _debug_delta(self):
560 560 """temporary compatibility proxy"""
561 561 util.nouideprecwarn(
562 562 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
563 563 )
564 564 return self.delta_config.debug_delta
565 565
566 566 @property
567 567 def _compengine(self):
568 568 """temporary compatibility proxy"""
569 569 util.nouideprecwarn(
570 570 b"use revlog.feature_config.compression_engine",
571 571 b"6.6",
572 572 stacklevel=2,
573 573 )
574 574 return self.feature_config.compression_engine
575 575
576 576 @property
577 577 def _compengineopts(self):
578 578 """temporary compatibility proxy"""
579 579 util.nouideprecwarn(
580 580 b"use revlog.feature_config.compression_engine_options",
581 581 b"6.6",
582 582 stacklevel=2,
583 583 )
584 584 return self.feature_config.compression_engine_options
585 585
586 586 @property
587 587 def _maxdeltachainspan(self):
588 588 """temporary compatibility proxy"""
589 589 util.nouideprecwarn(
590 590 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
591 591 )
592 592 return self.delta_config.max_deltachain_span
593 593
594 594 @property
595 595 def _withsparseread(self):
596 596 """temporary compatibility proxy"""
597 597 util.nouideprecwarn(
598 598 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
599 599 )
600 600 return self.data_config.with_sparse_read
601 601
602 602 @property
603 603 def _sparserevlog(self):
604 604 """temporary compatibility proxy"""
605 605 util.nouideprecwarn(
606 606 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
607 607 )
608 608 return self.delta_config.sparse_revlog
609 609
610 610 @property
611 611 def hassidedata(self):
612 612 """temporary compatibility proxy"""
613 613 util.nouideprecwarn(
614 614 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
615 615 )
616 616 return self.feature_config.has_side_data
617 617
618 618 @property
619 619 def _srdensitythreshold(self):
620 620 """temporary compatibility proxy"""
621 621 util.nouideprecwarn(
622 622 b"use revlog.data_config.sr_density_threshold",
623 623 b"6.6",
624 624 stacklevel=2,
625 625 )
626 626 return self.data_config.sr_density_threshold
627 627
628 628 @property
629 629 def _srmingapsize(self):
630 630 """temporary compatibility proxy"""
631 631 util.nouideprecwarn(
632 632 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
633 633 )
634 634 return self.data_config.sr_min_gap_size
635 635
636 636 @property
637 637 def _compute_rank(self):
638 638 """temporary compatibility proxy"""
639 639 util.nouideprecwarn(
640 640 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
641 641 )
642 642 return self.feature_config.compute_rank
643 643
644 644 @property
645 645 def canonical_parent_order(self):
646 646 """temporary compatibility proxy"""
647 647 util.nouideprecwarn(
648 648 b"use revlog.feature_config.canonical_parent_order",
649 649 b"6.6",
650 650 stacklevel=2,
651 651 )
652 652 return self.feature_config.canonical_parent_order
653 653
654 654 @property
655 655 def _lazydelta(self):
656 656 """temporary compatibility proxy"""
657 657 util.nouideprecwarn(
658 658 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
659 659 )
660 660 return self.delta_config.lazy_delta
661 661
662 662 @property
663 663 def _lazydeltabase(self):
664 664 """temporary compatibility proxy"""
665 665 util.nouideprecwarn(
666 666 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
667 667 )
668 668 return self.delta_config.lazy_delta_base
669 669
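All of the proxies above follow the same pattern, so migrating callers is mechanical; a sketch of both spellings, assuming `rl` is a revlog instance:

    rl._lazydeltabase                # still works, but warns via util.nouideprecwarn
    rl.delta_config.lazy_delta_base  # the replacement spelling
    rl.data_config.check_ambig       # likewise replaces rl._checkambig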
670 670 def _init_opts(self):
671 671 """process options (from above/config) to setup associated default revlog mode
672 672
673 673 These values might be affected when actually reading on disk information.
674 674
675 675 The relevant values are returned for use in _loadindex().
676 676
677 677 * newversionflags:
678 678 version header to use if we need to create a new revlog
679 679
680 680 * mmapindexthreshold:
681 681 minimal index size at which to start using mmap
682 682
683 683 * force_nodemap:
684 684 force the usage of a "development" version of the nodemap code
685 685 """
686 686 opts = self.opener.options
687 687
688 688 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
689 689 new_header = CHANGELOGV2
690 690 compute_rank = opts.get(b'changelogv2.compute-rank', True)
691 691 self.feature_config.compute_rank = compute_rank
692 692 elif b'revlogv2' in opts:
693 693 new_header = REVLOGV2
694 694 elif b'revlogv1' in opts:
695 695 new_header = REVLOGV1 | FLAG_INLINE_DATA
696 696 if b'generaldelta' in opts:
697 697 new_header |= FLAG_GENERALDELTA
698 698 elif b'revlogv0' in self.opener.options:
699 699 new_header = REVLOGV0
700 700 else:
701 701 new_header = REVLOG_DEFAULT_VERSION
702 702
703 703 mmapindexthreshold = None
704 704 if self.data_config.mmap_large_index:
705 705 mmapindexthreshold = self.data_config.mmap_index_threshold
706 706 if self.feature_config.enable_ellipsis:
707 707 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
708 708
709 709 # revlog v0 doesn't have flag processors
710 710 for flag, processor in opts.get(b'flagprocessors', {}).items():
711 711 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
712 712
713 713 chunk_cache_size = self.data_config.chunk_cache_size
714 714 if chunk_cache_size <= 0:
715 715 raise error.RevlogError(
716 716 _(b'revlog chunk cache size %r is not greater than 0')
717 717 % chunk_cache_size
718 718 )
719 719 elif chunk_cache_size & (chunk_cache_size - 1):
720 720 raise error.RevlogError(
721 721 _(b'revlog chunk cache size %r is not a power of 2')
722 722 % chunk_cache_size
723 723 )
724 724 force_nodemap = opts.get(b'devel-force-nodemap', False)
725 725 return new_header, mmapindexthreshold, force_nodemap
726 726
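The `chunk_cache_size & (chunk_cache_size - 1)` test above is the standard power-of-two check: a power of two has exactly one bit set, so subtracting one flips all the lower bits and the AND comes out zero. For example:

    for size in (65536, 65535, 65537):
        is_pow2 = size > 0 and size & (size - 1) == 0
        print(size, is_pow2)  # 65536 True, 65535 False, 65537 False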
727 727 def _get_data(self, filepath, mmap_threshold, size=None):
728 728 """return a file content with or without mmap
729 729
730 730 If the file is missing return the empty string"""
731 731 try:
732 732 with self.opener(filepath) as fp:
733 733 if mmap_threshold is not None:
734 734 file_size = self.opener.fstat(fp).st_size
735 735 if file_size >= mmap_threshold:
736 736 if size is not None:
737 737 # avoid potential mmap crash
738 738 size = min(file_size, size)
739 739 # TODO: should .close() to release resources without
740 740 # relying on Python GC
741 741 if size is None:
742 742 return util.buffer(util.mmapread(fp))
743 743 else:
744 744 return util.buffer(util.mmapread(fp, size))
745 745 if size is None:
746 746 return fp.read()
747 747 else:
748 748 return fp.read(size)
749 749 except FileNotFoundError:
750 750 return b''
751 751
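A call-side sketch (`rl` is a revlog instance; the threshold value is illustrative): files at least this large come back as an mmap-backed buffer, smaller ones as plain bytes, and a missing file as b''.

    data = rl._get_data(rl._indexfile, mmap_threshold=1024 * 1024)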
752 752 def get_streams(self, max_linkrev, force_inline=False):
753 753 """return a list of streams that represent this revlog
754 754
755 755 This is used by stream-clone to do bytes-to-bytes copies of a repository.
756 756
757 757 This streams data for all revisions that refer to a changelog revision up
758 758 to `max_linkrev`.
759 759
760 760 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
761 761
762 762 It returns a list of three-tuples:
763 763
764 764 [
765 765 (filename, bytes_stream, stream_size),
766 766 …
767 767 ]
768 768 """
769 769 n = len(self)
770 770 index = self.index
771 771 while n > 0:
772 772 linkrev = index[n - 1][4]
773 773 if linkrev < max_linkrev:
774 774 break
775 775 # note: this loop will rarely go through multiple iterations, since
776 776 # it only traverses commits created during the current streaming
777 777 # pull operation.
778 778 #
779 779 # If this becomes a problem, using a binary search should cap the
780 780 # runtime of this.
781 781 n = n - 1
782 782 if n == 0:
783 783 # no data to send
784 784 return []
785 785 index_size = n * index.entry_size
786 786 data_size = self.end(n - 1)
787 787
788 788 # XXX we might have been split (or stripped) since the object
789 789 # initialization. We need to close this race too, by having a way to
790 790 # pre-open the files we feed to the revlog and never closing them before
791 791 # we are done streaming.
792 792
793 793 if self._inline:
794 794
795 795 def get_stream():
796 with self._indexfp() as fp:
796 with self.opener(self._indexfile, mode=b"r") as fp:
797 797 yield None
798 798 size = index_size + data_size
799 799 if size <= 65536:
800 800 yield fp.read(size)
801 801 else:
802 802 yield from util.filechunkiter(fp, limit=size)
803 803
804 804 inline_stream = get_stream()
805 805 next(inline_stream)
806 806 return [
807 807 (self._indexfile, inline_stream, index_size + data_size),
808 808 ]
809 809 elif force_inline:
810 810
811 811 def get_stream():
812 812 with self.reading():
813 813 yield None
814 814
815 815 for rev in range(n):
816 816 idx = self.index.entry_binary(rev)
817 817 if rev == 0 and self._docket is None:
818 818 # re-inject the inline flag
819 819 header = self._format_flags
820 820 header |= self._format_version
821 821 header |= FLAG_INLINE_DATA
822 822 header = self.index.pack_header(header)
823 823 idx = header + idx
824 824 yield idx
825 825 yield self._getsegmentforrevs(rev, rev)[1]
826 826
827 827 inline_stream = get_stream()
828 828 next(inline_stream)
829 829 return [
830 830 (self._indexfile, inline_stream, index_size + data_size),
831 831 ]
832 832 else:
833 833
834 834 def get_index_stream():
835 with self._indexfp() as fp:
835 with self.opener(self._indexfile, mode=b"r") as fp:
836 836 yield None
837 837 if index_size <= 65536:
838 838 yield fp.read(index_size)
839 839 else:
840 840 yield from util.filechunkiter(fp, limit=index_size)
841 841
842 842 def get_data_stream():
843 843 with self._datafp() as fp:
844 844 yield None
845 845 if data_size <= 65536:
846 846 yield fp.read(data_size)
847 847 else:
848 848 yield from util.filechunkiter(fp, limit=data_size)
849 849
850 850 index_stream = get_index_stream()
851 851 next(index_stream)
852 852 data_stream = get_data_stream()
853 853 next(data_stream)
854 854 return [
855 855 (self._datafile, data_stream, data_size),
856 856 (self._indexfile, index_stream, index_size),
857 857 ]
858 858
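A consumer-side sketch, with `rl` a revlog, `tip_linkrev` a changelog revision, and `vfs` a hypothetical destination opener: each stream generator has already been primed past its initial `yield None`, so iterating it yields raw bytes until `size` is reached.

    for name, stream, size in rl.get_streams(max_linkrev=tip_linkrev):
        with vfs(name, b'wb') as out:
            written = 0
            for chunk in stream:
                written += len(chunk)
                out.write(chunk)
            assert written == size  # sizes are computed before streaming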
859 859 def _loadindex(self, docket=None):
860 860
861 861 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
862 862
863 863 if self.postfix is not None:
864 864 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
865 865 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
866 866 entry_point = b'%s.i.a' % self.radix
867 867 elif self._try_split and self.opener.exists(self._split_index_file):
868 868 entry_point = self._split_index_file
869 869 else:
870 870 entry_point = b'%s.i' % self.radix
871 871
872 872 if docket is not None:
873 873 self._docket = docket
874 874 self._docket_file = entry_point
875 875 else:
876 876 self._initempty = True
877 877 entry_data = self._get_data(entry_point, mmapindexthreshold)
878 878 if len(entry_data) > 0:
879 879 header = INDEX_HEADER.unpack(entry_data[:4])[0]
880 880 self._initempty = False
881 881 else:
882 882 header = new_header
883 883
884 884 self._format_flags = header & ~0xFFFF
885 885 self._format_version = header & 0xFFFF
886 886
887 887 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
888 888 if supported_flags is None:
889 889 msg = _(b'unknown version (%d) in revlog %s')
890 890 msg %= (self._format_version, self.display_id)
891 891 raise error.RevlogError(msg)
892 892 elif self._format_flags & ~supported_flags:
893 893 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
894 894 display_flag = self._format_flags >> 16
895 895 msg %= (display_flag, self._format_version, self.display_id)
896 896 raise error.RevlogError(msg)
897 897
898 898 features = FEATURES_BY_VERSION[self._format_version]
899 899 self._inline = features[b'inline'](self._format_flags)
900 900 self.delta_config.general_delta = features[b'generaldelta'](
901 901 self._format_flags
902 902 )
903 903 self.feature_config.has_side_data = features[b'sidedata']
904 904
905 905 if not features[b'docket']:
906 906 self._indexfile = entry_point
907 907 index_data = entry_data
908 908 else:
909 909 self._docket_file = entry_point
910 910 if self._initempty:
911 911 self._docket = docketutil.default_docket(self, header)
912 912 else:
913 913 self._docket = docketutil.parse_docket(
914 914 self, entry_data, use_pending=self._trypending
915 915 )
916 916
917 917 if self._docket is not None:
918 918 self._indexfile = self._docket.index_filepath()
919 919 index_data = b''
920 920 index_size = self._docket.index_end
921 921 if index_size > 0:
922 922 index_data = self._get_data(
923 923 self._indexfile, mmapindexthreshold, size=index_size
924 924 )
925 925 if len(index_data) < index_size:
926 926 msg = _(b'too few index data for %s: got %d, expected %d')
927 927 msg %= (self.display_id, len(index_data), index_size)
928 928 raise error.RevlogError(msg)
929 929
930 930 self._inline = False
931 931 # generaldelta implied by version 2 revlogs.
932 932 self.delta_config.general_delta = True
933 933 # the logic for persistent nodemap will be dealt with within the
934 934 # main docket, so disable it for now.
935 935 self._nodemap_file = None
936 936
937 937 if self._docket is not None:
938 938 self._datafile = self._docket.data_filepath()
939 939 self._sidedatafile = self._docket.sidedata_filepath()
940 940 elif self.postfix is None:
941 941 self._datafile = b'%s.d' % self.radix
942 942 else:
943 943 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
944 944
945 945 self.nodeconstants = sha1nodeconstants
946 946 self.nullid = self.nodeconstants.nullid
947 947
948 948 # sparse-revlog can't be on without general-delta (issue6056)
949 949 if not self.delta_config.general_delta:
950 950 self.delta_config.sparse_revlog = False
951 951
952 952 self._storedeltachains = True
953 953
954 954 devel_nodemap = (
955 955 self._nodemap_file
956 956 and force_nodemap
957 957 and parse_index_v1_nodemap is not None
958 958 )
959 959
960 960 use_rust_index = False
961 961 if rustrevlog is not None:
962 962 if self._nodemap_file is not None:
963 963 use_rust_index = True
964 964 else:
965 965 use_rust_index = self.opener.options.get(b'rust.index')
966 966
967 967 self._parse_index = parse_index_v1
968 968 if self._format_version == REVLOGV0:
969 969 self._parse_index = revlogv0.parse_index_v0
970 970 elif self._format_version == REVLOGV2:
971 971 self._parse_index = parse_index_v2
972 972 elif self._format_version == CHANGELOGV2:
973 973 self._parse_index = parse_index_cl_v2
974 974 elif devel_nodemap:
975 975 self._parse_index = parse_index_v1_nodemap
976 976 elif use_rust_index:
977 977 self._parse_index = parse_index_v1_mixed
978 978 try:
979 979 d = self._parse_index(index_data, self._inline)
980 980 index, chunkcache = d
981 981 use_nodemap = (
982 982 not self._inline
983 983 and self._nodemap_file is not None
984 984 and hasattr(index, 'update_nodemap_data')
985 985 )
986 986 if use_nodemap:
987 987 nodemap_data = nodemaputil.persisted_data(self)
988 988 if nodemap_data is not None:
989 989 docket = nodemap_data[0]
990 990 if (
991 991 len(d[0]) > docket.tip_rev
992 992 and d[0][docket.tip_rev][7] == docket.tip_node
993 993 ):
994 994 # no changelog tampering
995 995 self._nodemap_docket = docket
996 996 index.update_nodemap_data(*nodemap_data)
997 997 except (ValueError, IndexError):
998 998 raise error.RevlogError(
999 999 _(b"index %s is corrupted") % self.display_id
1000 1000 )
1001 1001 self.index = index
1002 1002 self._segmentfile = randomaccessfile.randomaccessfile(
1003 1003 self.opener,
1004 1004 (self._indexfile if self._inline else self._datafile),
1005 1005 self.data_config.chunk_cache_size,
1006 1006 chunkcache,
1007 1007 )
1008 1008 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1009 1009 self.opener,
1010 1010 self._sidedatafile,
1011 1011 self.data_config.chunk_cache_size,
1012 1012 )
1013 1013 # revnum -> (chain-length, sum-delta-length)
1014 1014 self._chaininfocache = util.lrucachedict(500)
1015 1015 # revlog header -> revlog compressor
1016 1016 self._decompressors = {}
1017 1017
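For revlogs without a docket, the file names above derive directly from the radix; a summary of the mapping, using a hypothetical filelog radix (the `.i.a` and split variants are only consulted when trypending / try_split ask for them):

    radix = b'data/foo.txt'
    index_file = b'%s.i' % radix    # b'data/foo.txt.i'
    data_file = b'%s.d' % radix     # b'data/foo.txt.d' (non-inline only)
    pending = b'%s.i.a' % radix     # transaction-pending index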
1018 1018 def get_revlog(self):
1019 1019 """simple function to mirror API of other not-really-revlog API"""
1020 1020 return self
1021 1021
1022 1022 @util.propertycache
1023 1023 def revlog_kind(self):
1024 1024 return self.target[0]
1025 1025
1026 1026 @util.propertycache
1027 1027 def display_id(self):
1028 1028 """The public facing "ID" of the revlog that we use in message"""
1029 1029 if self.revlog_kind == KIND_FILELOG:
1030 1030 # Reference the file without the "data/" prefix, so it is familiar
1031 1031 # to the user.
1032 1032 return self.target[1]
1033 1033 else:
1034 1034 return self.radix
1035 1035
1036 1036 def _get_decompressor(self, t):
1037 1037 try:
1038 1038 compressor = self._decompressors[t]
1039 1039 except KeyError:
1040 1040 try:
1041 1041 engine = util.compengines.forrevlogheader(t)
1042 1042 compressor = engine.revlogcompressor(
1043 1043 self.feature_config.compression_engine_options
1044 1044 )
1045 1045 self._decompressors[t] = compressor
1046 1046 except KeyError:
1047 1047 raise error.RevlogError(
1048 1048 _(b'unknown compression type %s') % binascii.hexlify(t)
1049 1049 )
1050 1050 return compressor
1051 1051
1052 1052 @util.propertycache
1053 1053 def _compressor(self):
1054 1054 engine = util.compengines[self.feature_config.compression_engine]
1055 1055 return engine.revlogcompressor(
1056 1056 self.feature_config.compression_engine_options
1057 1057 )
1058 1058
1059 1059 @util.propertycache
1060 1060 def _decompressor(self):
1061 1061 """the default decompressor"""
1062 1062 if self._docket is None:
1063 1063 return None
1064 1064 t = self._docket.default_compression_header
1065 1065 c = self._get_decompressor(t)
1066 1066 return c.decompress
1067 1067
1068 def _indexfp(self):
1069 """file object for the revlog's index file"""
1070 return self.opener(self._indexfile, mode=b"r")
1071
1072 1068 def __index_write_fp(self):
1073 1069 # You should not use this directly; use `_writing` instead
1074 1070 try:
1075 1071 f = self.opener(
1076 1072 self._indexfile,
1077 1073 mode=b"r+",
1078 1074 checkambig=self.data_config.check_ambig,
1079 1075 )
1080 1076 if self._docket is None:
1081 1077 f.seek(0, os.SEEK_END)
1082 1078 else:
1083 1079 f.seek(self._docket.index_end, os.SEEK_SET)
1084 1080 return f
1085 1081 except FileNotFoundError:
1086 1082 return self.opener(
1087 1083 self._indexfile,
1088 1084 mode=b"w+",
1089 1085 checkambig=self.data_config.check_ambig,
1090 1086 )
1091 1087
1092 1088 def __index_new_fp(self):
1093 1089 # You should not use this unless you are upgrading from an inline revlog
1094 1090 return self.opener(
1095 1091 self._indexfile,
1096 1092 mode=b"w",
1097 1093 checkambig=self.data_config.check_ambig,
1098 1094 atomictemp=True,
1099 1095 )
1100 1096
1101 1097 def _datafp(self, mode=b'r'):
1102 1098 """file object for the revlog's data file"""
1103 1099 return self.opener(self._datafile, mode=mode)
1104 1100
1105 1101 @contextlib.contextmanager
1106 1102 def _sidedatareadfp(self):
1107 1103 """file object suitable to read sidedata"""
1108 1104 if self._writinghandles:
1109 1105 yield self._writinghandles[2]
1110 1106 else:
1111 1107 with self.opener(self._sidedatafile) as fp:
1112 1108 yield fp
1113 1109
1114 1110 def tiprev(self):
1115 1111 return len(self.index) - 1
1116 1112
1117 1113 def tip(self):
1118 1114 return self.node(self.tiprev())
1119 1115
1120 1116 def __contains__(self, rev):
1121 1117 return 0 <= rev < len(self)
1122 1118
1123 1119 def __len__(self):
1124 1120 return len(self.index)
1125 1121
1126 1122 def __iter__(self):
1127 1123 return iter(range(len(self)))
1128 1124
1129 1125 def revs(self, start=0, stop=None):
1130 1126 """iterate over all rev in this revlog (from start to stop)"""
1131 1127 return storageutil.iterrevs(len(self), start=start, stop=stop)
1132 1128
1133 1129 def hasnode(self, node):
1134 1130 try:
1135 1131 self.rev(node)
1136 1132 return True
1137 1133 except KeyError:
1138 1134 return False
1139 1135
1140 1136 def _candelta(self, baserev, rev):
1141 1137 """whether two revisions (baserev, rev) can be delta-ed or not"""
1142 1138 # Disable delta if either rev requires a content-changing flag
1143 1139 # processor (ex. LFS). This is because such flag processor can alter
1144 1140 # the rawtext content that the delta will be based on, and two clients
1145 1141 # could have a same revlog node with different flags (i.e. different
1146 1142 # rawtext contents) and the delta could be incompatible.
1147 1143 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1148 1144 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1149 1145 ):
1150 1146 return False
1151 1147 return True
1152 1148
1153 1149 def update_caches(self, transaction):
1154 1150 """update on disk cache
1155 1151
1156 1152 If a transaction is passed, the update may be delayed to transaction
1157 1153 commit."""
1158 1154 if self._nodemap_file is not None:
1159 1155 if transaction is None:
1160 1156 nodemaputil.update_persistent_nodemap(self)
1161 1157 else:
1162 1158 nodemaputil.setup_persistent_nodemap(transaction, self)
1163 1159
1164 1160 def clearcaches(self):
1165 1161 """Clear in-memory caches"""
1166 1162 self._revisioncache = None
1167 1163 self._chainbasecache.clear()
1168 1164 self._segmentfile.clear_cache()
1169 1165 self._segmentfile_sidedata.clear_cache()
1170 1166 self._pcache = {}
1171 1167 self._nodemap_docket = None
1172 1168 self.index.clearcaches()
1173 1169 # The python code is the one responsible for validating the docket, so we
1174 1170 # end up having to refresh it here.
1175 1171 use_nodemap = (
1176 1172 not self._inline
1177 1173 and self._nodemap_file is not None
1178 1174 and hasattr(self.index, 'update_nodemap_data')
1179 1175 )
1180 1176 if use_nodemap:
1181 1177 nodemap_data = nodemaputil.persisted_data(self)
1182 1178 if nodemap_data is not None:
1183 1179 self._nodemap_docket = nodemap_data[0]
1184 1180 self.index.update_nodemap_data(*nodemap_data)
1185 1181
1186 1182 def rev(self, node):
1187 1183 """return the revision number associated with a <nodeid>"""
1188 1184 try:
1189 1185 return self.index.rev(node)
1190 1186 except TypeError:
1191 1187 raise
1192 1188 except error.RevlogError:
1193 1189 # parsers.c radix tree lookup failed
1194 1190 if (
1195 1191 node == self.nodeconstants.wdirid
1196 1192 or node in self.nodeconstants.wdirfilenodeids
1197 1193 ):
1198 1194 raise error.WdirUnsupported
1199 1195 raise error.LookupError(node, self.display_id, _(b'no node'))
1200 1196
1201 1197 # Accessors for index entries.
1202 1198
1203 1199 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1204 1200 # are flags.
1205 1201 def start(self, rev):
1206 1202 return int(self.index[rev][0] >> 16)
1207 1203
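A worked example of that packing: with the 6-byte offset and 2-byte flags sharing the first index field, an offset of 1000 carrying flag 0x0001 is stored as (1000 << 16) | 1.

    packed = (1000 << 16) | 0x0001
    assert packed >> 16 == 1000       # what start(rev) extracts
    assert packed & 0xFFFF == 0x0001  # what flags(rev), below, extracts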
1208 1204 def sidedata_cut_off(self, rev):
1209 1205 sd_cut_off = self.index[rev][8]
1210 1206 if sd_cut_off != 0:
1211 1207 return sd_cut_off
1212 1208 # This is some annoying dance, because entries without sidedata
1213 1209 # currently use 0 as their offset (instead of previous-offset +
1214 1210 # previous-size).
1215 1211 #
1216 1212 # We should reconsider this sidedata → 0 sidedata_offset policy.
1217 1213 # In the meantime, we need this.
1218 1214 while 0 <= rev:
1219 1215 e = self.index[rev]
1220 1216 if e[9] != 0:
1221 1217 return e[8] + e[9]
1222 1218 rev -= 1
1223 1219 return 0
1224 1220
1225 1221 def flags(self, rev):
1226 1222 return self.index[rev][0] & 0xFFFF
1227 1223
1228 1224 def length(self, rev):
1229 1225 return self.index[rev][1]
1230 1226
1231 1227 def sidedata_length(self, rev):
1232 1228 if not self.feature_config.has_side_data:
1233 1229 return 0
1234 1230 return self.index[rev][9]
1235 1231
1236 1232 def rawsize(self, rev):
1237 1233 """return the length of the uncompressed text for a given revision"""
1238 1234 l = self.index[rev][2]
1239 1235 if l >= 0:
1240 1236 return l
1241 1237
1242 1238 t = self.rawdata(rev)
1243 1239 return len(t)
1244 1240
1245 1241 def size(self, rev):
1246 1242 """length of non-raw text (processed by a "read" flag processor)"""
1247 1243 # fast path: if no "read" flag processor could change the content,
1248 1244 # size is rawsize. note: ELLIPSIS is known to not change the content.
1249 1245 flags = self.flags(rev)
1250 1246 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1251 1247 return self.rawsize(rev)
1252 1248
1253 1249 return len(self.revision(rev))
1254 1250
1255 1251 def fast_rank(self, rev):
1256 1252 """Return the rank of a revision if already known, or None otherwise.
1257 1253
1258 1254 The rank of a revision is the size of the sub-graph it defines as a
1259 1255 head. Equivalently, the rank of a revision `r` is the size of the set
1260 1256 `ancestors(r)`, `r` included.
1261 1257
1262 1258 This method returns the rank retrieved from the revlog in constant
1263 1259 time. It makes no attempt at computing unknown values for versions of
1264 1260 the revlog which do not persist the rank.
1265 1261 """
1266 1262 rank = self.index[rev][ENTRY_RANK]
1267 1263 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1268 1264 return None
1269 1265 if rev == nullrev:
1270 1266 return 0 # convention
1271 1267 return rank
1272 1268
1273 1269 def chainbase(self, rev):
1274 1270 base = self._chainbasecache.get(rev)
1275 1271 if base is not None:
1276 1272 return base
1277 1273
1278 1274 index = self.index
1279 1275 iterrev = rev
1280 1276 base = index[iterrev][3]
1281 1277 while base != iterrev:
1282 1278 iterrev = base
1283 1279 base = index[iterrev][3]
1284 1280
1285 1281 self._chainbasecache[rev] = base
1286 1282 return base
1287 1283
1288 1284 def linkrev(self, rev):
1289 1285 return self.index[rev][4]
1290 1286
1291 1287 def parentrevs(self, rev):
1292 1288 try:
1293 1289 entry = self.index[rev]
1294 1290 except IndexError:
1295 1291 if rev == wdirrev:
1296 1292 raise error.WdirUnsupported
1297 1293 raise
1298 1294
1299 1295 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1300 1296 return entry[6], entry[5]
1301 1297 else:
1302 1298 return entry[5], entry[6]
1303 1299
1304 1300 # fast parentrevs(rev) where rev isn't filtered
1305 1301 _uncheckedparentrevs = parentrevs
1306 1302
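A worked example of the canonical ordering, assuming canonical_parent_order is set: a stored parent pair of (nullrev, 5) is reported as (5, nullrev), so the first parent is non-null whenever any parent is.

    nullrev = -1
    p1, p2 = nullrev, 5               # as stored in entry[5], entry[6]
    if p1 == nullrev:
        p1, p2 = p2, p1               # the swap performed above
    assert (p1, p2) == (5, nullrev)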
1307 1303 def node(self, rev):
1308 1304 try:
1309 1305 return self.index[rev][7]
1310 1306 except IndexError:
1311 1307 if rev == wdirrev:
1312 1308 raise error.WdirUnsupported
1313 1309 raise
1314 1310
1315 1311 # Derived from index values.
1316 1312
1317 1313 def end(self, rev):
1318 1314 return self.start(rev) + self.length(rev)
1319 1315
1320 1316 def parents(self, node):
1321 1317 i = self.index
1322 1318 d = i[self.rev(node)]
1323 1319 # inline node() to avoid function call overhead
1324 1320 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1325 1321 return i[d[6]][7], i[d[5]][7]
1326 1322 else:
1327 1323 return i[d[5]][7], i[d[6]][7]
1328 1324
1329 1325 def chainlen(self, rev):
1330 1326 return self._chaininfo(rev)[0]
1331 1327
1332 1328 def _chaininfo(self, rev):
1333 1329 chaininfocache = self._chaininfocache
1334 1330 if rev in chaininfocache:
1335 1331 return chaininfocache[rev]
1336 1332 index = self.index
1337 1333 generaldelta = self.delta_config.general_delta
1338 1334 iterrev = rev
1339 1335 e = index[iterrev]
1340 1336 clen = 0
1341 1337 compresseddeltalen = 0
1342 1338 while iterrev != e[3]:
1343 1339 clen += 1
1344 1340 compresseddeltalen += e[1]
1345 1341 if generaldelta:
1346 1342 iterrev = e[3]
1347 1343 else:
1348 1344 iterrev -= 1
1349 1345 if iterrev in chaininfocache:
1350 1346 t = chaininfocache[iterrev]
1351 1347 clen += t[0]
1352 1348 compresseddeltalen += t[1]
1353 1349 break
1354 1350 e = index[iterrev]
1355 1351 else:
1356 1352 # Add text length of base since decompressing that also takes
1357 1353 # work. For cache hits the length is already included.
1358 1354 compresseddeltalen += e[1]
1359 1355 r = (clen, compresseddeltalen)
1360 1356 chaininfocache[rev] = r
1361 1357 return r
1362 1358
1363 1359 def _deltachain(self, rev, stoprev=None):
1364 1360 """Obtain the delta chain for a revision.
1365 1361
1366 1362 ``stoprev`` specifies a revision to stop at. If not specified, we
1367 1363 stop at the base of the chain.
1368 1364
1369 1365 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1370 1366 revs in ascending order and ``stopped`` is a bool indicating whether
1371 1367 ``stoprev`` was hit.
1372 1368 """
1373 1369 generaldelta = self.delta_config.general_delta
1374 1370 # Try C implementation.
1375 1371 try:
1376 1372 return self.index.deltachain(rev, stoprev, generaldelta)
1377 1373 except AttributeError:
1378 1374 pass
1379 1375
1380 1376 chain = []
1381 1377
1382 1378 # Alias to prevent attribute lookup in tight loop.
1383 1379 index = self.index
1384 1380
1385 1381 iterrev = rev
1386 1382 e = index[iterrev]
1387 1383 while iterrev != e[3] and iterrev != stoprev:
1388 1384 chain.append(iterrev)
1389 1385 if generaldelta:
1390 1386 iterrev = e[3]
1391 1387 else:
1392 1388 iterrev -= 1
1393 1389 e = index[iterrev]
1394 1390
1395 1391 if iterrev == stoprev:
1396 1392 stopped = True
1397 1393 else:
1398 1394 chain.append(iterrev)
1399 1395 stopped = False
1400 1396
1401 1397 chain.reverse()
1402 1398 return chain, stopped
1403 1399
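A pure-Python illustration of the fallback walk over a toy base table (index entry [3] is the delta base; a revision whose base is itself is a full snapshot):

    bases = {0: 0, 1: 0, 2: 1, 3: 2}  # rev -> delta base (toy data)

    def toy_chain(rev):
        chain = []
        while bases[rev] != rev:      # stop at the snapshot
            chain.append(rev)
            rev = bases[rev]          # generaldelta: jump to the stored base
        chain.append(rev)
        chain.reverse()
        return chain

    assert toy_chain(3) == [0, 1, 2, 3]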
1404 1400 def ancestors(self, revs, stoprev=0, inclusive=False):
1405 1401 """Generate the ancestors of 'revs' in reverse revision order.
1406 1402 Does not generate revs lower than stoprev.
1407 1403
1408 1404 See the documentation for ancestor.lazyancestors for more details."""
1409 1405
1410 1406 # first, make sure start revisions aren't filtered
1411 1407 revs = list(revs)
1412 1408 checkrev = self.node
1413 1409 for r in revs:
1414 1410 checkrev(r)
1415 1411 # and we're sure ancestors aren't filtered as well
1416 1412
1417 1413 if rustancestor is not None and self.index.rust_ext_compat:
1418 1414 lazyancestors = rustancestor.LazyAncestors
1419 1415 arg = self.index
1420 1416 else:
1421 1417 lazyancestors = ancestor.lazyancestors
1422 1418 arg = self._uncheckedparentrevs
1423 1419 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1424 1420
1425 1421 def descendants(self, revs):
1426 1422 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1427 1423
1428 1424 def findcommonmissing(self, common=None, heads=None):
1429 1425 """Return a tuple of the ancestors of common and the ancestors of heads
1430 1426 that are not ancestors of common. In revset terminology, we return the
1431 1427 tuple:
1432 1428
1433 1429 ::common, (::heads) - (::common)
1434 1430
1435 1431 The list is sorted by revision number, meaning it is
1436 1432 topologically sorted.
1437 1433
1438 1434 'heads' and 'common' are both lists of node IDs. If heads is
1439 1435 not supplied, uses all of the revlog's heads. If common is not
1440 1436 supplied, uses nullid."""
1441 1437 if common is None:
1442 1438 common = [self.nullid]
1443 1439 if heads is None:
1444 1440 heads = self.heads()
1445 1441
1446 1442 common = [self.rev(n) for n in common]
1447 1443 heads = [self.rev(n) for n in heads]
1448 1444
1449 1445 # we want the ancestors, but inclusive
1450 1446 class lazyset:
1451 1447 def __init__(self, lazyvalues):
1452 1448 self.addedvalues = set()
1453 1449 self.lazyvalues = lazyvalues
1454 1450
1455 1451 def __contains__(self, value):
1456 1452 return value in self.addedvalues or value in self.lazyvalues
1457 1453
1458 1454 def __iter__(self):
1459 1455 added = self.addedvalues
1460 1456 for r in added:
1461 1457 yield r
1462 1458 for r in self.lazyvalues:
1463 1459 if not r in added:
1464 1460 yield r
1465 1461
1466 1462 def add(self, value):
1467 1463 self.addedvalues.add(value)
1468 1464
1469 1465 def update(self, values):
1470 1466 self.addedvalues.update(values)
1471 1467
1472 1468 has = lazyset(self.ancestors(common))
1473 1469 has.add(nullrev)
1474 1470 has.update(common)
1475 1471
1476 1472 # take all ancestors from heads that aren't in has
1477 1473 missing = set()
1478 1474 visit = collections.deque(r for r in heads if r not in has)
1479 1475 while visit:
1480 1476 r = visit.popleft()
1481 1477 if r in missing:
1482 1478 continue
1483 1479 else:
1484 1480 missing.add(r)
1485 1481 for p in self.parentrevs(r):
1486 1482 if p not in has:
1487 1483 visit.append(p)
1488 1484 missing = list(missing)
1489 1485 missing.sort()
1490 1486 return has, [self.node(miss) for miss in missing]
1491 1487
1492 1488 def incrementalmissingrevs(self, common=None):
1493 1489 """Return an object that can be used to incrementally compute the
1494 1490 revision numbers of the ancestors of arbitrary sets that are not
1495 1491 ancestors of common. This is an ancestor.incrementalmissingancestors
1496 1492 object.
1497 1493
1498 1494 'common' is a list of revision numbers. If common is not supplied, uses
1499 1495 nullrev.
1500 1496 """
1501 1497 if common is None:
1502 1498 common = [nullrev]
1503 1499
1504 1500 if rustancestor is not None and self.index.rust_ext_compat:
1505 1501 return rustancestor.MissingAncestors(self.index, common)
1506 1502 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1507 1503
1508 1504 def findmissingrevs(self, common=None, heads=None):
1509 1505 """Return the revision numbers of the ancestors of heads that
1510 1506 are not ancestors of common.
1511 1507
1512 1508 More specifically, return a list of revision numbers corresponding to
1513 1509 nodes N such that every N satisfies the following constraints:
1514 1510
1515 1511 1. N is an ancestor of some node in 'heads'
1516 1512 2. N is not an ancestor of any node in 'common'
1517 1513
1518 1514 The list is sorted by revision number, meaning it is
1519 1515 topologically sorted.
1520 1516
1521 1517 'heads' and 'common' are both lists of revision numbers. If heads is
1522 1518 not supplied, uses all of the revlog's heads. If common is not
1523 1519 supplied, uses nullid."""
1524 1520 if common is None:
1525 1521 common = [nullrev]
1526 1522 if heads is None:
1527 1523 heads = self.headrevs()
1528 1524
1529 1525 inc = self.incrementalmissingrevs(common=common)
1530 1526 return inc.missingancestors(heads)
1531 1527
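A usage sketch tying the two methods together, with hypothetical revision numbers; the incremental object can answer several queries against the same `common` set.

    inc = rl.incrementalmissingrevs(common=[10, 12])
    missing = inc.missingancestors([20, 21])  # sorted revs in ::heads - ::common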
1532 1528 def findmissing(self, common=None, heads=None):
1533 1529 """Return the ancestors of heads that are not ancestors of common.
1534 1530
1535 1531 More specifically, return a list of nodes N such that every N
1536 1532 satisfies the following constraints:
1537 1533
1538 1534 1. N is an ancestor of some node in 'heads'
1539 1535 2. N is not an ancestor of any node in 'common'
1540 1536
1541 1537 The list is sorted by revision number, meaning it is
1542 1538 topologically sorted.
1543 1539
1544 1540 'heads' and 'common' are both lists of node IDs. If heads is
1545 1541 not supplied, uses all of the revlog's heads. If common is not
1546 1542 supplied, uses nullid."""
1547 1543 if common is None:
1548 1544 common = [self.nullid]
1549 1545 if heads is None:
1550 1546 heads = self.heads()
1551 1547
1552 1548 common = [self.rev(n) for n in common]
1553 1549 heads = [self.rev(n) for n in heads]
1554 1550
1555 1551 inc = self.incrementalmissingrevs(common=common)
1556 1552 return [self.node(r) for r in inc.missingancestors(heads)]
1557 1553
1558 1554 def nodesbetween(self, roots=None, heads=None):
1559 1555 """Return a topological path from 'roots' to 'heads'.
1560 1556
1561 1557 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1562 1558 topologically sorted list of all nodes N that satisfy both of
1563 1559 these constraints:
1564 1560
1565 1561 1. N is a descendant of some node in 'roots'
1566 1562 2. N is an ancestor of some node in 'heads'
1567 1563
1568 1564 Every node is considered to be both a descendant and an ancestor
1569 1565 of itself, so every reachable node in 'roots' and 'heads' will be
1570 1566 included in 'nodes'.
1571 1567
1572 1568 'outroots' is the list of reachable nodes in 'roots', i.e., the
1573 1569 subset of 'roots' that is returned in 'nodes'. Likewise,
1574 1570 'outheads' is the subset of 'heads' that is also in 'nodes'.
1575 1571
1576 1572 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1577 1573 unspecified, uses nullid as the only root. If 'heads' is
1578 1574 unspecified, uses list of all of the revlog's heads."""
1579 1575 nonodes = ([], [], [])
1580 1576 if roots is not None:
1581 1577 roots = list(roots)
1582 1578 if not roots:
1583 1579 return nonodes
1584 1580 lowestrev = min([self.rev(n) for n in roots])
1585 1581 else:
1586 1582 roots = [self.nullid] # Everybody's a descendant of nullid
1587 1583 lowestrev = nullrev
1588 1584 if (lowestrev == nullrev) and (heads is None):
1589 1585 # We want _all_ the nodes!
1590 1586 return (
1591 1587 [self.node(r) for r in self],
1592 1588 [self.nullid],
1593 1589 list(self.heads()),
1594 1590 )
1595 1591 if heads is None:
1596 1592 # All nodes are ancestors, so the latest ancestor is the last
1597 1593 # node.
1598 1594 highestrev = len(self) - 1
1599 1595 # Set ancestors to None to signal that every node is an ancestor.
1600 1596 ancestors = None
1601 1597 # Set heads to an empty dictionary for later discovery of heads
1602 1598 heads = {}
1603 1599 else:
1604 1600 heads = list(heads)
1605 1601 if not heads:
1606 1602 return nonodes
1607 1603 ancestors = set()
1608 1604 # Turn heads into a dictionary so we can remove 'fake' heads.
1609 1605 # Also, later we will be using it to filter out the heads we can't
1610 1606 # find from roots.
1611 1607 heads = dict.fromkeys(heads, False)
1612 1608 # Start at the top and keep marking parents until we're done.
1613 1609 nodestotag = set(heads)
1614 1610 # Remember where the top was so we can use it as a limit later.
1615 1611 highestrev = max([self.rev(n) for n in nodestotag])
1616 1612 while nodestotag:
1617 1613 # grab a node to tag
1618 1614 n = nodestotag.pop()
1619 1615 # Never tag nullid
1620 1616 if n == self.nullid:
1621 1617 continue
1622 1618 # A node's revision number represents its place in a
1623 1619 # topologically sorted list of nodes.
1624 1620 r = self.rev(n)
1625 1621 if r >= lowestrev:
1626 1622 if n not in ancestors:
1627 1623 # If we are possibly a descendant of one of the roots
1628 1624 # and we haven't already been marked as an ancestor
1629 1625 ancestors.add(n) # Mark as ancestor
1630 1626 # Add non-nullid parents to list of nodes to tag.
1631 1627 nodestotag.update(
1632 1628 [p for p in self.parents(n) if p != self.nullid]
1633 1629 )
1634 1630 elif n in heads: # We've seen it before, is it a fake head?
1635 1631 # So it is, real heads should not be the ancestors of
1636 1632 # any other heads.
1637 1633 heads.pop(n)
1638 1634 if not ancestors:
1639 1635 return nonodes
1640 1636 # Now that we have our set of ancestors, we want to remove any
1641 1637 # roots that are not ancestors.
1642 1638
1643 1639 # If one of the roots was nullid, everything is included anyway.
1644 1640 if lowestrev > nullrev:
1645 1641 # But, since we weren't, let's recompute the lowest rev to not
1646 1642 # include roots that aren't ancestors.
1647 1643
1648 1644 # Filter out roots that aren't ancestors of heads
1649 1645 roots = [root for root in roots if root in ancestors]
1650 1646 # Recompute the lowest revision
1651 1647 if roots:
1652 1648 lowestrev = min([self.rev(root) for root in roots])
1653 1649 else:
1654 1650 # No more roots? Return empty list
1655 1651 return nonodes
1656 1652 else:
1657 1653 # We are descending from nullid, and don't need to care about
1658 1654 # any other roots.
1659 1655 lowestrev = nullrev
1660 1656 roots = [self.nullid]
1661 1657 # Transform our roots list into a set.
1662 1658 descendants = set(roots)
1663 1659 # Also, keep the original roots so we can filter out roots that aren't
1664 1660 # 'real' roots (i.e. are descended from other roots).
1665 1661 roots = descendants.copy()
1666 1662 # Our topologically sorted list of output nodes.
1667 1663 orderedout = []
1668 1664 # Don't start at nullid since we don't want nullid in our output list,
1669 1665 # and if nullid shows up in descendants, empty parents will look like
1670 1666 # they're descendants.
1671 1667 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1672 1668 n = self.node(r)
1673 1669 isdescendant = False
1674 1670 if lowestrev == nullrev: # Everybody is a descendant of nullid
1675 1671 isdescendant = True
1676 1672 elif n in descendants:
1677 1673 # n is already a descendant
1678 1674 isdescendant = True
1679 1675 # This check only needs to be done here because all the roots
1680 1676                 # will start being marked as descendants before the loop.
1681 1677 if n in roots:
1682 1678 # If n was a root, check if it's a 'real' root.
1683 1679 p = tuple(self.parents(n))
1684 1680 # If any of its parents are descendants, it's not a root.
1685 1681 if (p[0] in descendants) or (p[1] in descendants):
1686 1682 roots.remove(n)
1687 1683 else:
1688 1684 p = tuple(self.parents(n))
1689 1685 # A node is a descendant if either of its parents are
1690 1686                 # descendants. (We seeded the descendants set with the roots
1691 1687 # up there, remember?)
1692 1688 if (p[0] in descendants) or (p[1] in descendants):
1693 1689 descendants.add(n)
1694 1690 isdescendant = True
1695 1691 if isdescendant and ((ancestors is None) or (n in ancestors)):
1696 1692 # Only include nodes that are both descendants and ancestors.
1697 1693 orderedout.append(n)
1698 1694 if (ancestors is not None) and (n in heads):
1699 1695 # We're trying to figure out which heads are reachable
1700 1696 # from roots.
1701 1697 # Mark this head as having been reached
1702 1698 heads[n] = True
1703 1699 elif ancestors is None:
1704 1700 # Otherwise, we're trying to discover the heads.
1705 1701 # Assume this is a head because if it isn't, the next step
1706 1702 # will eventually remove it.
1707 1703 heads[n] = True
1708 1704 # But, obviously its parents aren't.
1709 1705 for p in self.parents(n):
1710 1706 heads.pop(p, None)
1711 1707 heads = [head for head, flag in heads.items() if flag]
1712 1708 roots = list(roots)
1713 1709 assert orderedout
1714 1710 assert roots
1715 1711 assert heads
1716 1712 return (orderedout, roots, heads)
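    # Worked example: for a linear history 0 <- 1 <- 2 <- 3, calling
    # ``nodesbetween(roots=[node(1)], heads=[node(2)])`` yields
    # ([node(1), node(2)], [node(1)], [node(2)]): the topological path,
    # the roots actually reached, and the heads actually reachable.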
1717 1713
1718 1714 def headrevs(self, revs=None):
1719 1715 if revs is None:
1720 1716 try:
1721 1717 return self.index.headrevs()
1722 1718 except AttributeError:
1723 1719 return self._headrevs()
1724 1720 if rustdagop is not None and self.index.rust_ext_compat:
1725 1721 return rustdagop.headrevs(self.index, revs)
1726 1722 return dagop.headrevs(revs, self._uncheckedparentrevs)
1727 1723
1728 1724 def computephases(self, roots):
1729 1725 return self.index.computephasesmapsets(roots)
1730 1726
1731 1727 def _headrevs(self):
1732 1728 count = len(self)
1733 1729 if not count:
1734 1730 return [nullrev]
1735 1731         # we won't iterate over filtered revs, so nobody is a head at the start
1736 1732         ishead = [0] * (count + 1)
1737 1733         index = self.index
1738 1734         for r in self:
1739 1735             ishead[r] = 1  # I may be a head
1740 1736             e = index[r]
1741 1737             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
1742 1738 return [r for r, val in enumerate(ishead) if val]
1743 1739
1744 1740 def heads(self, start=None, stop=None):
1745 1741 """return the list of all nodes that have no children
1746 1742
1747 1743 if start is specified, only heads that are descendants of
1748 1744 start will be returned
1749 1745 if stop is specified, it will consider all the revs from stop
1750 1746 as if they had no children
1751 1747 """
1752 1748 if start is None and stop is None:
1753 1749 if not len(self):
1754 1750 return [self.nullid]
1755 1751 return [self.node(r) for r in self.headrevs()]
1756 1752
1757 1753 if start is None:
1758 1754 start = nullrev
1759 1755 else:
1760 1756 start = self.rev(start)
1761 1757
1762 1758 stoprevs = {self.rev(n) for n in stop or []}
1763 1759
1764 1760 revs = dagop.headrevssubset(
1765 1761 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1766 1762 )
1767 1763
1768 1764 return [self.node(rev) for rev in revs]
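    # Usage sketch (hypothetical names): ``heads`` with no arguments scans
    # the whole revlog, while ``start`` restricts the search to one
    # subtree.
    #
    #   all_heads = rl.heads()
    #   sub_heads = rl.heads(start=some_node)  # heads descending from it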
1769 1765
1770 1766 def children(self, node):
1771 1767 """find the children of a given node"""
1772 1768 c = []
1773 1769 p = self.rev(node)
1774 1770 for r in self.revs(start=p + 1):
1775 1771 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1776 1772 if prevs:
1777 1773 for pr in prevs:
1778 1774 if pr == p:
1779 1775 c.append(self.node(r))
1780 1776 elif p == nullrev:
1781 1777 c.append(self.node(r))
1782 1778 return c
1783 1779
1784 1780 def commonancestorsheads(self, a, b):
1785 1781 """calculate all the heads of the common ancestors of nodes a and b"""
1786 1782 a, b = self.rev(a), self.rev(b)
1787 1783 ancs = self._commonancestorsheads(a, b)
1788 1784 return pycompat.maplist(self.node, ancs)
1789 1785
1790 1786 def _commonancestorsheads(self, *revs):
1791 1787 """calculate all the heads of the common ancestors of revs"""
1792 1788 try:
1793 1789 ancs = self.index.commonancestorsheads(*revs)
1794 1790 except (AttributeError, OverflowError): # C implementation failed
1795 1791 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1796 1792 return ancs
1797 1793
1798 1794 def isancestor(self, a, b):
1799 1795 """return True if node a is an ancestor of node b
1800 1796
1801 1797 A revision is considered an ancestor of itself."""
1802 1798 a, b = self.rev(a), self.rev(b)
1803 1799 return self.isancestorrev(a, b)
1804 1800
1805 1801 def isancestorrev(self, a, b):
1806 1802 """return True if revision a is an ancestor of revision b
1807 1803
1808 1804 A revision is considered an ancestor of itself.
1809 1805
1810 1806 The implementation of this is trivial but the use of
1811 1807 reachableroots is not."""
1812 1808 if a == nullrev:
1813 1809 return True
1814 1810 elif a == b:
1815 1811 return True
1816 1812 elif a > b:
1817 1813 return False
1818 1814 return bool(self.reachableroots(a, [b], [a], includepath=False))
1819 1815
1820 1816 def reachableroots(self, minroot, heads, roots, includepath=False):
1821 1817 """return (heads(::(<roots> and <roots>::<heads>)))
1822 1818
1823 1819 If includepath is True, return (<roots>::<heads>)."""
1824 1820 try:
1825 1821 return self.index.reachableroots2(
1826 1822 minroot, heads, roots, includepath
1827 1823 )
1828 1824 except AttributeError:
1829 1825 return dagop._reachablerootspure(
1830 1826 self.parentrevs, minroot, roots, heads, includepath
1831 1827 )
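    # Sketch of the equivalence used by ``isancestorrev`` above, for
    # hypothetical revs ``a`` and ``b`` with a < b:
    #
    #   rl.isancestorrev(a, b)
    #   # ... is the same as asking whether a path a::b exists:
    #   bool(rl.reachableroots(a, heads=[b], roots=[a], includepath=False))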
1832 1828
1833 1829 def ancestor(self, a, b):
1834 1830 """calculate the "best" common ancestor of nodes a and b"""
1835 1831
1836 1832 a, b = self.rev(a), self.rev(b)
1837 1833 try:
1838 1834 ancs = self.index.ancestors(a, b)
1839 1835 except (AttributeError, OverflowError):
1840 1836 ancs = ancestor.ancestors(self.parentrevs, a, b)
1841 1837 if ancs:
1842 1838 # choose a consistent winner when there's a tie
1843 1839 return min(map(self.node, ancs))
1844 1840 return self.nullid
1845 1841
1846 1842 def _match(self, id):
1847 1843 if isinstance(id, int):
1848 1844 # rev
1849 1845 return self.node(id)
1850 1846 if len(id) == self.nodeconstants.nodelen:
1851 1847 # possibly a binary node
1852 1848 # odds of a binary node being all hex in ASCII are 1 in 10**25
1853 1849 try:
1854 1850 node = id
1855 1851 self.rev(node) # quick search the index
1856 1852 return node
1857 1853 except error.LookupError:
1858 1854 pass # may be partial hex id
1859 1855 try:
1860 1856 # str(rev)
1861 1857 rev = int(id)
1862 1858 if b"%d" % rev != id:
1863 1859 raise ValueError
1864 1860 if rev < 0:
1865 1861 rev = len(self) + rev
1866 1862 if rev < 0 or rev >= len(self):
1867 1863 raise ValueError
1868 1864 return self.node(rev)
1869 1865 except (ValueError, OverflowError):
1870 1866 pass
1871 1867 if len(id) == 2 * self.nodeconstants.nodelen:
1872 1868 try:
1873 1869 # a full hex nodeid?
1874 1870 node = bin(id)
1875 1871 self.rev(node)
1876 1872 return node
1877 1873 except (binascii.Error, error.LookupError):
1878 1874 pass
1879 1875
1880 1876 def _partialmatch(self, id):
1881 1877         # we don't care about wdirfilenodeids as they should always be full hashes
1882 1878 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1883 1879 ambiguous = False
1884 1880 try:
1885 1881 partial = self.index.partialmatch(id)
1886 1882 if partial and self.hasnode(partial):
1887 1883 if maybewdir:
1888 1884 # single 'ff...' match in radix tree, ambiguous with wdir
1889 1885 ambiguous = True
1890 1886 else:
1891 1887 return partial
1892 1888 elif maybewdir:
1893 1889 # no 'ff...' match in radix tree, wdir identified
1894 1890 raise error.WdirUnsupported
1895 1891 else:
1896 1892 return None
1897 1893 except error.RevlogError:
1898 1894 # parsers.c radix tree lookup gave multiple matches
1899 1895 # fast path: for unfiltered changelog, radix tree is accurate
1900 1896 if not getattr(self, 'filteredrevs', None):
1901 1897 ambiguous = True
1902 1898 # fall through to slow path that filters hidden revisions
1903 1899 except (AttributeError, ValueError):
1904 1900 # we are pure python, or key is not hex
1905 1901 pass
1906 1902 if ambiguous:
1907 1903 raise error.AmbiguousPrefixLookupError(
1908 1904 id, self.display_id, _(b'ambiguous identifier')
1909 1905 )
1910 1906
1911 1907 if id in self._pcache:
1912 1908 return self._pcache[id]
1913 1909
1914 1910 if len(id) <= 40:
1915 1911 # hex(node)[:...]
1916 1912 l = len(id) // 2 * 2 # grab an even number of digits
1917 1913 try:
1918 1914 # we're dropping the last digit, so let's check that it's hex,
1919 1915 # to avoid the expensive computation below if it's not
1920 1916 if len(id) % 2 > 0:
1921 1917 if not (id[-1] in hexdigits):
1922 1918 return None
1923 1919 prefix = bin(id[:l])
1924 1920 except binascii.Error:
1925 1921 pass
1926 1922 else:
1927 1923 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1928 1924 nl = [
1929 1925 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1930 1926 ]
1931 1927 if self.nodeconstants.nullhex.startswith(id):
1932 1928 nl.append(self.nullid)
1933 1929 if len(nl) > 0:
1934 1930 if len(nl) == 1 and not maybewdir:
1935 1931 self._pcache[id] = nl[0]
1936 1932 return nl[0]
1937 1933 raise error.AmbiguousPrefixLookupError(
1938 1934 id, self.display_id, _(b'ambiguous identifier')
1939 1935 )
1940 1936 if maybewdir:
1941 1937 raise error.WdirUnsupported
1942 1938 return None
1943 1939
1944 1940 def lookup(self, id):
1945 1941 """locate a node based on:
1946 1942 - revision number or str(revision number)
1947 1943 - nodeid or subset of hex nodeid
1948 1944 """
1949 1945 n = self._match(id)
1950 1946 if n is not None:
1951 1947 return n
1952 1948 n = self._partialmatch(id)
1953 1949 if n:
1954 1950 return n
1955 1951
1956 1952 raise error.LookupError(id, self.display_id, _(b'no match found'))
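    # Usage sketch: ``lookup`` accepts several identifier forms
    # (hypothetical values):
    #
    #   rl.lookup(0)           # revision number
    #   rl.lookup(b'0')        # str(revision number)
    #   rl.lookup(node)        # full binary node id
    #   rl.lookup(b'1de3a42')  # unambiguous hex prefix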
1957 1953
1958 1954 def shortest(self, node, minlength=1):
1959 1955 """Find the shortest unambiguous prefix that matches node."""
1960 1956
1961 1957 def isvalid(prefix):
1962 1958 try:
1963 1959 matchednode = self._partialmatch(prefix)
1964 1960 except error.AmbiguousPrefixLookupError:
1965 1961 return False
1966 1962 except error.WdirUnsupported:
1967 1963 # single 'ff...' match
1968 1964 return True
1969 1965 if matchednode is None:
1970 1966 raise error.LookupError(node, self.display_id, _(b'no node'))
1971 1967 return True
1972 1968
1973 1969 def maybewdir(prefix):
1974 1970 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1975 1971
1976 1972 hexnode = hex(node)
1977 1973
1978 1974 def disambiguate(hexnode, minlength):
1979 1975 """Disambiguate against wdirid."""
1980 1976 for length in range(minlength, len(hexnode) + 1):
1981 1977 prefix = hexnode[:length]
1982 1978 if not maybewdir(prefix):
1983 1979 return prefix
1984 1980
1985 1981 if not getattr(self, 'filteredrevs', None):
1986 1982 try:
1987 1983 length = max(self.index.shortest(node), minlength)
1988 1984 return disambiguate(hexnode, length)
1989 1985 except error.RevlogError:
1990 1986 if node != self.nodeconstants.wdirid:
1991 1987 raise error.LookupError(
1992 1988 node, self.display_id, _(b'no node')
1993 1989 )
1994 1990 except AttributeError:
1995 1991 # Fall through to pure code
1996 1992 pass
1997 1993
1998 1994 if node == self.nodeconstants.wdirid:
1999 1995 for length in range(minlength, len(hexnode) + 1):
2000 1996 prefix = hexnode[:length]
2001 1997 if isvalid(prefix):
2002 1998 return prefix
2003 1999
2004 2000 for length in range(minlength, len(hexnode) + 1):
2005 2001 prefix = hexnode[:length]
2006 2002 if isvalid(prefix):
2007 2003 return disambiguate(hexnode, length)
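    # Usage sketch (hypothetical names): template code uses this to render
    # short but still unambiguous hashes.
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   assert rl.lookup(prefix) == node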
2008 2004
2009 2005 def cmp(self, node, text):
2010 2006 """compare text with a given file revision
2011 2007
2012 2008 returns True if text is different than what is stored.
2013 2009 """
2014 2010 p1, p2 = self.parents(node)
2015 2011 return storageutil.hashrevisionsha1(text, p1, p2) != node
2016 2012
2017 2013 def _getsegmentforrevs(self, startrev, endrev):
2018 2014 """Obtain a segment of raw data corresponding to a range of revisions.
2019 2015
2020 2016         Accepts the start and end revisions of the requested range; data is
2021 2017         read through the revlog's internal segment file.
2023 2019
2024 2020 Requests for data may be satisfied by a cache.
2025 2021
2026 2022 Returns a 2-tuple of (offset, data) for the requested range of
2027 2023 revisions. Offset is the integer offset from the beginning of the
2028 2024 revlog and data is a str or buffer of the raw byte data.
2029 2025
2030 2026 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2031 2027 to determine where each revision's data begins and ends.
2032 2028 """
2033 2029 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2034 2030 # (functions are expensive).
2035 2031 index = self.index
2036 2032 istart = index[startrev]
2037 2033 start = int(istart[0] >> 16)
2038 2034 if startrev == endrev:
2039 2035 end = start + istart[1]
2040 2036 else:
2041 2037 iend = index[endrev]
2042 2038 end = int(iend[0] >> 16) + iend[1]
2043 2039
2044 2040 if self._inline:
2045 2041 start += (startrev + 1) * self.index.entry_size
2046 2042 end += (endrev + 1) * self.index.entry_size
2047 2043 length = end - start
2048 2044
2049 2045 return start, self._segmentfile.read_chunk(start, length)
2050 2046
2051 2047 def _chunk(self, rev):
2052 2048 """Obtain a single decompressed chunk for a revision.
2053 2049
2054 2050         Accepts an integer revision; data is read through the revlog's
2055 2051         internal segment file.
2057 2053
2058 2054 Returns a str holding uncompressed data for the requested revision.
2059 2055 """
2060 2056 compression_mode = self.index[rev][10]
2061 2057 data = self._getsegmentforrevs(rev, rev)[1]
2062 2058 if compression_mode == COMP_MODE_PLAIN:
2063 2059 return data
2064 2060 elif compression_mode == COMP_MODE_DEFAULT:
2065 2061 return self._decompressor(data)
2066 2062 elif compression_mode == COMP_MODE_INLINE:
2067 2063 return self.decompress(data)
2068 2064 else:
2069 2065 msg = b'unknown compression mode %d'
2070 2066 msg %= compression_mode
2071 2067 raise error.RevlogError(msg)
2072 2068
2073 2069 def _chunks(self, revs, targetsize=None):
2074 2070 """Obtain decompressed chunks for the specified revisions.
2075 2071
2076 2072 Accepts an iterable of numeric revisions that are assumed to be in
2077 2073         Accepts an iterable of numeric revisions that are assumed to be in
2078 2074         ascending order; data is read through the revlog's internal segment
2079 2075         file.
2081 2077 This function is similar to calling ``self._chunk()`` multiple times,
2082 2078 but is faster.
2083 2079
2084 2080 Returns a list with decompressed data for each requested revision.
2085 2081 """
2086 2082 if not revs:
2087 2083 return []
2088 2084 start = self.start
2089 2085 length = self.length
2090 2086 inline = self._inline
2091 2087 iosize = self.index.entry_size
2092 2088 buffer = util.buffer
2093 2089
2094 2090 l = []
2095 2091 ladd = l.append
2096 2092
2097 2093 if not self.data_config.with_sparse_read:
2098 2094 slicedchunks = (revs,)
2099 2095 else:
2100 2096 slicedchunks = deltautil.slicechunk(
2101 2097 self, revs, targetsize=targetsize
2102 2098 )
2103 2099
2104 2100 for revschunk in slicedchunks:
2105 2101 firstrev = revschunk[0]
2106 2102 # Skip trailing revisions with empty diff
2107 2103 for lastrev in revschunk[::-1]:
2108 2104 if length(lastrev) != 0:
2109 2105 break
2110 2106
2111 2107 try:
2112 2108 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2113 2109 except OverflowError:
2114 2110 # issue4215 - we can't cache a run of chunks greater than
2115 2111 # 2G on Windows
2116 2112 return [self._chunk(rev) for rev in revschunk]
2117 2113
2118 2114 decomp = self.decompress
2119 2115 # self._decompressor might be None, but will not be used in that case
2120 2116 def_decomp = self._decompressor
2121 2117 for rev in revschunk:
2122 2118 chunkstart = start(rev)
2123 2119 if inline:
2124 2120 chunkstart += (rev + 1) * iosize
2125 2121 chunklength = length(rev)
2126 2122 comp_mode = self.index[rev][10]
2127 2123 c = buffer(data, chunkstart - offset, chunklength)
2128 2124 if comp_mode == COMP_MODE_PLAIN:
2129 2125 ladd(c)
2130 2126 elif comp_mode == COMP_MODE_INLINE:
2131 2127 ladd(decomp(c))
2132 2128 elif comp_mode == COMP_MODE_DEFAULT:
2133 2129 ladd(def_decomp(c))
2134 2130 else:
2135 2131 msg = b'unknown compression mode %d'
2136 2132 msg %= comp_mode
2137 2133 raise error.RevlogError(msg)
2138 2134
2139 2135 return l
2140 2136
2141 2137 def deltaparent(self, rev):
2142 2138 """return deltaparent of the given revision"""
2143 2139 base = self.index[rev][3]
2144 2140 if base == rev:
2145 2141 return nullrev
2146 2142 elif self.delta_config.general_delta:
2147 2143 return base
2148 2144 else:
2149 2145 return rev - 1
2150 2146
2151 2147 def issnapshot(self, rev):
2152 2148 """tells whether rev is a snapshot"""
2153 2149 if not self.delta_config.sparse_revlog:
2154 2150 return self.deltaparent(rev) == nullrev
2155 2151 elif hasattr(self.index, 'issnapshot'):
2156 2152 # directly assign the method to cache the testing and access
2157 2153 self.issnapshot = self.index.issnapshot
2158 2154 return self.issnapshot(rev)
2159 2155 if rev == nullrev:
2160 2156 return True
2161 2157 entry = self.index[rev]
2162 2158 base = entry[3]
2163 2159 if base == rev:
2164 2160 return True
2165 2161 if base == nullrev:
2166 2162 return True
2167 2163 p1 = entry[5]
2168 2164 while self.length(p1) == 0:
2169 2165 b = self.deltaparent(p1)
2170 2166 if b == p1:
2171 2167 break
2172 2168 p1 = b
2173 2169 p2 = entry[6]
2174 2170 while self.length(p2) == 0:
2175 2171 b = self.deltaparent(p2)
2176 2172 if b == p2:
2177 2173 break
2178 2174 p2 = b
2179 2175 if base == p1 or base == p2:
2180 2176 return False
2181 2177 return self.issnapshot(base)
2182 2178
2183 2179 def snapshotdepth(self, rev):
2184 2180         """number of snapshots in the chain before this one"""
2185 2181         if not self.issnapshot(rev):
2186 2182             raise error.ProgrammingError(b'revision %d is not a snapshot' % rev)
2187 2183 return len(self._deltachain(rev)[0]) - 1
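    # Sketch of the delta-chain vocabulary used above: every revision is
    # stored either as a full snapshot or as a delta against a base, and
    # walking ``deltaparent`` from a hypothetical rev eventually reaches a
    # snapshot.
    #
    #   chain = []
    #   while not rl.issnapshot(rev):
    #       chain.append(rev)
    #       rev = rl.deltaparent(rev)
    #   # the deltas in ``chain`` apply on top of snapshot ``rev``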
2188 2184
2189 2185 def revdiff(self, rev1, rev2):
2190 2186 """return or calculate a delta between two revisions
2191 2187
2192 2188 The delta calculated is in binary form and is intended to be written to
2193 2189 revlog data directly. So this function needs raw revision data.
2194 2190 """
2195 2191 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2196 2192 return bytes(self._chunk(rev2))
2197 2193
2198 2194 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2199 2195
2200 2196 def revision(self, nodeorrev):
2201 2197 """return an uncompressed revision of a given node or revision
2202 2198 number.
2203 2199 """
2204 2200 return self._revisiondata(nodeorrev)
2205 2201
2206 2202 def sidedata(self, nodeorrev):
2207 2203 """a map of extra data related to the changeset but not part of the hash
2208 2204
2209 2205         This function currently returns a dictionary. However, a more
2210 2206         advanced mapping object will likely be used in the future for more
2211 2207         efficient/lazy code.
2212 2208 """
2213 2209 # deal with <nodeorrev> argument type
2214 2210 if isinstance(nodeorrev, int):
2215 2211 rev = nodeorrev
2216 2212 else:
2217 2213 rev = self.rev(nodeorrev)
2218 2214 return self._sidedata(rev)
2219 2215
2220 2216 def _revisiondata(self, nodeorrev, raw=False):
2221 2217 # deal with <nodeorrev> argument type
2222 2218 if isinstance(nodeorrev, int):
2223 2219 rev = nodeorrev
2224 2220 node = self.node(rev)
2225 2221 else:
2226 2222 node = nodeorrev
2227 2223 rev = None
2228 2224
2229 2225 # fast path the special `nullid` rev
2230 2226 if node == self.nullid:
2231 2227 return b""
2232 2228
2233 2229 # ``rawtext`` is the text as stored inside the revlog. Might be the
2234 2230 # revision or might need to be processed to retrieve the revision.
2235 2231 rev, rawtext, validated = self._rawtext(node, rev)
2236 2232
2237 2233 if raw and validated:
2238 2234             # if we don't need to process the raw text and the raw
2239 2235             # text is already cached, we can exit early.
2240 2236 return rawtext
2241 2237 if rev is None:
2242 2238 rev = self.rev(node)
2243 2239         # the revlog's flags for this revision
2244 2240         # (these usually alter its state or content)
2245 2241 flags = self.flags(rev)
2246 2242
2247 2243 if validated and flags == REVIDX_DEFAULT_FLAGS:
2248 2244 # no extra flags set, no flag processor runs, text = rawtext
2249 2245 return rawtext
2250 2246
2251 2247 if raw:
2252 2248 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2253 2249 text = rawtext
2254 2250 else:
2255 2251 r = flagutil.processflagsread(self, rawtext, flags)
2256 2252 text, validatehash = r
2257 2253 if validatehash:
2258 2254 self.checkhash(text, node, rev=rev)
2259 2255 if not validated:
2260 2256 self._revisioncache = (node, rev, rawtext)
2261 2257
2262 2258 return text
2263 2259
2264 2260 def _rawtext(self, node, rev):
2265 2261 """return the possibly unvalidated rawtext for a revision
2266 2262
2267 2263 returns (rev, rawtext, validated)
2268 2264 """
2269 2265
2270 2266 # revision in the cache (could be useful to apply delta)
2271 2267 cachedrev = None
2272 2268 # An intermediate text to apply deltas to
2273 2269 basetext = None
2274 2270
2275 2271 # Check if we have the entry in cache
2276 2272 # The cache entry looks like (node, rev, rawtext)
2277 2273 if self._revisioncache:
2278 2274 if self._revisioncache[0] == node:
2279 2275 return (rev, self._revisioncache[2], True)
2280 2276 cachedrev = self._revisioncache[1]
2281 2277
2282 2278 if rev is None:
2283 2279 rev = self.rev(node)
2284 2280
2285 2281 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2286 2282 if stopped:
2287 2283 basetext = self._revisioncache[2]
2288 2284
2289 2285         # drop cache to save memory; the caller is expected to
2290 2286 # update self._revisioncache after validating the text
2291 2287 self._revisioncache = None
2292 2288
2293 2289 targetsize = None
2294 2290 rawsize = self.index[rev][2]
2295 2291 if 0 <= rawsize:
2296 2292 targetsize = 4 * rawsize
2297 2293
2298 2294 bins = self._chunks(chain, targetsize=targetsize)
2299 2295 if basetext is None:
2300 2296 basetext = bytes(bins[0])
2301 2297 bins = bins[1:]
2302 2298
2303 2299 rawtext = mdiff.patches(basetext, bins)
2304 2300 del basetext # let us have a chance to free memory early
2305 2301 return (rev, rawtext, False)
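    # Sketch of the reconstruction performed above, for a hypothetical
    # rev ``r``: the stored form is a base text plus a series of binary
    # patches, so rebuilding is essentially:
    #
    #   chain, _stopped = rl._deltachain(r)
    #   segments = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(segments[0]), segments[1:])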
2306 2302
2307 2303 def _sidedata(self, rev):
2308 2304 """Return the sidedata for a given revision number."""
2309 2305 index_entry = self.index[rev]
2310 2306 sidedata_offset = index_entry[8]
2311 2307 sidedata_size = index_entry[9]
2312 2308
2313 2309 if self._inline:
2314 2310 sidedata_offset += self.index.entry_size * (1 + rev)
2315 2311 if sidedata_size == 0:
2316 2312 return {}
2317 2313
2318 2314 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2319 2315 filename = self._sidedatafile
2320 2316 end = self._docket.sidedata_end
2321 2317 offset = sidedata_offset
2322 2318 length = sidedata_size
2323 2319 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2324 2320 raise error.RevlogError(m)
2325 2321
2326 2322 comp_segment = self._segmentfile_sidedata.read_chunk(
2327 2323 sidedata_offset, sidedata_size
2328 2324 )
2329 2325
2330 2326 comp = self.index[rev][11]
2331 2327 if comp == COMP_MODE_PLAIN:
2332 2328 segment = comp_segment
2333 2329 elif comp == COMP_MODE_DEFAULT:
2334 2330 segment = self._decompressor(comp_segment)
2335 2331 elif comp == COMP_MODE_INLINE:
2336 2332 segment = self.decompress(comp_segment)
2337 2333 else:
2338 2334 msg = b'unknown compression mode %d'
2339 2335 msg %= comp
2340 2336 raise error.RevlogError(msg)
2341 2337
2342 2338 sidedata = sidedatautil.deserialize_sidedata(segment)
2343 2339 return sidedata
2344 2340
2345 2341 def rawdata(self, nodeorrev):
2346 2342 """return an uncompressed raw data of a given node or revision number."""
2347 2343 return self._revisiondata(nodeorrev, raw=True)
2348 2344
2349 2345 def hash(self, text, p1, p2):
2350 2346 """Compute a node hash.
2351 2347
2352 2348 Available as a function so that subclasses can replace the hash
2353 2349 as needed.
2354 2350 """
2355 2351 return storageutil.hashrevisionsha1(text, p1, p2)
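    # Sketch of the default scheme implemented by
    # ``storageutil.hashrevisionsha1``: SHA-1 over the two parent nodes,
    # in sorted order, followed by the text. Roughly:
    #
    #   import hashlib
    #   def sketch_hash(text, p1, p2):  # illustrative only
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()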
2356 2352
2357 2353 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2358 2354 """Check node hash integrity.
2359 2355
2360 2356 Available as a function so that subclasses can extend hash mismatch
2361 2357 behaviors as needed.
2362 2358 """
2363 2359 try:
2364 2360 if p1 is None and p2 is None:
2365 2361 p1, p2 = self.parents(node)
2366 2362 if node != self.hash(text, p1, p2):
2367 2363 # Clear the revision cache on hash failure. The revision cache
2368 2364 # only stores the raw revision and clearing the cache does have
2369 2365 # the side-effect that we won't have a cache hit when the raw
2370 2366 # revision data is accessed. But this case should be rare and
2371 2367 # it is extra work to teach the cache about the hash
2372 2368 # verification state.
2373 2369 if self._revisioncache and self._revisioncache[0] == node:
2374 2370 self._revisioncache = None
2375 2371
2376 2372 revornode = rev
2377 2373 if revornode is None:
2378 2374 revornode = templatefilters.short(hex(node))
2379 2375 raise error.RevlogError(
2380 2376 _(b"integrity check failed on %s:%s")
2381 2377 % (self.display_id, pycompat.bytestr(revornode))
2382 2378 )
2383 2379 except error.RevlogError:
2384 2380 if self.feature_config.censorable and storageutil.iscensoredtext(
2385 2381 text
2386 2382 ):
2387 2383 raise error.CensoredNodeError(self.display_id, node, text)
2388 2384 raise
2389 2385
2390 2386 @property
2391 2387 def _split_index_file(self):
2392 2388 """the path where to expect the index of an ongoing splitting operation
2393 2389
2394 2390 The file will only exist if a splitting operation is in progress, but
2395 2391 it is always expected at the same location."""
2396 2392 parts = self.radix.split(b'/')
2397 2393 if len(parts) > 1:
2398 2394             # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2399 2395 head = parts[0] + b'-s'
2400 2396 mids = parts[1:-1]
2401 2397 tail = parts[-1] + b'.i'
2402 2398 pieces = [head] + mids + [tail]
2403 2399 return b'/'.join(pieces)
2404 2400 else:
2405 2401 # the revlog is stored at the root of the store (changelog or
2406 2402 # manifest), no risk of collision.
2407 2403 return self.radix + b'.i.s'
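    # Worked example of the mapping above (hypothetical radix values):
    #
    #   b'data/foo'    -> b'data-s/foo.i'     # '-s' marks the split copy
    #   b'00changelog' -> b'00changelog.i.s'  # revlog at the store root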
2408 2404
2409 2405 def _enforceinlinesize(self, tr, side_write=True):
2410 2406 """Check if the revlog is too big for inline and convert if so.
2411 2407
2412 2408 This should be called after revisions are added to the revlog. If the
2413 2409 revlog has grown too large to be an inline revlog, it will convert it
2414 2410 to use multiple index and data files.
2415 2411 """
2416 2412 tiprev = len(self) - 1
2417 2413 total_size = self.start(tiprev) + self.length(tiprev)
2418 2414 if not self._inline or total_size < _maxinline:
2419 2415 return
2420 2416
2421 2417 troffset = tr.findoffset(self._indexfile)
2422 2418 if troffset is None:
2423 2419 raise error.RevlogError(
2424 2420 _(b"%s not found in the transaction") % self._indexfile
2425 2421 )
2426 2422 if troffset:
2427 2423 tr.addbackup(self._indexfile, for_offset=True)
2428 2424 tr.add(self._datafile, 0)
2429 2425
2430 2426 existing_handles = False
2431 2427 if self._writinghandles is not None:
2432 2428 existing_handles = True
2433 2429 fp = self._writinghandles[0]
2434 2430 fp.flush()
2435 2431 fp.close()
2436 2432 # We can't use the cached file handle after close(). So prevent
2437 2433 # its usage.
2438 2434 self._writinghandles = None
2439 2435 self._segmentfile.writing_handle = None
2440 2436             # No need to deal with the sidedata writing handle as it is only
2441 2437             # relevant for revlog-v2, which is never inline and thus never
2442 2438             # reaches this code
2443 2439 if side_write:
2444 2440 old_index_file_path = self._indexfile
2445 2441 new_index_file_path = self._split_index_file
2446 2442 opener = self.opener
2447 2443 weak_self = weakref.ref(self)
2448 2444
2449 2445             # the "split" index replaces the real index when the transaction is finalized
2450 2446 def finalize_callback(tr):
2451 2447 opener.rename(
2452 2448 new_index_file_path,
2453 2449 old_index_file_path,
2454 2450 checkambig=True,
2455 2451 )
2456 2452 maybe_self = weak_self()
2457 2453 if maybe_self is not None:
2458 2454 maybe_self._indexfile = old_index_file_path
2459 2455
2460 2456 def abort_callback(tr):
2461 2457 maybe_self = weak_self()
2462 2458 if maybe_self is not None:
2463 2459 maybe_self._indexfile = old_index_file_path
2464 2460
2465 2461 tr.registertmp(new_index_file_path)
2466 2462 if self.target[1] is not None:
2467 2463 callback_id = b'000-revlog-split-%d-%s' % self.target
2468 2464 else:
2469 2465 callback_id = b'000-revlog-split-%d' % self.target[0]
2470 2466 tr.addfinalize(callback_id, finalize_callback)
2471 2467 tr.addabort(callback_id, abort_callback)
2472 2468
2473 2469 new_dfh = self._datafp(b'w+')
2474 2470 new_dfh.truncate(0) # drop any potentially existing data
2475 2471 try:
2476 2472 with self.reading():
2477 2473 for r in self:
2478 2474 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2479 2475 new_dfh.flush()
2480 2476
2481 2477 if side_write:
2482 2478 self._indexfile = new_index_file_path
2483 2479 with self.__index_new_fp() as fp:
2484 2480 self._format_flags &= ~FLAG_INLINE_DATA
2485 2481 self._inline = False
2486 2482 for i in self:
2487 2483 e = self.index.entry_binary(i)
2488 2484 if i == 0 and self._docket is None:
2489 2485 header = self._format_flags | self._format_version
2490 2486 header = self.index.pack_header(header)
2491 2487 e = header + e
2492 2488 fp.write(e)
2493 2489 if self._docket is not None:
2494 2490 self._docket.index_end = fp.tell()
2495 2491
2496 2492                 # If we don't use side-write, the temp file replaces the real
2497 2493                 # index when we exit the context manager
2498 2494
2499 2495 nodemaputil.setup_persistent_nodemap(tr, self)
2500 2496 self._segmentfile = randomaccessfile.randomaccessfile(
2501 2497 self.opener,
2502 2498 self._datafile,
2503 2499 self.data_config.chunk_cache_size,
2504 2500 )
2505 2501
2506 2502 if existing_handles:
2507 2503                 # switched from inline to conventional; reopen the index
2508 2504 ifh = self.__index_write_fp()
2509 2505 self._writinghandles = (ifh, new_dfh, None)
2510 2506 self._segmentfile.writing_handle = new_dfh
2511 2507 new_dfh = None
2512 2508                 # No need to deal with the sidedata writing handle as it is only
2513 2509                 # relevant for revlog-v2, which is never inline and thus never
2514 2510                 # reaches this code
2515 2511 finally:
2516 2512 if new_dfh is not None:
2517 2513 new_dfh.close()
2518 2514
2519 2515 def _nodeduplicatecallback(self, transaction, node):
2520 2516 """called when trying to add a node already stored."""
2521 2517
2522 2518 @contextlib.contextmanager
2523 2519 def reading(self):
2524 2520 """Context manager that keeps data and sidedata files open for reading"""
2525 2521 if len(self.index) == 0:
2526 2522 yield # nothing to be read
2527 2523 else:
2528 2524 with self._segmentfile.reading():
2529 2525 with self._segmentfile_sidedata.reading():
2530 2526 yield
2531 2527
2532 2528 @contextlib.contextmanager
2533 2529 def _writing(self, transaction):
2534 2530 if self._trypending:
2535 2531             msg = b'attempted to write in a `trypending` revlog: %s'
2536 2532 msg %= self.display_id
2537 2533 raise error.ProgrammingError(msg)
2538 2534 if self._writinghandles is not None:
2539 2535 yield
2540 2536 else:
2541 2537 ifh = dfh = sdfh = None
2542 2538 try:
2543 2539 r = len(self)
2544 2540 # opening the data file.
2545 2541 dsize = 0
2546 2542 if r:
2547 2543 dsize = self.end(r - 1)
2548 2544 dfh = None
2549 2545 if not self._inline:
2550 2546 try:
2551 2547 dfh = self._datafp(b"r+")
2552 2548 if self._docket is None:
2553 2549 dfh.seek(0, os.SEEK_END)
2554 2550 else:
2555 2551 dfh.seek(self._docket.data_end, os.SEEK_SET)
2556 2552 except FileNotFoundError:
2557 2553 dfh = self._datafp(b"w+")
2558 2554 transaction.add(self._datafile, dsize)
2559 2555 if self._sidedatafile is not None:
2560 2556 # revlog-v2 does not inline, help Pytype
2561 2557 assert dfh is not None
2562 2558 try:
2563 2559 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2564 2560 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2565 2561 except FileNotFoundError:
2566 2562 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2567 2563 transaction.add(
2568 2564 self._sidedatafile, self._docket.sidedata_end
2569 2565 )
2570 2566
2571 2567 # opening the index file.
2572 2568 isize = r * self.index.entry_size
2573 2569 ifh = self.__index_write_fp()
2574 2570 if self._inline:
2575 2571 transaction.add(self._indexfile, dsize + isize)
2576 2572 else:
2577 2573 transaction.add(self._indexfile, isize)
2578 2574 # exposing all file handle for writing.
2579 2575 self._writinghandles = (ifh, dfh, sdfh)
2580 2576 self._segmentfile.writing_handle = ifh if self._inline else dfh
2581 2577 self._segmentfile_sidedata.writing_handle = sdfh
2582 2578 yield
2583 2579 if self._docket is not None:
2584 2580 self._write_docket(transaction)
2585 2581 finally:
2586 2582 self._writinghandles = None
2587 2583 self._segmentfile.writing_handle = None
2588 2584 self._segmentfile_sidedata.writing_handle = None
2589 2585 if dfh is not None:
2590 2586 dfh.close()
2591 2587 if sdfh is not None:
2592 2588 sdfh.close()
2593 2589                 # close the index file last to avoid exposing references to
2594 2590                 # potentially unflushed data content.
2595 2591 if ifh is not None:
2596 2592 ifh.close()
2597 2593
2598 2594 def _write_docket(self, transaction):
2599 2595 """write the current docket on disk
2600 2596
2601 2597         Exists as a method to help the changelog implement its transaction logic.
2602 2598
2603 2599         We could also imagine using the same transaction logic for all revlogs,
2604 2600         since dockets are cheap.
2605 2601 self._docket.write(transaction)
2606 2602
2607 2603 def addrevision(
2608 2604 self,
2609 2605 text,
2610 2606 transaction,
2611 2607 link,
2612 2608 p1,
2613 2609 p2,
2614 2610 cachedelta=None,
2615 2611 node=None,
2616 2612 flags=REVIDX_DEFAULT_FLAGS,
2617 2613 deltacomputer=None,
2618 2614 sidedata=None,
2619 2615 ):
2620 2616 """add a revision to the log
2621 2617
2622 2618 text - the revision data to add
2623 2619 transaction - the transaction object used for rollback
2624 2620 link - the linkrev data to add
2625 2621 p1, p2 - the parent nodeids of the revision
2626 2622 cachedelta - an optional precomputed delta
2627 2623 node - nodeid of revision; typically node is not specified, and it is
2628 2624 computed by default as hash(text, p1, p2), however subclasses might
2629 2625 use different hashing method (and override checkhash() in such case)
2630 2626 flags - the known flags to set on the revision
2631 2627 deltacomputer - an optional deltacomputer instance shared between
2632 2628 multiple calls
2633 2629 """
2634 2630 if link == nullrev:
2635 2631 raise error.RevlogError(
2636 2632 _(b"attempted to add linkrev -1 to %s") % self.display_id
2637 2633 )
2638 2634
2639 2635 if sidedata is None:
2640 2636 sidedata = {}
2641 2637 elif sidedata and not self.feature_config.has_side_data:
2642 2638 raise error.ProgrammingError(
2643 2639                 _(b"trying to add sidedata to a revlog that does not support them")
2644 2640 )
2645 2641
2646 2642 if flags:
2647 2643 node = node or self.hash(text, p1, p2)
2648 2644
2649 2645 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2650 2646
2651 2647 # If the flag processor modifies the revision data, ignore any provided
2652 2648 # cachedelta.
2653 2649 if rawtext != text:
2654 2650 cachedelta = None
2655 2651
2656 2652 if len(rawtext) > _maxentrysize:
2657 2653 raise error.RevlogError(
2658 2654 _(
2659 2655 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2660 2656 )
2661 2657 % (self.display_id, len(rawtext))
2662 2658 )
2663 2659
2664 2660 node = node or self.hash(rawtext, p1, p2)
2665 2661 rev = self.index.get_rev(node)
2666 2662 if rev is not None:
2667 2663 return rev
2668 2664
2669 2665 if validatehash:
2670 2666 self.checkhash(rawtext, node, p1=p1, p2=p2)
2671 2667
2672 2668 return self.addrawrevision(
2673 2669 rawtext,
2674 2670 transaction,
2675 2671 link,
2676 2672 p1,
2677 2673 p2,
2678 2674 node,
2679 2675 flags,
2680 2676 cachedelta=cachedelta,
2681 2677 deltacomputer=deltacomputer,
2682 2678 sidedata=sidedata,
2683 2679 )
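    # Usage sketch (hypothetical names): revisions are added inside an
    # open transaction so the write can be rolled back.
    #
    #   with repo.transaction(b'example') as tr:
    #       rev = rl.addrevision(text, tr, linkrev, p1, p2)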
2684 2680
2685 2681 def addrawrevision(
2686 2682 self,
2687 2683 rawtext,
2688 2684 transaction,
2689 2685 link,
2690 2686 p1,
2691 2687 p2,
2692 2688 node,
2693 2689 flags,
2694 2690 cachedelta=None,
2695 2691 deltacomputer=None,
2696 2692 sidedata=None,
2697 2693 ):
2698 2694 """add a raw revision with known flags, node and parents
2699 2695 useful when reusing a revision not stored in this revlog (ex: received
2700 2696 over wire, or read from an external bundle).
2701 2697 """
2702 2698 with self._writing(transaction):
2703 2699 return self._addrevision(
2704 2700 node,
2705 2701 rawtext,
2706 2702 transaction,
2707 2703 link,
2708 2704 p1,
2709 2705 p2,
2710 2706 flags,
2711 2707 cachedelta,
2712 2708 deltacomputer=deltacomputer,
2713 2709 sidedata=sidedata,
2714 2710 )
2715 2711
2716 2712 def compress(self, data):
2717 2713 """Generate a possibly-compressed representation of data."""
2718 2714 if not data:
2719 2715 return b'', data
2720 2716
2721 2717 compressed = self._compressor.compress(data)
2722 2718
2723 2719 if compressed:
2724 2720 # The revlog compressor added the header in the returned data.
2725 2721 return b'', compressed
2726 2722
2727 2723 if data[0:1] == b'\0':
2728 2724 return b'', data
2729 2725 return b'u', data
2730 2726
2731 2727 def decompress(self, data):
2732 2728 """Decompress a revlog chunk.
2733 2729
2734 2730 The chunk is expected to begin with a header identifying the
2735 2731 format type so it can be routed to an appropriate decompressor.
2736 2732 """
2737 2733 if not data:
2738 2734 return data
2739 2735
2740 2736 # Revlogs are read much more frequently than they are written and many
2741 2737 # chunks only take microseconds to decompress, so performance is
2742 2738 # important here.
2743 2739 #
2744 2740 # We can make a few assumptions about revlogs:
2745 2741 #
2746 2742 # 1) the majority of chunks will be compressed (as opposed to inline
2747 2743 # raw data).
2748 2744         # 2) decompressing *any* data will likely be at least 10x slower than
2749 2745 # returning raw inline data.
2750 2746 # 3) we want to prioritize common and officially supported compression
2751 2747 # engines
2752 2748 #
2753 2749 # It follows that we want to optimize for "decompress compressed data
2754 2750 # when encoded with common and officially supported compression engines"
2755 2751 # case over "raw data" and "data encoded by less common or non-official
2756 2752 # compression engines." That is why we have the inline lookup first
2757 2753 # followed by the compengines lookup.
2758 2754 #
2759 2755 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2760 2756 # compressed chunks. And this matters for changelog and manifest reads.
2761 2757 t = data[0:1]
2762 2758
2763 2759 if t == b'x':
2764 2760 try:
2765 2761 return _zlibdecompress(data)
2766 2762 except zlib.error as e:
2767 2763 raise error.RevlogError(
2768 2764 _(b'revlog decompress error: %s')
2769 2765 % stringutil.forcebytestr(e)
2770 2766 )
2771 2767 # '\0' is more common than 'u' so it goes first.
2772 2768 elif t == b'\0':
2773 2769 return data
2774 2770 elif t == b'u':
2775 2771 return util.buffer(data, 1)
2776 2772
2777 2773 compressor = self._get_decompressor(t)
2778 2774
2779 2775 return compressor.decompress(data)
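    # Sketch of the one-byte header convention shared by ``compress`` and
    # ``decompress`` (hypothetical payload ``raw``):
    #
    #   h, packed = rl.compress(raw)
    #   # h == b'u' -> stored verbatim; h == b'' -> ``packed`` carries its
    #   # own header (b'x' for zlib, b'\0' for plain, or an engine byte)
    #   assert bytes(rl.decompress(h + packed)) == raw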
2780 2776
2781 2777 def _addrevision(
2782 2778 self,
2783 2779 node,
2784 2780 rawtext,
2785 2781 transaction,
2786 2782 link,
2787 2783 p1,
2788 2784 p2,
2789 2785 flags,
2790 2786 cachedelta,
2791 2787 alwayscache=False,
2792 2788 deltacomputer=None,
2793 2789 sidedata=None,
2794 2790 ):
2795 2791 """internal function to add revisions to the log
2796 2792
2797 2793 see addrevision for argument descriptions.
2798 2794
2799 2795 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2800 2796
2801 2797 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2802 2798 be used.
2803 2799
2804 2800 invariants:
2805 2801 - rawtext is optional (can be None); if not set, cachedelta must be set.
2806 2802 if both are set, they must correspond to each other.
2807 2803 """
2808 2804 if node == self.nullid:
2809 2805 raise error.RevlogError(
2810 2806 _(b"%s: attempt to add null revision") % self.display_id
2811 2807 )
2812 2808 if (
2813 2809 node == self.nodeconstants.wdirid
2814 2810 or node in self.nodeconstants.wdirfilenodeids
2815 2811 ):
2816 2812 raise error.RevlogError(
2817 2813 _(b"%s: attempt to add wdir revision") % self.display_id
2818 2814 )
2819 2815 if self._writinghandles is None:
2820 2816 msg = b'adding revision outside `revlog._writing` context'
2821 2817 raise error.ProgrammingError(msg)
2822 2818
2823 2819 btext = [rawtext]
2824 2820
2825 2821 curr = len(self)
2826 2822 prev = curr - 1
2827 2823
2828 2824 offset = self._get_data_offset(prev)
2829 2825
2830 2826 if self._concurrencychecker:
2831 2827 ifh, dfh, sdfh = self._writinghandles
2832 2828 # XXX no checking for the sidedata file
2833 2829 if self._inline:
2834 2830 # offset is "as if" it were in the .d file, so we need to add on
2835 2831 # the size of the entry metadata.
2836 2832 self._concurrencychecker(
2837 2833 ifh, self._indexfile, offset + curr * self.index.entry_size
2838 2834 )
2839 2835 else:
2840 2836 # Entries in the .i are a consistent size.
2841 2837 self._concurrencychecker(
2842 2838 ifh, self._indexfile, curr * self.index.entry_size
2843 2839 )
2844 2840 self._concurrencychecker(dfh, self._datafile, offset)
2845 2841
2846 2842 p1r, p2r = self.rev(p1), self.rev(p2)
2847 2843
2848 2844 # full versions are inserted when the needed deltas
2849 2845 # become comparable to the uncompressed text
2850 2846 if rawtext is None:
2851 2847             # we need the rawtext size before it is changed by flag processors,
2852 2848             # which is the non-raw size. Use revlog explicitly to avoid filelog's
2853 2849             # extra logic that might remove metadata size.
2854 2850 textlen = mdiff.patchedsize(
2855 2851 revlog.size(self, cachedelta[0]), cachedelta[1]
2856 2852 )
2857 2853 else:
2858 2854 textlen = len(rawtext)
2859 2855
2860 2856 if deltacomputer is None:
2861 2857 write_debug = None
2862 2858 if self.delta_config.debug_delta:
2863 2859 write_debug = transaction._report
2864 2860 deltacomputer = deltautil.deltacomputer(
2865 2861 self, write_debug=write_debug
2866 2862 )
2867 2863
2868 2864 if cachedelta is not None and len(cachedelta) == 2:
2869 2865 # If the cached delta has no information about how it should be
2870 2866 # reused, add the default reuse instruction according to the
2871 2867 # revlog's configuration.
2872 2868 if (
2873 2869 self.delta_config.general_delta
2874 2870 and self.delta_config.lazy_delta_base
2875 2871 ):
2876 2872 delta_base_reuse = DELTA_BASE_REUSE_TRY
2877 2873 else:
2878 2874 delta_base_reuse = DELTA_BASE_REUSE_NO
2879 2875 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2880 2876
2881 2877 revinfo = revlogutils.revisioninfo(
2882 2878 node,
2883 2879 p1,
2884 2880 p2,
2885 2881 btext,
2886 2882 textlen,
2887 2883 cachedelta,
2888 2884 flags,
2889 2885 )
2890 2886
2891 2887 deltainfo = deltacomputer.finddeltainfo(revinfo)
2892 2888
2893 2889 compression_mode = COMP_MODE_INLINE
2894 2890 if self._docket is not None:
2895 2891 default_comp = self._docket.default_compression_header
2896 2892 r = deltautil.delta_compression(default_comp, deltainfo)
2897 2893 compression_mode, deltainfo = r
2898 2894
2899 2895 sidedata_compression_mode = COMP_MODE_INLINE
2900 2896 if sidedata and self.feature_config.has_side_data:
2901 2897 sidedata_compression_mode = COMP_MODE_PLAIN
2902 2898 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2903 2899 sidedata_offset = self._docket.sidedata_end
2904 2900 h, comp_sidedata = self.compress(serialized_sidedata)
2905 2901 if (
2906 2902 h != b'u'
2907 2903 and comp_sidedata[0:1] != b'\0'
2908 2904 and len(comp_sidedata) < len(serialized_sidedata)
2909 2905 ):
2910 2906 assert not h
2911 2907 if (
2912 2908 comp_sidedata[0:1]
2913 2909 == self._docket.default_compression_header
2914 2910 ):
2915 2911 sidedata_compression_mode = COMP_MODE_DEFAULT
2916 2912 serialized_sidedata = comp_sidedata
2917 2913 else:
2918 2914 sidedata_compression_mode = COMP_MODE_INLINE
2919 2915 serialized_sidedata = comp_sidedata
2920 2916 else:
2921 2917 serialized_sidedata = b""
2922 2918             # Don't store the offset if the sidedata is empty; that way
2923 2919             # we can easily detect empty sidedata, and it will be no different
2924 2920             # from sidedata we add manually.
2925 2921 sidedata_offset = 0
2926 2922
2927 2923 rank = RANK_UNKNOWN
2928 2924 if self.feature_config.compute_rank:
2929 2925 if (p1r, p2r) == (nullrev, nullrev):
2930 2926 rank = 1
2931 2927 elif p1r != nullrev and p2r == nullrev:
2932 2928 rank = 1 + self.fast_rank(p1r)
2933 2929 elif p1r == nullrev and p2r != nullrev:
2934 2930 rank = 1 + self.fast_rank(p2r)
2935 2931 else: # merge node
2936 2932 if rustdagop is not None and self.index.rust_ext_compat:
2937 2933 rank = rustdagop.rank(self.index, p1r, p2r)
2938 2934 else:
2939 2935 pmin, pmax = sorted((p1r, p2r))
2940 2936 rank = 1 + self.fast_rank(pmax)
2941 2937 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2942 2938
2943 2939 e = revlogutils.entry(
2944 2940 flags=flags,
2945 2941 data_offset=offset,
2946 2942 data_compressed_length=deltainfo.deltalen,
2947 2943 data_uncompressed_length=textlen,
2948 2944 data_compression_mode=compression_mode,
2949 2945 data_delta_base=deltainfo.base,
2950 2946 link_rev=link,
2951 2947 parent_rev_1=p1r,
2952 2948 parent_rev_2=p2r,
2953 2949 node_id=node,
2954 2950 sidedata_offset=sidedata_offset,
2955 2951 sidedata_compressed_length=len(serialized_sidedata),
2956 2952 sidedata_compression_mode=sidedata_compression_mode,
2957 2953 rank=rank,
2958 2954 )
2959 2955
2960 2956 self.index.append(e)
2961 2957 entry = self.index.entry_binary(curr)
2962 2958 if curr == 0 and self._docket is None:
2963 2959 header = self._format_flags | self._format_version
2964 2960 header = self.index.pack_header(header)
2965 2961 entry = header + entry
2966 2962 self._writeentry(
2967 2963 transaction,
2968 2964 entry,
2969 2965 deltainfo.data,
2970 2966 link,
2971 2967 offset,
2972 2968 serialized_sidedata,
2973 2969 sidedata_offset,
2974 2970 )
2975 2971
2976 2972 rawtext = btext[0]
2977 2973
2978 2974 if alwayscache and rawtext is None:
2979 2975 rawtext = deltacomputer.buildtext(revinfo)
2980 2976
2981 2977 if type(rawtext) == bytes: # only accept immutable objects
2982 2978 self._revisioncache = (node, curr, rawtext)
2983 2979 self._chainbasecache[curr] = deltainfo.chainbase
2984 2980 return curr
2985 2981
2986 2982 def _get_data_offset(self, prev):
2987 2983 """Returns the current offset in the (in-transaction) data file.
2988 2984         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2989 2985 file to store that information: since sidedata can be rewritten to the
2990 2986 end of the data file within a transaction, you can have cases where, for
2991 2987 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2992 2988 to `n - 1`'s sidedata being written after `n`'s data.
2993 2989
2994 2990 TODO cache this in a docket file before getting out of experimental."""
2995 2991 if self._docket is None:
2996 2992 return self.end(prev)
2997 2993 else:
2998 2994 return self._docket.data_end
2999 2995
3000 2996 def _writeentry(
3001 2997 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3002 2998 ):
3003 2999 # Files opened in a+ mode have inconsistent behavior on various
3004 3000 # platforms. Windows requires that a file positioning call be made
3005 3001 # when the file handle transitions between reads and writes. See
3006 3002 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3007 3003 # platforms, Python or the platform itself can be buggy. Some versions
3008 3004 # of Solaris have been observed to not append at the end of the file
3009 3005 # if the file was seeked to before the end. See issue4943 for more.
3010 3006 #
3011 3007 # We work around this issue by inserting a seek() before writing.
3012 3008 # Note: This is likely not necessary on Python 3. However, because
3013 3009 # the file handle is reused for reads and may be seeked there, we need
3014 3010 # to be careful before changing this.
3015 3011 if self._writinghandles is None:
3016 3012 msg = b'adding revision outside `revlog._writing` context'
3017 3013 raise error.ProgrammingError(msg)
3018 3014 ifh, dfh, sdfh = self._writinghandles
3019 3015 if self._docket is None:
3020 3016 ifh.seek(0, os.SEEK_END)
3021 3017 else:
3022 3018 ifh.seek(self._docket.index_end, os.SEEK_SET)
3023 3019 if dfh:
3024 3020 if self._docket is None:
3025 3021 dfh.seek(0, os.SEEK_END)
3026 3022 else:
3027 3023 dfh.seek(self._docket.data_end, os.SEEK_SET)
3028 3024 if sdfh:
3029 3025 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3030 3026
3031 3027 curr = len(self) - 1
3032 3028 if not self._inline:
3033 3029 transaction.add(self._datafile, offset)
3034 3030 if self._sidedatafile:
3035 3031 transaction.add(self._sidedatafile, sidedata_offset)
3036 3032 transaction.add(self._indexfile, curr * len(entry))
3037 3033 if data[0]:
3038 3034 dfh.write(data[0])
3039 3035 dfh.write(data[1])
3040 3036 if sidedata:
3041 3037 sdfh.write(sidedata)
3042 3038 ifh.write(entry)
3043 3039 else:
3044 3040 offset += curr * self.index.entry_size
3045 3041 transaction.add(self._indexfile, offset)
3046 3042 ifh.write(entry)
3047 3043 ifh.write(data[0])
3048 3044 ifh.write(data[1])
3049 3045 assert not sidedata
3050 3046 self._enforceinlinesize(transaction)
3051 3047 if self._docket is not None:
3052 3048 # revlog-v2 always has 3 writing handles, help Pytype
3053 3049 wh1 = self._writinghandles[0]
3054 3050 wh2 = self._writinghandles[1]
3055 3051 wh3 = self._writinghandles[2]
3056 3052 assert wh1 is not None
3057 3053 assert wh2 is not None
3058 3054 assert wh3 is not None
3059 3055 self._docket.index_end = wh1.tell()
3060 3056 self._docket.data_end = wh2.tell()
3061 3057 self._docket.sidedata_end = wh3.tell()
3062 3058
3063 3059 nodemaputil.setup_persistent_nodemap(transaction, self)
3064 3060
3065 3061 def addgroup(
3066 3062 self,
3067 3063 deltas,
3068 3064 linkmapper,
3069 3065 transaction,
3070 3066 alwayscache=False,
3071 3067 addrevisioncb=None,
3072 3068 duplicaterevisioncb=None,
3073 3069 debug_info=None,
3074 3070 delta_base_reuse_policy=None,
3075 3071 ):
3076 3072 """
3077 3073 add a delta group
3078 3074
3079 3075         Given a set of deltas, add them to the revision log. The
3080 3076         first delta is against its parent, which should already be in
3081 3077         our log; the rest are against the previous delta.
3082 3078
3083 3079 If ``addrevisioncb`` is defined, it will be called with arguments of
3084 3080 this revlog and the node that was added.
3085 3081 """
3086 3082
3087 3083 if self._adding_group:
3088 3084 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3089 3085
3090 3086 # read the default delta-base reuse policy from revlog config if the
3091 3087 # group did not specify one.
3092 3088 if delta_base_reuse_policy is None:
3093 3089 if (
3094 3090 self.delta_config.general_delta
3095 3091 and self.delta_config.lazy_delta_base
3096 3092 ):
3097 3093 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3098 3094 else:
3099 3095 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3100 3096
3101 3097 self._adding_group = True
3102 3098 empty = True
3103 3099 try:
3104 3100 with self._writing(transaction):
3105 3101 write_debug = None
3106 3102 if self.delta_config.debug_delta:
3107 3103 write_debug = transaction._report
3108 3104 deltacomputer = deltautil.deltacomputer(
3109 3105 self,
3110 3106 write_debug=write_debug,
3111 3107 debug_info=debug_info,
3112 3108 )
3113 3109 # loop through our set of deltas
3114 3110 for data in deltas:
3115 3111 (
3116 3112 node,
3117 3113 p1,
3118 3114 p2,
3119 3115 linknode,
3120 3116 deltabase,
3121 3117 delta,
3122 3118 flags,
3123 3119 sidedata,
3124 3120 ) = data
3125 3121 link = linkmapper(linknode)
3126 3122 flags = flags or REVIDX_DEFAULT_FLAGS
3127 3123
3128 3124 rev = self.index.get_rev(node)
3129 3125 if rev is not None:
3130 3126 # this can happen if two branches make the same change
3131 3127 self._nodeduplicatecallback(transaction, rev)
3132 3128 if duplicaterevisioncb:
3133 3129 duplicaterevisioncb(self, rev)
3134 3130 empty = False
3135 3131 continue
3136 3132
3137 3133 for p in (p1, p2):
3138 3134 if not self.index.has_node(p):
3139 3135 raise error.LookupError(
3140 3136 p, self.radix, _(b'unknown parent')
3141 3137 )
3142 3138
3143 3139 if not self.index.has_node(deltabase):
3144 3140 raise error.LookupError(
3145 3141 deltabase, self.display_id, _(b'unknown delta base')
3146 3142 )
3147 3143
3148 3144 baserev = self.rev(deltabase)
3149 3145
3150 3146 if baserev != nullrev and self.iscensored(baserev):
3151 3147                         # if base is censored, the delta must be a full replacement
3152 3148                         # in a single patch operation
3153 3149 hlen = struct.calcsize(b">lll")
3154 3150 oldlen = self.rawsize(baserev)
3155 3151 newlen = len(delta) - hlen
3156 3152 if delta[:hlen] != mdiff.replacediffheader(
3157 3153 oldlen, newlen
3158 3154 ):
3159 3155 raise error.CensoredBaseError(
3160 3156 self.display_id, self.node(baserev)
3161 3157 )
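# (a full-replacement delta starts with a single binary diff hunk
# header covering the whole base, i.e. struct.pack(b">lll", 0, oldlen,
# newlen), which is what mdiff.replacediffheader() produces)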
3162 3158
3163 3159 if not flags and self._peek_iscensored(baserev, delta):
3164 3160 flags |= REVIDX_ISCENSORED
3165 3161
3166 3162 # We assume consumers of addrevisioncb will want to retrieve
3167 3163 # the added revision, which will require a call to
3168 3164 # revision(). revision() will fast path if there is a cache
3169 3165 # hit. So, we tell _addrevision() to always cache in this case.
3170 3166 # We're only using addgroup() in the context of changegroup
3171 3167 # generation so the revision data can always be handled as raw
3172 3168 # by the flagprocessor.
3173 3169 rev = self._addrevision(
3174 3170 node,
3175 3171 None,
3176 3172 transaction,
3177 3173 link,
3178 3174 p1,
3179 3175 p2,
3180 3176 flags,
3181 3177 (baserev, delta, delta_base_reuse_policy),
3182 3178 alwayscache=alwayscache,
3183 3179 deltacomputer=deltacomputer,
3184 3180 sidedata=sidedata,
3185 3181 )
3186 3182
3187 3183 if addrevisioncb:
3188 3184 addrevisioncb(self, rev)
3189 3185 empty = False
3190 3186 finally:
3191 3187 self._adding_group = False
3192 3188 return not empty
3193 3189
3194 3190 def iscensored(self, rev):
3195 3191 """Check if a file revision is censored."""
3196 3192 if not self.feature_config.censorable:
3197 3193 return False
3198 3194
3199 3195 return self.flags(rev) & REVIDX_ISCENSORED
3200 3196
3201 3197 def _peek_iscensored(self, baserev, delta):
3202 3198 """Quickly check if a delta produces a censored revision."""
3203 3199 if not self.feature_config.censorable:
3204 3200 return False
3205 3201
3206 3202 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3207 3203
3208 3204 def getstrippoint(self, minlink):
3209 3205 """find the minimum rev that must be stripped to strip the linkrev
3210 3206
3211 3207 Returns a tuple containing the minimum rev and a set of all revs that
3212 3208 have linkrevs that will be broken by this strip.
3213 3209 """
3214 3210 return storageutil.resolvestripinfo(
3215 3211 minlink,
3216 3212 len(self) - 1,
3217 3213 self.headrevs(),
3218 3214 self.linkrev,
3219 3215 self.parentrevs,
3220 3216 )
3221 3217
3222 3218 def strip(self, minlink, transaction):
3223 3219 """truncate the revlog on the first revision with a linkrev >= minlink
3224 3220
3225 3221 This function is called when we're stripping revision minlink and
3226 3222 its descendants from the repository.
3227 3223
3228 3224 We have to remove all revisions with linkrev >= minlink, because
3229 3225 the equivalent changelog revisions will be renumbered after the
3230 3226 strip.
3231 3227
3232 3228 So we truncate the revlog on the first of these revisions, and
3233 3229 trust that the caller has saved the revisions that shouldn't be
3234 3230 removed and that it'll re-add them after this truncation.
3235 3231 """
3236 3232 if len(self) == 0:
3237 3233 return
3238 3234
3239 3235 rev, _ = self.getstrippoint(minlink)
3240 3236 if rev == len(self):
3241 3237 return
3242 3238
3243 3239 # first truncate the files on disk
3244 3240 data_end = self.start(rev)
3245 3241 if not self._inline:
3246 3242 transaction.add(self._datafile, data_end)
3247 3243 end = rev * self.index.entry_size
3248 3244 else:
3249 3245 end = data_end + (rev * self.index.entry_size)
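# for an inline revlog the index entries and their data interleave in a
# single file, so the truncation point also counts the `rev` index
# entries stored before that data; e.g. with entry_size == 64 and
# rev == 3: end == data_end + 3 * 64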
3250 3246
3251 3247 if self._sidedatafile:
3252 3248 sidedata_end = self.sidedata_cut_off(rev)
3253 3249 transaction.add(self._sidedatafile, sidedata_end)
3254 3250
3255 3251 transaction.add(self._indexfile, end)
3256 3252 if self._docket is not None:
3257 3253 # XXX we could, leverage the docket while stripping. However it is
3258 3254 # not powerfull enough at the time of this comment
3259 3255 self._docket.index_end = end
3260 3256 self._docket.data_end = data_end
3261 3257 self._docket.sidedata_end = sidedata_end
3262 3258 self._docket.write(transaction, stripping=True)
3263 3259
3264 3260 # then reset internal state in memory to forget those revisions
3265 3261 self._revisioncache = None
3266 3262 self._chaininfocache = util.lrucachedict(500)
3267 3263 self._segmentfile.clear_cache()
3268 3264 self._segmentfile_sidedata.clear_cache()
3269 3265
3270 3266 del self.index[rev:-1]
3271 3267
3272 3268 def checksize(self):
3273 3269 """Check size of index and data files
3274 3270
3275 3271 return a (dd, di) tuple.
3276 3272 - dd: extra bytes for the "data" file
3277 3273 - di: extra bytes for the "index" file
3278 3274
3279 3275 A healthy revlog will return (0, 0).
3280 3276 """
3281 3277 expected = 0
3282 3278 if len(self):
3283 3279 expected = max(0, self.end(len(self) - 1))
3284 3280
3285 3281 try:
3286 3282 with self._datafp() as f:
3287 3283 f.seek(0, io.SEEK_END)
3288 3284 actual = f.tell()
3289 3285 dd = actual - expected
3290 3286 except FileNotFoundError:
3291 3287 dd = 0
3292 3288
3293 3289 try:
3294 3290 f = self.opener(self._indexfile)
3295 3291 f.seek(0, io.SEEK_END)
3296 3292 actual = f.tell()
3297 3293 f.close()
3298 3294 s = self.index.entry_size
3299 3295 i = max(0, actual // s)
3300 3296 di = actual - (i * s)
3301 3297 if self._inline:
3302 3298 databytes = 0
3303 3299 for r in self:
3304 3300 databytes += max(0, self.length(r))
3305 3301 dd = 0
3306 3302 di = actual - len(self) * s - databytes
3307 3303 except FileNotFoundError:
3308 3304 di = 0
3309 3305
3310 3306 return (dd, di)
3311 3307
3312 3308 def files(self):
3313 3309 res = [self._indexfile]
3314 3310 if self._docket_file is None:
3315 3311 if not self._inline:
3316 3312 res.append(self._datafile)
3317 3313 else:
3318 3314 res.append(self._docket_file)
3319 3315 res.extend(self._docket.old_index_filepaths(include_empty=False))
3320 3316 if self._docket.data_end:
3321 3317 res.append(self._datafile)
3322 3318 res.extend(self._docket.old_data_filepaths(include_empty=False))
3323 3319 if self._docket.sidedata_end:
3324 3320 res.append(self._sidedatafile)
3325 3321 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3326 3322 return res
3327 3323
3328 3324 def emitrevisions(
3329 3325 self,
3330 3326 nodes,
3331 3327 nodesorder=None,
3332 3328 revisiondata=False,
3333 3329 assumehaveparentrevisions=False,
3334 3330 deltamode=repository.CG_DELTAMODE_STD,
3335 3331 sidedata_helpers=None,
3336 3332 debug_info=None,
3337 3333 ):
3338 3334 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3339 3335 raise error.ProgrammingError(
3340 3336 b'unhandled value for nodesorder: %s' % nodesorder
3341 3337 )
3342 3338
3343 3339 if nodesorder is None and not self.delta_config.general_delta:
3344 3340 nodesorder = b'storage'
3345 3341
3346 3342 if (
3347 3343 not self._storedeltachains
3348 3344 and deltamode != repository.CG_DELTAMODE_PREV
3349 3345 ):
3350 3346 deltamode = repository.CG_DELTAMODE_FULL
3351 3347
3352 3348 return storageutil.emitrevisions(
3353 3349 self,
3354 3350 nodes,
3355 3351 nodesorder,
3356 3352 revlogrevisiondelta,
3357 3353 deltaparentfn=self.deltaparent,
3358 3354 candeltafn=self._candelta,
3359 3355 rawsizefn=self.rawsize,
3360 3356 revdifffn=self.revdiff,
3361 3357 flagsfn=self.flags,
3362 3358 deltamode=deltamode,
3363 3359 revisiondata=revisiondata,
3364 3360 assumehaveparentrevisions=assumehaveparentrevisions,
3365 3361 sidedata_helpers=sidedata_helpers,
3366 3362 debug_info=debug_info,
3367 3363 )
3368 3364
3369 3365 DELTAREUSEALWAYS = b'always'
3370 3366 DELTAREUSESAMEREVS = b'samerevs'
3371 3367 DELTAREUSENEVER = b'never'
3372 3368
3373 3369 DELTAREUSEFULLADD = b'fulladd'
3374 3370
3375 3371 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3376 3372
3377 3373 def clone(
3378 3374 self,
3379 3375 tr,
3380 3376 destrevlog,
3381 3377 addrevisioncb=None,
3382 3378 deltareuse=DELTAREUSESAMEREVS,
3383 3379 forcedeltabothparents=None,
3384 3380 sidedata_helpers=None,
3385 3381 ):
3386 3382 """Copy this revlog to another, possibly with format changes.
3387 3383
3388 3384 The destination revlog will contain the same revisions and nodes.
3389 3385 However, it may not be bit-for-bit identical due to e.g. delta encoding
3390 3386 differences.
3391 3387
3392 3388 The ``deltareuse`` argument controls how deltas from the existing revlog
3393 3389 are preserved in the destination revlog. The argument can have the
3394 3390 following values:
3395 3391
3396 3392 DELTAREUSEALWAYS
3397 3393 Deltas will always be reused (if possible), even if the destination
3398 3394 revlog would not select the same revisions for the delta. This is the
3399 3395 fastest mode of operation.
3400 3396 DELTAREUSESAMEREVS
3401 3397 Deltas will be reused if the destination revlog would pick the same
3402 3398 revisions for the delta. This mode strikes a balance between speed
3403 3399 and optimization.
3404 3400 DELTAREUSENEVER
3405 3401 Deltas will never be reused. This is the slowest mode of execution.
3406 3402 This mode can be used to recompute deltas (e.g. if the diff/delta
3407 3403 algorithm changes).
3408 3404 DELTAREUSEFULLADD
3409 3405 Revisions will be re-added as if they were new content. This is
3410 3406 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3411 3407 e.g. large file detection and handling.
3412 3408
3413 3409 Delta computation can be slow, so the choice of delta reuse policy can
3414 3410 significantly affect run time.
3415 3411
3416 3412 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3417 3413 two extremes. Deltas will be reused if they are appropriate. But if the
3418 3414 delta could choose a better revision, it will do so. This means if you
3419 3415 are converting a non-generaldelta revlog to a generaldelta revlog,
3420 3416 deltas will be recomputed if the delta's parent isn't a parent of the
3421 3417 revision.
3422 3418
3423 3419 In addition to the delta policy, the ``forcedeltabothparents``
3424 3420 argument controls whether to force computing deltas against both parents
3425 3421 for merges. When unset, the destination revlog's existing setting is used.
3426 3422
3427 3423 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3428 3424 `sidedata_helpers`.
3429 3425 """
3430 3426 if deltareuse not in self.DELTAREUSEALL:
3431 3427 raise ValueError(
3432 3428 _(b'value for deltareuse invalid: %s') % deltareuse
3433 3429 )
3434 3430
3435 3431 if len(destrevlog):
3436 3432 raise ValueError(_(b'destination revlog is not empty'))
3437 3433
3438 3434 if getattr(self, 'filteredrevs', None):
3439 3435 raise ValueError(_(b'source revlog has filtered revisions'))
3440 3436 if getattr(destrevlog, 'filteredrevs', None):
3441 3437 raise ValueError(_(b'destination revlog has filtered revisions'))
3442 3438
3443 3439 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3444 3440 # if possible.
3445 3441 old_delta_config = destrevlog.delta_config
3446 3442 destrevlog.delta_config = destrevlog.delta_config.copy()
3447 3443
3448 3444 try:
3449 3445 if deltareuse == self.DELTAREUSEALWAYS:
3450 3446 destrevlog.delta_config.lazy_delta_base = True
3451 3447 destrevlog.delta_config.lazy_delta = True
3452 3448 elif deltareuse == self.DELTAREUSESAMEREVS:
3453 3449 destrevlog.delta_config.lazy_delta_base = False
3454 3450 destrevlog.delta_config.lazy_delta = True
3455 3451 elif deltareuse == self.DELTAREUSENEVER:
3456 3452 destrevlog.delta_config.lazy_delta_base = False
3457 3453 destrevlog.delta_config.lazy_delta = False
3458 3454
3459 3455 delta_both_parents = (
3460 3456 forcedeltabothparents or old_delta_config.delta_both_parents
3461 3457 )
3462 3458 destrevlog.delta_config.delta_both_parents = delta_both_parents
3463 3459
3464 3460 with self.reading(), destrevlog._writing(tr):
3465 3461 self._clone(
3466 3462 tr,
3467 3463 destrevlog,
3468 3464 addrevisioncb,
3469 3465 deltareuse,
3470 3466 forcedeltabothparents,
3471 3467 sidedata_helpers,
3472 3468 )
3473 3469
3474 3470 finally:
3475 3471 destrevlog.delta_config = old_delta_config
3476 3472
3477 3473 def _clone(
3478 3474 self,
3479 3475 tr,
3480 3476 destrevlog,
3481 3477 addrevisioncb,
3482 3478 deltareuse,
3483 3479 forcedeltabothparents,
3484 3480 sidedata_helpers,
3485 3481 ):
3486 3482 """perform the core duty of `revlog.clone` after parameter processing"""
3487 3483 write_debug = None
3488 3484 if self.delta_config.debug_delta:
3489 3485 write_debug = tr._report
3490 3486 deltacomputer = deltautil.deltacomputer(
3491 3487 destrevlog,
3492 3488 write_debug=write_debug,
3493 3489 )
3494 3490 index = self.index
3495 3491 for rev in self:
3496 3492 entry = index[rev]
3497 3493
3498 3494 # Some classes override linkrev to take filtered revs into
3499 3495 # account. Use raw entry from index.
3500 3496 flags = entry[0] & 0xFFFF
3501 3497 linkrev = entry[4]
3502 3498 p1 = index[entry[5]][7]
3503 3499 p2 = index[entry[6]][7]
3504 3500 node = entry[7]
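# (index entry layout, as used here: entry[0] packs the data offset and
# flags, entry[4] is the linkrev, entry[5]/entry[6] the parent revs, and
# entry[7] the node id)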
3505 3501
3506 3502 # (Possibly) reuse the delta from the revlog if allowed and
3507 3503 # the revlog chunk is a delta.
3508 3504 cachedelta = None
3509 3505 rawtext = None
3510 3506 if deltareuse == self.DELTAREUSEFULLADD:
3511 3507 text = self._revisiondata(rev)
3512 3508 sidedata = self.sidedata(rev)
3513 3509
3514 3510 if sidedata_helpers is not None:
3515 3511 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3516 3512 self, sidedata_helpers, sidedata, rev
3517 3513 )
3518 3514 flags = flags | new_flags[0] & ~new_flags[1]
3519 3515
3520 3516 destrevlog.addrevision(
3521 3517 text,
3522 3518 tr,
3523 3519 linkrev,
3524 3520 p1,
3525 3521 p2,
3526 3522 cachedelta=cachedelta,
3527 3523 node=node,
3528 3524 flags=flags,
3529 3525 deltacomputer=deltacomputer,
3530 3526 sidedata=sidedata,
3531 3527 )
3532 3528 else:
3533 3529 if destrevlog.delta_config.lazy_delta:
3534 3530 dp = self.deltaparent(rev)
3535 3531 if dp != nullrev:
3536 3532 cachedelta = (dp, bytes(self._chunk(rev)))
3537 3533
3538 3534 sidedata = None
3539 3535 if not cachedelta:
3540 3536 try:
3541 3537 rawtext = self._revisiondata(rev)
3542 3538 except error.CensoredNodeError as censored:
3543 3539 assert flags & REVIDX_ISCENSORED
3544 3540 rawtext = censored.tombstone
3545 3541 sidedata = self.sidedata(rev)
3546 3542 if sidedata is None:
3547 3543 sidedata = self.sidedata(rev)
3548 3544
3549 3545 if sidedata_helpers is not None:
3550 3546 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3551 3547 self, sidedata_helpers, sidedata, rev
3552 3548 )
3553 3549 flags = flags | new_flags[0] & ~new_flags[1]
3554 3550
3555 3551 destrevlog._addrevision(
3556 3552 node,
3557 3553 rawtext,
3558 3554 tr,
3559 3555 linkrev,
3560 3556 p1,
3561 3557 p2,
3562 3558 flags,
3563 3559 cachedelta,
3564 3560 deltacomputer=deltacomputer,
3565 3561 sidedata=sidedata,
3566 3562 )
3567 3563
3568 3564 if addrevisioncb:
3569 3565 addrevisioncb(self, rev, node)
3570 3566
3571 3567 def censorrevision(self, tr, censornode, tombstone=b''):
3572 3568 if self._format_version == REVLOGV0:
3573 3569 raise error.RevlogError(
3574 3570 _(b'cannot censor with version %d revlogs')
3575 3571 % self._format_version
3576 3572 )
3577 3573 elif self._format_version == REVLOGV1:
3578 3574 rewrite.v1_censor(self, tr, censornode, tombstone)
3579 3575 else:
3580 3576 rewrite.v2_censor(self, tr, censornode, tombstone)
3581 3577
3582 3578 def verifyintegrity(self, state):
3583 3579 """Verifies the integrity of the revlog.
3584 3580
3585 3581 Yields ``revlogproblem`` instances describing problems that are
3586 3582 found.
3587 3583 """
3588 3584 dd, di = self.checksize()
3589 3585 if dd:
3590 3586 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3591 3587 if di:
3592 3588 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3593 3589
3594 3590 version = self._format_version
3595 3591
3596 3592 # The verifier tells us which revlog version we should expect.
3597 3593 if version != state[b'expectedversion']:
3598 3594 yield revlogproblem(
3599 3595 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3600 3596 % (self.display_id, version, state[b'expectedversion'])
3601 3597 )
3602 3598
3603 3599 state[b'skipread'] = set()
3604 3600 state[b'safe_renamed'] = set()
3605 3601
3606 3602 for rev in self:
3607 3603 node = self.node(rev)
3608 3604
3609 3605 # Verify contents. 4 cases to care about:
3610 3606 #
3611 3607 # common: the most common case
3612 3608 # rename: with a rename
3613 3609 # meta: file content starts with b'\1\n', the metadata
3614 3610 # header defined in filelog.py, but without a rename
3615 3611 # ext: content stored externally
3616 3612 #
3617 3613 # More formally, their differences are shown below:
3618 3614 #
3619 3615 # | common | rename | meta | ext
3620 3616 # -------------------------------------------------------
3621 3617 # flags() | 0 | 0 | 0 | not 0
3622 3618 # renamed() | False | True | False | ?
3623 3619 # rawtext[0:2]=='\1\n'| False | True | True | ?
3624 3620 #
3625 3621 # "rawtext" means the raw text stored in revlog data, which
3626 3622 # could be retrieved by "rawdata(rev)". "text"
3627 3623 # mentioned below is "revision(rev)".
3628 3624 #
3629 3625 # There are 3 different lengths stored physically:
3630 3626 # 1. L1: rawsize, stored in revlog index
3631 3627 # 2. L2: len(rawtext), stored in revlog data
3632 3628 # 3. L3: len(text), stored in revlog data if flags==0, or
3633 3629 # possibly somewhere else if flags!=0
3634 3630 #
3635 3631 # L1 should be equal to L2. L3 could be different from them.
3636 3632 # "text" may or may not affect commit hash depending on flag
3637 3633 # processors (see flagutil.addflagprocessor).
3638 3634 #
3639 3635 # | common | rename | meta | ext
3640 3636 # -------------------------------------------------
3641 3637 # rawsize() | L1 | L1 | L1 | L1
3642 3638 # size() | L1 | L2-LM | L1(*) | L1 (?)
3643 3639 # len(rawtext) | L2 | L2 | L2 | L2
3644 3640 # len(text) | L2 | L2 | L2 | L3
3645 3641 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3646 3642 #
3647 3643 # LM: length of metadata, depending on rawtext
3648 3644 # (*): not ideal, see comment in filelog.size
3649 3645 # (?): could be "- len(meta)" if the resolved content has
3650 3646 # rename metadata
3651 3647 #
3652 3648 # Checks needed to be done:
3653 3649 # 1. length check: L1 == L2, in all cases.
3654 3650 # 2. hash check: depending on flag processor, we may need to
3655 3651 # use either "text" (external), or "rawtext" (in revlog).
3656 3652
3657 3653 try:
3658 3654 skipflags = state.get(b'skipflags', 0)
3659 3655 if skipflags:
3660 3656 skipflags &= self.flags(rev)
3661 3657
3662 3658 _verify_revision(self, skipflags, state, node)
3663 3659
3664 3660 l1 = self.rawsize(rev)
3665 3661 l2 = len(self.rawdata(node))
3666 3662
3667 3663 if l1 != l2:
3668 3664 yield revlogproblem(
3669 3665 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3670 3666 node=node,
3671 3667 )
3672 3668
3673 3669 except error.CensoredNodeError:
3674 3670 if state[b'erroroncensored']:
3675 3671 yield revlogproblem(
3676 3672 error=_(b'censored file data'), node=node
3677 3673 )
3678 3674 state[b'skipread'].add(node)
3679 3675 except Exception as e:
3680 3676 yield revlogproblem(
3681 3677 error=_(b'unpacking %s: %s')
3682 3678 % (short(node), stringutil.forcebytestr(e)),
3683 3679 node=node,
3684 3680 )
3685 3681 state[b'skipread'].add(node)
3686 3682
3687 3683 def storageinfo(
3688 3684 self,
3689 3685 exclusivefiles=False,
3690 3686 sharedfiles=False,
3691 3687 revisionscount=False,
3692 3688 trackedsize=False,
3693 3689 storedsize=False,
3694 3690 ):
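# Hedged usage sketch: each boolean flag requests one key in the
# returned dict, e.g.
#
#   info = rl.storageinfo(revisionscount=True, storedsize=True)
#   info[b'revisionscount']  # number of revisions
#   info[b'storedsize']      # total on-disk bytes of self.files()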
3695 3691 d = {}
3696 3692
3697 3693 if exclusivefiles:
3698 3694 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3699 3695 if not self._inline:
3700 3696 d[b'exclusivefiles'].append((self.opener, self._datafile))
3701 3697
3702 3698 if sharedfiles:
3703 3699 d[b'sharedfiles'] = []
3704 3700
3705 3701 if revisionscount:
3706 3702 d[b'revisionscount'] = len(self)
3707 3703
3708 3704 if trackedsize:
3709 3705 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3710 3706
3711 3707 if storedsize:
3712 3708 d[b'storedsize'] = sum(
3713 3709 self.opener.stat(path).st_size for path in self.files()
3714 3710 )
3715 3711
3716 3712 return d
3717 3713
3718 3714 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3719 3715 if not self.feature_config.has_side_data:
3720 3716 return
3721 3717 # revlog formats with sidedata support do not support inline storage
3722 3718 assert not self._inline
3723 3719 if not helpers[1] and not helpers[2]:
3724 3720 # Nothing to generate or remove
3725 3721 return
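# (as used above: helpers[1] maps sidedata categories to generator
# functions and helpers[2] is the set of categories to remove; see
# revlogutils.sidedata.get_sidedata_helpers for the exact structure)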
3726 3722
3727 3723 new_entries = []
3728 3724 # append the new sidedata
3729 3725 with self._writing(transaction):
3730 3726 ifh, dfh, sdfh = self._writinghandles
3731 3727 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3732 3728
3733 3729 current_offset = sdfh.tell()
3734 3730 for rev in range(startrev, endrev + 1):
3735 3731 entry = self.index[rev]
3736 3732 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3737 3733 store=self,
3738 3734 sidedata_helpers=helpers,
3739 3735 sidedata={},
3740 3736 rev=rev,
3741 3737 )
3742 3738
3743 3739 serialized_sidedata = sidedatautil.serialize_sidedata(
3744 3740 new_sidedata
3745 3741 )
3746 3742
3747 3743 sidedata_compression_mode = COMP_MODE_INLINE
3748 3744 if serialized_sidedata and self.feature_config.has_side_data:
3749 3745 sidedata_compression_mode = COMP_MODE_PLAIN
3750 3746 h, comp_sidedata = self.compress(serialized_sidedata)
3751 3747 if (
3752 3748 h != b'u'
3753 3749 and comp_sidedata[0] != b'\0'
3754 3750 and len(comp_sidedata) < len(serialized_sidedata)
3755 3751 ):
3756 3752 assert not h
3757 3753 if (
3758 3754 comp_sidedata[0]
3759 3755 == self._docket.default_compression_header
3760 3756 ):
3761 3757 sidedata_compression_mode = COMP_MODE_DEFAULT
3762 3758 serialized_sidedata = comp_sidedata
3763 3759 else:
3764 3760 sidedata_compression_mode = COMP_MODE_INLINE
3765 3761 serialized_sidedata = comp_sidedata
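# COMP_MODE_DEFAULT lets the index entry omit its compression header
# because it matches the docket-wide default; COMP_MODE_INLINE keeps
# the header as the first byte of the stored payload.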
3766 3762 if entry[8] != 0 or entry[9] != 0:
3767 3763 # rewriting entries that already have sidedata is not
3768 3764 # supported yet, because it introduces garbage data in the
3769 3765 # revlog.
3770 3766 msg = b"rewriting existing sidedata is not supported yet"
3771 3767 raise error.Abort(msg)
3772 3768
3773 3769 # Apply (potential) flags to add and to remove after running
3774 3770 # the sidedata helpers
3775 3771 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3776 3772 entry_update = (
3777 3773 current_offset,
3778 3774 len(serialized_sidedata),
3779 3775 new_offset_flags,
3780 3776 sidedata_compression_mode,
3781 3777 )
3782 3778
3783 3779 # the sidedata computation might have moved the file cursors around
3784 3780 sdfh.seek(current_offset, os.SEEK_SET)
3785 3781 sdfh.write(serialized_sidedata)
3786 3782 new_entries.append(entry_update)
3787 3783 current_offset += len(serialized_sidedata)
3788 3784 self._docket.sidedata_end = sdfh.tell()
3789 3785
3790 3786 # rewrite the new index entries
3791 3787 ifh.seek(startrev * self.index.entry_size)
3792 3788 for i, e in enumerate(new_entries):
3793 3789 rev = startrev + i
3794 3790 self.index.replace_sidedata_info(rev, *e)
3795 3791 packed = self.index.entry_binary(rev)
3796 3792 if rev == 0 and self._docket is None:
3797 3793 header = self._format_flags | self._format_version
3798 3794 header = self.index.pack_header(header)
3799 3795 packed = header + packed
3800 3796 ifh.write(packed)