revlog: also migrates `revlog.upperboundcomp` to ConfigClass...
marmoute
r51976:5645524c default
@@ -1,3797 +1,3807 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
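# Illustration (editor's sketch, not part of this changeset): the copy()
# pattern used by the _Config classes above. The base class copies itself
# shallowly via `self.__class__(**self.__dict__)`, so FeatureConfig re-copies
# its mutable options dict to keep instances independent. The Toy* names
# below are hypothetical stand-ins.
class ToyConfig:
    def __init__(self, options=None):
        self.options = {} if options is None else options

    def copy(self):
        # shallow copy: the clone still shares the same `options` dict
        return self.__class__(**self.__dict__)


class ToyFeatureConfig(ToyConfig):
    def copy(self):
        new = super().copy()
        # re-copy the dict so mutating one instance leaves the other alone
        new.options = self.options.copy()
        return new


a = ToyFeatureConfig({b'zlib.level': 6})
b = a.copy()
b.options[b'zlib.level'] = 9
assert a.options[b'zlib.level'] == 6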
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
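# Illustration (editor's sketch, not part of this changeset): the migration
# this commit performs for `upperboundcomp`. The constructor argument is no
# longer stored as a plain `revlog.upperboundcomp` attribute; it now lives on
# DeltaConfig as `upper_bound_comp`, and the old name survives only as a
# deprecation-warning property (added further down in this diff). The Toy*
# names below are hypothetical stand-ins for the real classes.
import warnings


class ToyDeltaConfig:
    def __init__(self):
        self.upper_bound_comp = None


class ToyRevlog:
    def __init__(self, upperboundcomp=None):
        self.delta_config = ToyDeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

    @property
    def upperboundcomp(self):
        warnings.warn(
            "use revlog.delta_config.upper_bound_comp",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp


rl = ToyRevlog(upperboundcomp=3)
assert rl.delta_config.upper_bound_comp == 3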
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
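# Illustration (editor's sketch, not part of this changeset): what
# is_inline_index() above does with the first index bytes. It assumes the
# header is a big-endian 32-bit integer whose low 16 bits hold the format
# version and whose high bits hold the format flags; the inline flag value
# (1 << 16) is how this sketch models FLAG_INLINE_DATA.
import struct


def split_header(header_bytes):
    header = struct.unpack(">I", header_bytes[:4])[0]
    return header & ~0xFFFF, header & 0xFFFF


# a version-1 header with the (assumed) inline-data bit set
flags, version = split_header(struct.pack(">I", (1 << 16) | 1))
assert version == 1 and flags & (1 << 16)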
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exists for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code to understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must reliably be set by normal code, but
425 425 that test, debug, or performance measurement code might not set this to
426 426 an accurate value.
427 427 """
428 self.upperboundcomp = upperboundcomp
429 428
430 429 self.radix = radix
431 430
432 431 self._docket_file = None
433 432 self._indexfile = None
434 433 self._datafile = None
435 434 self._sidedatafile = None
436 435 self._nodemap_file = None
437 436 self.postfix = postfix
438 437 self._trypending = trypending
439 438 self._try_split = try_split
440 439 self.opener = opener
441 440 if persistentnodemap:
442 441 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 442
444 443 assert target[0] in ALL_KINDS
445 444 assert len(target) == 2
446 445 self.target = target
447 446 if b'feature-config' in self.opener.options:
448 447 self.feature_config = self.opener.options[b'feature-config'].copy()
449 448 else:
450 449 self.feature_config = FeatureConfig()
451 450 self.feature_config.censorable = censorable
452 451 self.feature_config.canonical_parent_order = canonical_parent_order
453 452 if b'data-config' in self.opener.options:
454 453 self.data_config = self.opener.options[b'data-config'].copy()
455 454 else:
456 455 self.data_config = DataConfig()
457 456 self.data_config.check_ambig = checkambig
458 457 self.data_config.mmap_large_index = mmaplargeindex
459 458 if b'delta-config' in self.opener.options:
460 459 self.delta_config = self.opener.options[b'delta-config'].copy()
461 460 else:
462 461 self.delta_config = DeltaConfig()
462 self.delta_config.upper_bound_comp = upperboundcomp
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 util.nouideprecwarn(
496 496 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
497 497 )
498 498 return self.delta_config.general_delta
499 499
500 500 @property
501 501 def _checkambig(self):
502 502 """temporary compatibility proxy"""
503 503 util.nouideprecwarn(
504 504 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
505 505 )
506 506 return self.data_config.check_ambig
507 507
508 508 @property
509 509 def _mmaplargeindex(self):
510 510 """temporary compatibility proxy"""
511 511 util.nouideprecwarn(
512 512 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
513 513 )
514 514 return self.data_config.mmap_large_index
515 515
516 516 @property
517 517 def _censorable(self):
518 518 """temporary compatibility proxy"""
519 519 util.nouideprecwarn(
520 520 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
521 521 )
522 522 return self.feature_config.censorable
523 523
524 524 @property
525 525 def _chunkcachesize(self):
526 526 """temporary compatibility proxy"""
527 527 util.nouideprecwarn(
528 528 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
529 529 )
530 530 return self.data_config.chunk_cache_size
531 531
532 532 @property
533 533 def _maxchainlen(self):
534 534 """temporary compatibility proxy"""
535 535 util.nouideprecwarn(
536 536 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
537 537 )
538 538 return self.delta_config.max_chain_len
539 539
540 540 @property
541 541 def _deltabothparents(self):
542 542 """temporary compatibility proxy"""
543 543 util.nouideprecwarn(
544 544 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
545 545 )
546 546 return self.delta_config.delta_both_parents
547 547
548 548 @property
549 549 def _candidate_group_chunk_size(self):
550 550 """temporary compatibility proxy"""
551 551 util.nouideprecwarn(
552 552 b"use revlog.delta_config.candidate_group_chunk_size",
553 553 b"6.6",
554 554 stacklevel=2,
555 555 )
556 556 return self.delta_config.candidate_group_chunk_size
557 557
558 558 @property
559 559 def _debug_delta(self):
560 560 """temporary compatibility proxy"""
561 561 util.nouideprecwarn(
562 562 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
563 563 )
564 564 return self.delta_config.debug_delta
565 565
566 566 @property
567 567 def _compengine(self):
568 568 """temporary compatibility proxy"""
569 569 util.nouideprecwarn(
570 570 b"use revlog.feature_config.compression_engine",
571 571 b"6.6",
572 572 stacklevel=2,
573 573 )
574 574 return self.feature_config.compression_engine
575 575
576 576 @property
577 def upperboundcomp(self):
578 """temporary compatibility proxy"""
579 util.nouideprecwarn(
580 b"use revlog.delta_config.upper_bound_comp",
581 b"6.6",
582 stacklevel=2,
583 )
584 return self.delta_config.upper_bound_comp
585
586 @property
577 587 def _compengineopts(self):
578 588 """temporary compatibility proxy"""
579 589 util.nouideprecwarn(
580 590 b"use revlog.feature_config.compression_engine_options",
581 591 b"6.6",
582 592 stacklevel=2,
583 593 )
584 594 return self.feature_config.compression_engine_options
585 595
586 596 @property
587 597 def _maxdeltachainspan(self):
588 598 """temporary compatibility proxy"""
589 599 util.nouideprecwarn(
590 600 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
591 601 )
592 602 return self.delta_config.max_deltachain_span
593 603
594 604 @property
595 605 def _withsparseread(self):
596 606 """temporary compatibility proxy"""
597 607 util.nouideprecwarn(
598 608 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
599 609 )
600 610 return self.data_config.with_sparse_read
601 611
602 612 @property
603 613 def _sparserevlog(self):
604 614 """temporary compatibility proxy"""
605 615 util.nouideprecwarn(
606 616 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
607 617 )
608 618 return self.delta_config.sparse_revlog
609 619
610 620 @property
611 621 def hassidedata(self):
612 622 """temporary compatibility proxy"""
613 623 util.nouideprecwarn(
614 624 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
615 625 )
616 626 return self.feature_config.has_side_data
617 627
618 628 @property
619 629 def _srdensitythreshold(self):
620 630 """temporary compatibility proxy"""
621 631 util.nouideprecwarn(
622 632 b"use revlog.data_config.sr_density_threshold",
623 633 b"6.6",
624 634 stacklevel=2,
625 635 )
626 636 return self.data_config.sr_density_threshold
627 637
628 638 @property
629 639 def _srmingapsize(self):
630 640 """temporary compatibility proxy"""
631 641 util.nouideprecwarn(
632 642 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
633 643 )
634 644 return self.data_config.sr_min_gap_size
635 645
636 646 @property
637 647 def _compute_rank(self):
638 648 """temporary compatibility proxy"""
639 649 util.nouideprecwarn(
640 650 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
641 651 )
642 652 return self.feature_config.compute_rank
643 653
644 654 @property
645 655 def canonical_parent_order(self):
646 656 """temporary compatibility proxy"""
647 657 util.nouideprecwarn(
648 658 b"use revlog.feature_config.canonical_parent_order",
649 659 b"6.6",
650 660 stacklevel=2,
651 661 )
652 662 return self.feature_config.canonical_parent_order
653 663
654 664 @property
655 665 def _lazydelta(self):
656 666 """temporary compatibility proxy"""
657 667 util.nouideprecwarn(
658 668 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
659 669 )
660 670 return self.delta_config.lazy_delta
661 671
662 672 @property
663 673 def _lazydeltabase(self):
664 674 """temporary compatibility proxy"""
665 675 util.nouideprecwarn(
666 676 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
667 677 )
668 678 return self.delta_config.lazy_delta_base
669 679
670 680 def _init_opts(self):
671 681 """process options (from above/config) to setup associated default revlog mode
672 682
673 683 These values might be affected when actually reading on disk information.
674 684
675 685 The relevant values are returned for use in _loadindex().
676 686
677 687 * newversionflags:
678 688 version header to use if we need to create a new revlog
679 689
680 690 * mmapindexthreshold:
681 691 minimal index size at which to start using mmap
682 692
683 693 * force_nodemap:
684 694 force the usage of a "development" version of the nodemap code
685 695 """
686 696 opts = self.opener.options
687 697
688 698 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
689 699 new_header = CHANGELOGV2
690 700 compute_rank = opts.get(b'changelogv2.compute-rank', True)
691 701 self.feature_config.compute_rank = compute_rank
692 702 elif b'revlogv2' in opts:
693 703 new_header = REVLOGV2
694 704 elif b'revlogv1' in opts:
695 705 new_header = REVLOGV1 | FLAG_INLINE_DATA
696 706 if b'generaldelta' in opts:
697 707 new_header |= FLAG_GENERALDELTA
698 708 elif b'revlogv0' in self.opener.options:
699 709 new_header = REVLOGV0
700 710 else:
701 711 new_header = REVLOG_DEFAULT_VERSION
702 712
703 713 mmapindexthreshold = None
704 714 if self.data_config.mmap_large_index:
705 715 mmapindexthreshold = self.data_config.mmap_index_threshold
706 716 if self.feature_config.enable_ellipsis:
707 717 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
708 718
709 719 # revlog v0 doesn't have flag processors
710 720 for flag, processor in opts.get(b'flagprocessors', {}).items():
711 721 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
712 722
713 723 chunk_cache_size = self.data_config.chunk_cache_size
714 724 if chunk_cache_size <= 0:
715 725 raise error.RevlogError(
716 726 _(b'revlog chunk cache size %r is not greater than 0')
717 727 % chunk_cache_size
718 728 )
719 729 elif chunk_cache_size & (chunk_cache_size - 1):
720 730 raise error.RevlogError(
721 731 _(b'revlog chunk cache size %r is not a power of 2')
722 732 % chunk_cache_size
723 733 )
724 734 force_nodemap = opts.get(b'devel-force-nodemap', False)
725 735 return new_header, mmapindexthreshold, force_nodemap
726 736
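# Illustration (editor's sketch, not part of this changeset): the chunk cache
# validation in _init_opts() above relies on the classic bit trick that a
# positive integer n is a power of two exactly when n & (n - 1) == 0 (a power
# of two has a single bit set; subtracting one clears it and sets all lower
# bits).
def is_power_of_two(n):
    return n > 0 and n & (n - 1) == 0


assert is_power_of_two(65536)  # the default chunk_cache_size
assert not is_power_of_two(65535)  # would be rejected as "not a power of 2"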
727 737 def _get_data(self, filepath, mmap_threshold, size=None):
728 738 """return a file content with or without mmap
729 739
730 740 If the file is missing return the empty string"""
731 741 try:
732 742 with self.opener(filepath) as fp:
733 743 if mmap_threshold is not None:
734 744 file_size = self.opener.fstat(fp).st_size
735 745 if file_size >= mmap_threshold:
736 746 if size is not None:
737 747 # avoid potential mmap crash
738 748 size = min(file_size, size)
739 749 # TODO: we should call .close() to release resources without
740 750 # relying on Python GC
741 751 if size is None:
742 752 return util.buffer(util.mmapread(fp))
743 753 else:
744 754 return util.buffer(util.mmapread(fp, size))
745 755 if size is None:
746 756 return fp.read()
747 757 else:
748 758 return fp.read(size)
749 759 except FileNotFoundError:
750 760 return b''
751 761
752 762 def get_streams(self, max_linkrev, force_inline=False):
753 763 """return a list of streams that represent this revlog
754 764
755 765 This is used by stream-clone to do bytes to bytes copies of a repository.
756 766
757 767 This streams data for all revisions that refer to a changelog revision up
758 768 to `max_linkrev`.
759 769
760 770 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
761 771
762 772 It returns a list of three-tuples:
763 773
764 774 [
765 775 (filename, bytes_stream, stream_size),
766 776 …
767 777 ]
768 778 """
769 779 n = len(self)
770 780 index = self.index
771 781 while n > 0:
772 782 linkrev = index[n - 1][4]
773 783 if linkrev < max_linkrev:
774 784 break
775 785 # note: this loop will rarely go through multiple iterations, since
776 786 # it only traverses commits created during the current streaming
777 787 # pull operation.
778 788 #
779 789 # If this becomes a problem, using a binary search should cap the
780 790 # runtime of this.
781 791 n = n - 1
782 792 if n == 0:
783 793 # no data to send
784 794 return []
785 795 index_size = n * index.entry_size
786 796 data_size = self.end(n - 1)
787 797
788 798 # XXX we might have been split (or stripped) since the object
789 799 # initialization. We need to close this race too, by having a way to
790 800 # pre-open the files we feed to the revlog and never closing them before
791 801 # we are done streaming.
792 802
793 803 if self._inline:
794 804
795 805 def get_stream():
796 806 with self.opener(self._indexfile, mode=b"r") as fp:
797 807 yield None
798 808 size = index_size + data_size
799 809 if size <= 65536:
800 810 yield fp.read(size)
801 811 else:
802 812 yield from util.filechunkiter(fp, limit=size)
803 813
804 814 inline_stream = get_stream()
805 815 next(inline_stream)
806 816 return [
807 817 (self._indexfile, inline_stream, index_size + data_size),
808 818 ]
809 819 elif force_inline:
810 820
811 821 def get_stream():
812 822 with self.reading():
813 823 yield None
814 824
815 825 for rev in range(n):
816 826 idx = self.index.entry_binary(rev)
817 827 if rev == 0 and self._docket is None:
818 828 # re-inject the inline flag
819 829 header = self._format_flags
820 830 header |= self._format_version
821 831 header |= FLAG_INLINE_DATA
822 832 header = self.index.pack_header(header)
823 833 idx = header + idx
824 834 yield idx
825 835 yield self._getsegmentforrevs(rev, rev)[1]
826 836
827 837 inline_stream = get_stream()
828 838 next(inline_stream)
829 839 return [
830 840 (self._indexfile, inline_stream, index_size + data_size),
831 841 ]
832 842 else:
833 843
834 844 def get_index_stream():
835 845 with self.opener(self._indexfile, mode=b"r") as fp:
836 846 yield None
837 847 if index_size <= 65536:
838 848 yield fp.read(index_size)
839 849 else:
840 850 yield from util.filechunkiter(fp, limit=index_size)
841 851
842 852 def get_data_stream():
843 853 with self._datafp() as fp:
844 854 yield None
845 855 if data_size <= 65536:
846 856 yield fp.read(data_size)
847 857 else:
848 858 yield from util.filechunkiter(fp, limit=data_size)
849 859
850 860 index_stream = get_index_stream()
851 861 next(index_stream)
852 862 data_stream = get_data_stream()
853 863 next(data_stream)
854 864 return [
855 865 (self._datafile, data_stream, data_size),
856 866 (self._indexfile, index_stream, index_size),
857 867 ]
858 868
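# Illustration (editor's sketch, not part of this changeset): how a caller
# might consume the value documented in get_streams() above, a list of
# (filename, bytes_stream, stream_size) three-tuples. `rl` and `out_dir` are
# hypothetical; the real stream-clone consumers live elsewhere in Mercurial.
import os


def copy_streams(rl, out_dir, max_linkrev):
    for filename, stream, size in rl.get_streams(max_linkrev):
        written = 0
        with open(os.path.join(out_dir, os.fsdecode(filename)), 'wb') as fp:
            for chunk in stream:
                fp.write(chunk)
                written += len(chunk)
        assert written == size, b'short stream for %s' % filename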
859 869 def _loadindex(self, docket=None):
860 870
861 871 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
862 872
863 873 if self.postfix is not None:
864 874 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
865 875 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
866 876 entry_point = b'%s.i.a' % self.radix
867 877 elif self._try_split and self.opener.exists(self._split_index_file):
868 878 entry_point = self._split_index_file
869 879 else:
870 880 entry_point = b'%s.i' % self.radix
871 881
872 882 if docket is not None:
873 883 self._docket = docket
874 884 self._docket_file = entry_point
875 885 else:
876 886 self._initempty = True
877 887 entry_data = self._get_data(entry_point, mmapindexthreshold)
878 888 if len(entry_data) > 0:
879 889 header = INDEX_HEADER.unpack(entry_data[:4])[0]
880 890 self._initempty = False
881 891 else:
882 892 header = new_header
883 893
884 894 self._format_flags = header & ~0xFFFF
885 895 self._format_version = header & 0xFFFF
886 896
887 897 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
888 898 if supported_flags is None:
889 899 msg = _(b'unknown version (%d) in revlog %s')
890 900 msg %= (self._format_version, self.display_id)
891 901 raise error.RevlogError(msg)
892 902 elif self._format_flags & ~supported_flags:
893 903 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
894 904 display_flag = self._format_flags >> 16
895 905 msg %= (display_flag, self._format_version, self.display_id)
896 906 raise error.RevlogError(msg)
897 907
898 908 features = FEATURES_BY_VERSION[self._format_version]
899 909 self._inline = features[b'inline'](self._format_flags)
900 910 self.delta_config.general_delta = features[b'generaldelta'](
901 911 self._format_flags
902 912 )
903 913 self.feature_config.has_side_data = features[b'sidedata']
904 914
905 915 if not features[b'docket']:
906 916 self._indexfile = entry_point
907 917 index_data = entry_data
908 918 else:
909 919 self._docket_file = entry_point
910 920 if self._initempty:
911 921 self._docket = docketutil.default_docket(self, header)
912 922 else:
913 923 self._docket = docketutil.parse_docket(
914 924 self, entry_data, use_pending=self._trypending
915 925 )
916 926
917 927 if self._docket is not None:
918 928 self._indexfile = self._docket.index_filepath()
919 929 index_data = b''
920 930 index_size = self._docket.index_end
921 931 if index_size > 0:
922 932 index_data = self._get_data(
923 933 self._indexfile, mmapindexthreshold, size=index_size
924 934 )
925 935 if len(index_data) < index_size:
926 936 msg = _(b'too few index data for %s: got %d, expected %d')
927 937 msg %= (self.display_id, len(index_data), index_size)
928 938 raise error.RevlogError(msg)
929 939
930 940 self._inline = False
931 941 # generaldelta implied by version 2 revlogs.
932 942 self.delta_config.general_delta = True
933 943 # the logic for persistent nodemap will be dealt with within the
934 944 # main docket, so disable it for now.
935 945 self._nodemap_file = None
936 946
937 947 if self._docket is not None:
938 948 self._datafile = self._docket.data_filepath()
939 949 self._sidedatafile = self._docket.sidedata_filepath()
940 950 elif self.postfix is None:
941 951 self._datafile = b'%s.d' % self.radix
942 952 else:
943 953 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
944 954
945 955 self.nodeconstants = sha1nodeconstants
946 956 self.nullid = self.nodeconstants.nullid
947 957
948 958 # sparse-revlog can't be on without general-delta (issue6056)
949 959 if not self.delta_config.general_delta:
950 960 self.delta_config.sparse_revlog = False
951 961
952 962 self._storedeltachains = True
953 963
954 964 devel_nodemap = (
955 965 self._nodemap_file
956 966 and force_nodemap
957 967 and parse_index_v1_nodemap is not None
958 968 )
959 969
960 970 use_rust_index = False
961 971 if rustrevlog is not None:
962 972 if self._nodemap_file is not None:
963 973 use_rust_index = True
964 974 else:
965 975 use_rust_index = self.opener.options.get(b'rust.index')
966 976
967 977 self._parse_index = parse_index_v1
968 978 if self._format_version == REVLOGV0:
969 979 self._parse_index = revlogv0.parse_index_v0
970 980 elif self._format_version == REVLOGV2:
971 981 self._parse_index = parse_index_v2
972 982 elif self._format_version == CHANGELOGV2:
973 983 self._parse_index = parse_index_cl_v2
974 984 elif devel_nodemap:
975 985 self._parse_index = parse_index_v1_nodemap
976 986 elif use_rust_index:
977 987 self._parse_index = parse_index_v1_mixed
978 988 try:
979 989 d = self._parse_index(index_data, self._inline)
980 990 index, chunkcache = d
981 991 use_nodemap = (
982 992 not self._inline
983 993 and self._nodemap_file is not None
984 994 and hasattr(index, 'update_nodemap_data')
985 995 )
986 996 if use_nodemap:
987 997 nodemap_data = nodemaputil.persisted_data(self)
988 998 if nodemap_data is not None:
989 999 docket = nodemap_data[0]
990 1000 if (
991 1001 len(d[0]) > docket.tip_rev
992 1002 and d[0][docket.tip_rev][7] == docket.tip_node
993 1003 ):
994 1004 # no changelog tampering
995 1005 self._nodemap_docket = docket
996 1006 index.update_nodemap_data(*nodemap_data)
997 1007 except (ValueError, IndexError):
998 1008 raise error.RevlogError(
999 1009 _(b"index %s is corrupted") % self.display_id
1000 1010 )
1001 1011 self.index = index
1002 1012 self._segmentfile = randomaccessfile.randomaccessfile(
1003 1013 self.opener,
1004 1014 (self._indexfile if self._inline else self._datafile),
1005 1015 self.data_config.chunk_cache_size,
1006 1016 chunkcache,
1007 1017 )
1008 1018 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1009 1019 self.opener,
1010 1020 self._sidedatafile,
1011 1021 self.data_config.chunk_cache_size,
1012 1022 )
1013 1023 # revnum -> (chain-length, sum-delta-length)
1014 1024 self._chaininfocache = util.lrucachedict(500)
1015 1025 # revlog header -> revlog compressor
1016 1026 self._decompressors = {}
1017 1027
1018 1028 def get_revlog(self):
1019 1029 """simple function to mirror API of other not-really-revlog API"""
1020 1030 return self
1021 1031
1022 1032 @util.propertycache
1023 1033 def revlog_kind(self):
1024 1034 return self.target[0]
1025 1035
1026 1036 @util.propertycache
1027 1037 def display_id(self):
1028 1038 """The public facing "ID" of the revlog that we use in message"""
1029 1039 if self.revlog_kind == KIND_FILELOG:
1030 1040 # Reference the file without the "data/" prefix, so it is familiar
1031 1041 # to the user.
1032 1042 return self.target[1]
1033 1043 else:
1034 1044 return self.radix
1035 1045
1036 1046 def _get_decompressor(self, t):
1037 1047 try:
1038 1048 compressor = self._decompressors[t]
1039 1049 except KeyError:
1040 1050 try:
1041 1051 engine = util.compengines.forrevlogheader(t)
1042 1052 compressor = engine.revlogcompressor(
1043 1053 self.feature_config.compression_engine_options
1044 1054 )
1045 1055 self._decompressors[t] = compressor
1046 1056 except KeyError:
1047 1057 raise error.RevlogError(
1048 1058 _(b'unknown compression type %s') % binascii.hexlify(t)
1049 1059 )
1050 1060 return compressor
1051 1061
1052 1062 @util.propertycache
1053 1063 def _compressor(self):
1054 1064 engine = util.compengines[self.feature_config.compression_engine]
1055 1065 return engine.revlogcompressor(
1056 1066 self.feature_config.compression_engine_options
1057 1067 )
1058 1068
1059 1069 @util.propertycache
1060 1070 def _decompressor(self):
1061 1071 """the default decompressor"""
1062 1072 if self._docket is None:
1063 1073 return None
1064 1074 t = self._docket.default_compression_header
1065 1075 c = self._get_decompressor(t)
1066 1076 return c.decompress
1067 1077
1068 1078 def __index_write_fp(self):
1069 1079 # You should not use this directly; use `_writing` instead
1070 1080 try:
1071 1081 f = self.opener(
1072 1082 self._indexfile,
1073 1083 mode=b"r+",
1074 1084 checkambig=self.data_config.check_ambig,
1075 1085 )
1076 1086 if self._docket is None:
1077 1087 f.seek(0, os.SEEK_END)
1078 1088 else:
1079 1089 f.seek(self._docket.index_end, os.SEEK_SET)
1080 1090 return f
1081 1091 except FileNotFoundError:
1082 1092 return self.opener(
1083 1093 self._indexfile,
1084 1094 mode=b"w+",
1085 1095 checkambig=self.data_config.check_ambig,
1086 1096 )
1087 1097
1088 1098 def __index_new_fp(self):
1089 1099 # You should not use this unless you are upgrading from inline revlog
1090 1100 return self.opener(
1091 1101 self._indexfile,
1092 1102 mode=b"w",
1093 1103 checkambig=self.data_config.check_ambig,
1094 1104 atomictemp=True,
1095 1105 )
1096 1106
1097 1107 def _datafp(self, mode=b'r'):
1098 1108 """file object for the revlog's data file"""
1099 1109 return self.opener(self._datafile, mode=mode)
1100 1110
1101 1111 @contextlib.contextmanager
1102 1112 def _sidedatareadfp(self):
1103 1113 """file object suitable to read sidedata"""
1104 1114 if self._writinghandles:
1105 1115 yield self._writinghandles[2]
1106 1116 else:
1107 1117 with self.opener(self._sidedatafile) as fp:
1108 1118 yield fp
1109 1119
1110 1120 def tiprev(self):
1111 1121 return len(self.index) - 1
1112 1122
1113 1123 def tip(self):
1114 1124 return self.node(self.tiprev())
1115 1125
1116 1126 def __contains__(self, rev):
1117 1127 return 0 <= rev < len(self)
1118 1128
1119 1129 def __len__(self):
1120 1130 return len(self.index)
1121 1131
1122 1132 def __iter__(self):
1123 1133 return iter(range(len(self)))
1124 1134
1125 1135 def revs(self, start=0, stop=None):
1126 1136 """iterate over all rev in this revlog (from start to stop)"""
1127 1137 return storageutil.iterrevs(len(self), start=start, stop=stop)
1128 1138
1129 1139 def hasnode(self, node):
1130 1140 try:
1131 1141 self.rev(node)
1132 1142 return True
1133 1143 except KeyError:
1134 1144 return False
1135 1145
1136 1146 def _candelta(self, baserev, rev):
1137 1147 """whether two revisions (baserev, rev) can be delta-ed or not"""
1138 1148 # Disable delta if either rev requires a content-changing flag
1139 1149 # processor (ex. LFS). This is because such flag processor can alter
1140 1150 # the rawtext content that the delta will be based on, and two clients
1141 1151 # could have a same revlog node with different flags (i.e. different
1142 1152 # rawtext contents) and the delta could be incompatible.
1143 1153 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1144 1154 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1145 1155 ):
1146 1156 return False
1147 1157 return True
1148 1158
1149 1159 def update_caches(self, transaction):
1150 1160 """update on disk cache
1151 1161
1152 1162 If a transaction is passed, the update may be delayed to transaction
1153 1163 commit."""
1154 1164 if self._nodemap_file is not None:
1155 1165 if transaction is None:
1156 1166 nodemaputil.update_persistent_nodemap(self)
1157 1167 else:
1158 1168 nodemaputil.setup_persistent_nodemap(transaction, self)
1159 1169
1160 1170 def clearcaches(self):
1161 1171 """Clear in-memory caches"""
1162 1172 self._revisioncache = None
1163 1173 self._chainbasecache.clear()
1164 1174 self._segmentfile.clear_cache()
1165 1175 self._segmentfile_sidedata.clear_cache()
1166 1176 self._pcache = {}
1167 1177 self._nodemap_docket = None
1168 1178 self.index.clearcaches()
1169 1179 # The python code is the one responsible for validating the docket, so we
1170 1180 # end up having to refresh it here.
1171 1181 use_nodemap = (
1172 1182 not self._inline
1173 1183 and self._nodemap_file is not None
1174 1184 and hasattr(self.index, 'update_nodemap_data')
1175 1185 )
1176 1186 if use_nodemap:
1177 1187 nodemap_data = nodemaputil.persisted_data(self)
1178 1188 if nodemap_data is not None:
1179 1189 self._nodemap_docket = nodemap_data[0]
1180 1190 self.index.update_nodemap_data(*nodemap_data)
1181 1191
1182 1192 def rev(self, node):
1183 1193 """return the revision number associated with a <nodeid>"""
1184 1194 try:
1185 1195 return self.index.rev(node)
1186 1196 except TypeError:
1187 1197 raise
1188 1198 except error.RevlogError:
1189 1199 # parsers.c radix tree lookup failed
1190 1200 if (
1191 1201 node == self.nodeconstants.wdirid
1192 1202 or node in self.nodeconstants.wdirfilenodeids
1193 1203 ):
1194 1204 raise error.WdirUnsupported
1195 1205 raise error.LookupError(node, self.display_id, _(b'no node'))
1196 1206
1197 1207 # Accessors for index entries.
1198 1208
1199 1209 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1200 1210 # are flags.
1201 1211 def start(self, rev):
1202 1212 return int(self.index[rev][0] >> 16)
1203 1213
1204 1214 def sidedata_cut_off(self, rev):
1205 1215 sd_cut_off = self.index[rev][8]
1206 1216 if sd_cut_off != 0:
1207 1217 return sd_cut_off
1208 1218 # This is some annoying dance, because entries without sidedata
1209 1219 # currently use 0 as their offset. (instead of previous-offset +
1210 1220 # previous-size)
1211 1221 #
1212 1222 # We should reconsider this sidedata → 0 sidedata_offset policy.
1213 1223 # In the meantime, we need this.
1214 1224 while 0 <= rev:
1215 1225 e = self.index[rev]
1216 1226 if e[9] != 0:
1217 1227 return e[8] + e[9]
1218 1228 rev -= 1
1219 1229 return 0
1220 1230
1221 1231 def flags(self, rev):
1222 1232 return self.index[rev][0] & 0xFFFF
1223 1233
1224 1234 def length(self, rev):
1225 1235 return self.index[rev][1]
1226 1236
1227 1237 def sidedata_length(self, rev):
1228 1238 if not self.feature_config.has_side_data:
1229 1239 return 0
1230 1240 return self.index[rev][9]
1231 1241
1232 1242 def rawsize(self, rev):
1233 1243 """return the length of the uncompressed text for a given revision"""
1234 1244 l = self.index[rev][2]
1235 1245 if l >= 0:
1236 1246 return l
1237 1247
1238 1248 t = self.rawdata(rev)
1239 1249 return len(t)
1240 1250
1241 1251 def size(self, rev):
1242 1252 """length of non-raw text (processed by a "read" flag processor)"""
1243 1253 # fast path: if no "read" flag processor could change the content,
1244 1254 # size is rawsize. note: ELLIPSIS is known to not change the content.
1245 1255 flags = self.flags(rev)
1246 1256 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1247 1257 return self.rawsize(rev)
1248 1258
1249 1259 return len(self.revision(rev))
1250 1260
1251 1261 def fast_rank(self, rev):
1252 1262 """Return the rank of a revision if already known, or None otherwise.
1253 1263
1254 1264 The rank of a revision is the size of the sub-graph it defines as a
1255 1265 head. Equivalently, the rank of a revision `r` is the size of the set
1256 1266 `ancestors(r)`, `r` included.
1257 1267
1258 1268 This method returns the rank retrieved from the revlog in constant
1259 1269 time. It makes no attempt at computing unknown values for versions of
1260 1270 the revlog which do not persist the rank.
1261 1271 """
1262 1272 rank = self.index[rev][ENTRY_RANK]
1263 1273 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1264 1274 return None
1265 1275 if rev == nullrev:
1266 1276 return 0 # convention
1267 1277 return rank
1268 1278
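# Illustration (editor's sketch, not part of this changeset): the "rank"
# documented in fast_rank() above is the number of ancestors of a revision,
# the revision itself included. A brute-force reference over a toy parent map
# (hypothetical data, nullrev parents ignored) makes the definition concrete.
def toy_rank(parents, rev):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r >= 0 and r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return len(seen)


toy_parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
assert toy_rank(toy_parents, 3) == 4  # ancestors(3) = {0, 1, 2, 3}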
1269 1279 def chainbase(self, rev):
1270 1280 base = self._chainbasecache.get(rev)
1271 1281 if base is not None:
1272 1282 return base
1273 1283
1274 1284 index = self.index
1275 1285 iterrev = rev
1276 1286 base = index[iterrev][3]
1277 1287 while base != iterrev:
1278 1288 iterrev = base
1279 1289 base = index[iterrev][3]
1280 1290
1281 1291 self._chainbasecache[rev] = base
1282 1292 return base
1283 1293
1284 1294 def linkrev(self, rev):
1285 1295 return self.index[rev][4]
1286 1296
1287 1297 def parentrevs(self, rev):
1288 1298 try:
1289 1299 entry = self.index[rev]
1290 1300 except IndexError:
1291 1301 if rev == wdirrev:
1292 1302 raise error.WdirUnsupported
1293 1303 raise
1294 1304
1295 1305 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1296 1306 return entry[6], entry[5]
1297 1307 else:
1298 1308 return entry[5], entry[6]
1299 1309
1300 1310 # fast parentrevs(rev) where rev isn't filtered
1301 1311 _uncheckedparentrevs = parentrevs
1302 1312
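# Illustration (editor's sketch, not part of this changeset): the canonical
# parent order normalization applied by parentrevs() above. When the feature
# is enabled and the first stored parent is null, the parents are swapped so
# the first returned parent is the non-null one whenever possible (filelogs
# opt out because they rely on the stored order to mark meta-encoded files).
def canonical_order(p1, p2, nullrev=-1):
    if p1 == nullrev:
        return p2, p1
    return p1, p2


assert canonical_order(-1, 5) == (5, -1)
assert canonical_order(4, 5) == (4, 5)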
1303 1313 def node(self, rev):
1304 1314 try:
1305 1315 return self.index[rev][7]
1306 1316 except IndexError:
1307 1317 if rev == wdirrev:
1308 1318 raise error.WdirUnsupported
1309 1319 raise
1310 1320
1311 1321 # Derived from index values.
1312 1322
1313 1323 def end(self, rev):
1314 1324 return self.start(rev) + self.length(rev)
1315 1325
1316 1326 def parents(self, node):
1317 1327 i = self.index
1318 1328 d = i[self.rev(node)]
1319 1329 # inline node() to avoid function call overhead
1320 1330 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1321 1331 return i[d[6]][7], i[d[5]][7]
1322 1332 else:
1323 1333 return i[d[5]][7], i[d[6]][7]
1324 1334
1325 1335 def chainlen(self, rev):
1326 1336 return self._chaininfo(rev)[0]
1327 1337
1328 1338 def _chaininfo(self, rev):
1329 1339 chaininfocache = self._chaininfocache
1330 1340 if rev in chaininfocache:
1331 1341 return chaininfocache[rev]
1332 1342 index = self.index
1333 1343 generaldelta = self.delta_config.general_delta
1334 1344 iterrev = rev
1335 1345 e = index[iterrev]
1336 1346 clen = 0
1337 1347 compresseddeltalen = 0
1338 1348 while iterrev != e[3]:
1339 1349 clen += 1
1340 1350 compresseddeltalen += e[1]
1341 1351 if generaldelta:
1342 1352 iterrev = e[3]
1343 1353 else:
1344 1354 iterrev -= 1
1345 1355 if iterrev in chaininfocache:
1346 1356 t = chaininfocache[iterrev]
1347 1357 clen += t[0]
1348 1358 compresseddeltalen += t[1]
1349 1359 break
1350 1360 e = index[iterrev]
1351 1361 else:
1352 1362 # Add text length of base since decompressing that also takes
1353 1363 # work. For cache hits the length is already included.
1354 1364 compresseddeltalen += e[1]
1355 1365 r = (clen, compresseddeltalen)
1356 1366 chaininfocache[rev] = r
1357 1367 return r
1358 1368
1359 1369 def _deltachain(self, rev, stoprev=None):
1360 1370 """Obtain the delta chain for a revision.
1361 1371
1362 1372 ``stoprev`` specifies a revision to stop at. If not specified, we
1363 1373 stop at the base of the chain.
1364 1374
1365 1375 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1366 1376 revs in ascending order and ``stopped`` is a bool indicating whether
1367 1377 ``stoprev`` was hit.
1368 1378 """
1369 1379 generaldelta = self.delta_config.general_delta
1370 1380 # Try C implementation.
1371 1381 try:
1372 1382 return self.index.deltachain(rev, stoprev, generaldelta)
1373 1383 except AttributeError:
1374 1384 pass
1375 1385
1376 1386 chain = []
1377 1387
1378 1388 # Alias to prevent attribute lookup in tight loop.
1379 1389 index = self.index
1380 1390
1381 1391 iterrev = rev
1382 1392 e = index[iterrev]
1383 1393 while iterrev != e[3] and iterrev != stoprev:
1384 1394 chain.append(iterrev)
1385 1395 if generaldelta:
1386 1396 iterrev = e[3]
1387 1397 else:
1388 1398 iterrev -= 1
1389 1399 e = index[iterrev]
1390 1400
1391 1401 if iterrev == stoprev:
1392 1402 stopped = True
1393 1403 else:
1394 1404 chain.append(iterrev)
1395 1405 stopped = False
1396 1406
1397 1407 chain.reverse()
1398 1408 return chain, stopped
1399 1409
1400 1410 def ancestors(self, revs, stoprev=0, inclusive=False):
1401 1411 """Generate the ancestors of 'revs' in reverse revision order.
1402 1412 Does not generate revs lower than stoprev.
1403 1413
1404 1414 See the documentation for ancestor.lazyancestors for more details."""
1405 1415
1406 1416 # first, make sure start revisions aren't filtered
1407 1417 revs = list(revs)
1408 1418 checkrev = self.node
1409 1419 for r in revs:
1410 1420 checkrev(r)
1411 1421 # and we're sure ancestors aren't filtered as well
1412 1422
1413 1423 if rustancestor is not None and self.index.rust_ext_compat:
1414 1424 lazyancestors = rustancestor.LazyAncestors
1415 1425 arg = self.index
1416 1426 else:
1417 1427 lazyancestors = ancestor.lazyancestors
1418 1428 arg = self._uncheckedparentrevs
1419 1429 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1420 1430
1421 1431 def descendants(self, revs):
1422 1432 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1423 1433
1424 1434 def findcommonmissing(self, common=None, heads=None):
1425 1435 """Return a tuple of the ancestors of common and the ancestors of heads
1426 1436 that are not ancestors of common. In revset terminology, we return the
1427 1437 tuple:
1428 1438
1429 1439 ::common, (::heads) - (::common)
1430 1440
1431 1441 The list is sorted by revision number, meaning it is
1432 1442 topologically sorted.
1433 1443
1434 1444 'heads' and 'common' are both lists of node IDs. If heads is
1435 1445 not supplied, uses all of the revlog's heads. If common is not
1436 1446 supplied, uses nullid."""
1437 1447 if common is None:
1438 1448 common = [self.nullid]
1439 1449 if heads is None:
1440 1450 heads = self.heads()
1441 1451
1442 1452 common = [self.rev(n) for n in common]
1443 1453 heads = [self.rev(n) for n in heads]
1444 1454
1445 1455 # we want the ancestors, but inclusive
1446 1456 class lazyset:
1447 1457 def __init__(self, lazyvalues):
1448 1458 self.addedvalues = set()
1449 1459 self.lazyvalues = lazyvalues
1450 1460
1451 1461 def __contains__(self, value):
1452 1462 return value in self.addedvalues or value in self.lazyvalues
1453 1463
1454 1464 def __iter__(self):
1455 1465 added = self.addedvalues
1456 1466 for r in added:
1457 1467 yield r
1458 1468 for r in self.lazyvalues:
1459 1469 if not r in added:
1460 1470 yield r
1461 1471
1462 1472 def add(self, value):
1463 1473 self.addedvalues.add(value)
1464 1474
1465 1475 def update(self, values):
1466 1476 self.addedvalues.update(values)
1467 1477
1468 1478 has = lazyset(self.ancestors(common))
1469 1479 has.add(nullrev)
1470 1480 has.update(common)
1471 1481
1472 1482 # take all ancestors from heads that aren't in has
1473 1483 missing = set()
1474 1484 visit = collections.deque(r for r in heads if r not in has)
1475 1485 while visit:
1476 1486 r = visit.popleft()
1477 1487 if r in missing:
1478 1488 continue
1479 1489 else:
1480 1490 missing.add(r)
1481 1491 for p in self.parentrevs(r):
1482 1492 if p not in has:
1483 1493 visit.append(p)
1484 1494 missing = list(missing)
1485 1495 missing.sort()
1486 1496 return has, [self.node(miss) for miss in missing]
1487 1497
1488 1498 def incrementalmissingrevs(self, common=None):
1489 1499 """Return an object that can be used to incrementally compute the
1490 1500 revision numbers of the ancestors of arbitrary sets that are not
1491 1501 ancestors of common. This is an ancestor.incrementalmissingancestors
1492 1502 object.
1493 1503
1494 1504 'common' is a list of revision numbers. If common is not supplied, uses
1495 1505 nullrev.
1496 1506 """
1497 1507 if common is None:
1498 1508 common = [nullrev]
1499 1509
1500 1510 if rustancestor is not None and self.index.rust_ext_compat:
1501 1511 return rustancestor.MissingAncestors(self.index, common)
1502 1512 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1503 1513
1504 1514 def findmissingrevs(self, common=None, heads=None):
1505 1515 """Return the revision numbers of the ancestors of heads that
1506 1516 are not ancestors of common.
1507 1517
1508 1518 More specifically, return a list of revision numbers corresponding to
1509 1519 nodes N such that every N satisfies the following constraints:
1510 1520
1511 1521 1. N is an ancestor of some node in 'heads'
1512 1522 2. N is not an ancestor of any node in 'common'
1513 1523
1514 1524 The list is sorted by revision number, meaning it is
1515 1525 topologically sorted.
1516 1526
1517 1527 'heads' and 'common' are both lists of revision numbers. If heads is
1518 1528 not supplied, uses all of the revlog's heads. If common is not
1519 1529 supplied, uses nullid."""
1520 1530 if common is None:
1521 1531 common = [nullrev]
1522 1532 if heads is None:
1523 1533 heads = self.headrevs()
1524 1534
1525 1535 inc = self.incrementalmissingrevs(common=common)
1526 1536 return inc.missingancestors(heads)
1527 1537
1528 1538 def findmissing(self, common=None, heads=None):
1529 1539 """Return the ancestors of heads that are not ancestors of common.
1530 1540
1531 1541 More specifically, return a list of nodes N such that every N
1532 1542 satisfies the following constraints:
1533 1543
1534 1544 1. N is an ancestor of some node in 'heads'
1535 1545 2. N is not an ancestor of any node in 'common'
1536 1546
1537 1547 The list is sorted by revision number, meaning it is
1538 1548 topologically sorted.
1539 1549
1540 1550 'heads' and 'common' are both lists of node IDs. If heads is
1541 1551 not supplied, uses all of the revlog's heads. If common is not
1542 1552 supplied, uses nullid."""
1543 1553 if common is None:
1544 1554 common = [self.nullid]
1545 1555 if heads is None:
1546 1556 heads = self.heads()
1547 1557
1548 1558 common = [self.rev(n) for n in common]
1549 1559 heads = [self.rev(n) for n in heads]
1550 1560
1551 1561 inc = self.incrementalmissingrevs(common=common)
1552 1562 return [self.node(r) for r in inc.missingancestors(heads)]
1553 1563
1554 1564 def nodesbetween(self, roots=None, heads=None):
1555 1565 """Return a topological path from 'roots' to 'heads'.
1556 1566
1557 1567 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1558 1568 topologically sorted list of all nodes N that satisfy both of
1559 1569 these constraints:
1560 1570
1561 1571 1. N is a descendant of some node in 'roots'
1562 1572 2. N is an ancestor of some node in 'heads'
1563 1573
1564 1574 Every node is considered to be both a descendant and an ancestor
1565 1575 of itself, so every reachable node in 'roots' and 'heads' will be
1566 1576 included in 'nodes'.
1567 1577
1568 1578 'outroots' is the list of reachable nodes in 'roots', i.e., the
1569 1579 subset of 'roots' that is returned in 'nodes'. Likewise,
1570 1580 'outheads' is the subset of 'heads' that is also in 'nodes'.
1571 1581
1572 1582 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1573 1583 unspecified, uses nullid as the only root. If 'heads' is
1574 1584 unspecified, uses list of all of the revlog's heads."""
1575 1585 nonodes = ([], [], [])
1576 1586 if roots is not None:
1577 1587 roots = list(roots)
1578 1588 if not roots:
1579 1589 return nonodes
1580 1590 lowestrev = min([self.rev(n) for n in roots])
1581 1591 else:
1582 1592 roots = [self.nullid] # Everybody's a descendant of nullid
1583 1593 lowestrev = nullrev
1584 1594 if (lowestrev == nullrev) and (heads is None):
1585 1595 # We want _all_ the nodes!
1586 1596 return (
1587 1597 [self.node(r) for r in self],
1588 1598 [self.nullid],
1589 1599 list(self.heads()),
1590 1600 )
1591 1601 if heads is None:
1592 1602 # All nodes are ancestors, so the latest ancestor is the last
1593 1603 # node.
1594 1604 highestrev = len(self) - 1
1595 1605 # Set ancestors to None to signal that every node is an ancestor.
1596 1606 ancestors = None
1597 1607 # Set heads to an empty dictionary for later discovery of heads
1598 1608 heads = {}
1599 1609 else:
1600 1610 heads = list(heads)
1601 1611 if not heads:
1602 1612 return nonodes
1603 1613 ancestors = set()
1604 1614 # Turn heads into a dictionary so we can remove 'fake' heads.
1605 1615 # Also, later we will be using it to filter out the heads we can't
1606 1616 # find from roots.
1607 1617 heads = dict.fromkeys(heads, False)
1608 1618 # Start at the top and keep marking parents until we're done.
1609 1619 nodestotag = set(heads)
1610 1620 # Remember where the top was so we can use it as a limit later.
1611 1621 highestrev = max([self.rev(n) for n in nodestotag])
1612 1622 while nodestotag:
1613 1623 # grab a node to tag
1614 1624 n = nodestotag.pop()
1615 1625 # Never tag nullid
1616 1626 if n == self.nullid:
1617 1627 continue
1618 1628 # A node's revision number represents its place in a
1619 1629 # topologically sorted list of nodes.
1620 1630 r = self.rev(n)
1621 1631 if r >= lowestrev:
1622 1632 if n not in ancestors:
1623 1633 # If we are possibly a descendant of one of the roots
1624 1634 # and we haven't already been marked as an ancestor
1625 1635 ancestors.add(n) # Mark as ancestor
1626 1636 # Add non-nullid parents to list of nodes to tag.
1627 1637 nodestotag.update(
1628 1638 [p for p in self.parents(n) if p != self.nullid]
1629 1639 )
1630 1640 elif n in heads: # We've seen it before, is it a fake head?
1631 1641 # So it is, real heads should not be the ancestors of
1632 1642 # any other heads.
1633 1643 heads.pop(n)
1634 1644 if not ancestors:
1635 1645 return nonodes
1636 1646 # Now that we have our set of ancestors, we want to remove any
1637 1647 # roots that are not ancestors.
1638 1648
1639 1649 # If one of the roots was nullid, everything is included anyway.
1640 1650 if lowestrev > nullrev:
1641 1651 # But, since we weren't, let's recompute the lowest rev to not
1642 1652 # include roots that aren't ancestors.
1643 1653
1644 1654 # Filter out roots that aren't ancestors of heads
1645 1655 roots = [root for root in roots if root in ancestors]
1646 1656 # Recompute the lowest revision
1647 1657 if roots:
1648 1658 lowestrev = min([self.rev(root) for root in roots])
1649 1659 else:
1650 1660 # No more roots? Return empty list
1651 1661 return nonodes
1652 1662 else:
1653 1663 # We are descending from nullid, and don't need to care about
1654 1664 # any other roots.
1655 1665 lowestrev = nullrev
1656 1666 roots = [self.nullid]
1657 1667 # Transform our roots list into a set.
1658 1668 descendants = set(roots)
1659 1669 # Also, keep the original roots so we can filter out roots that aren't
1660 1670 # 'real' roots (i.e. are descended from other roots).
1661 1671 roots = descendants.copy()
1662 1672 # Our topologically sorted list of output nodes.
1663 1673 orderedout = []
1664 1674 # Don't start at nullid since we don't want nullid in our output list,
1665 1675 # and if nullid shows up in descendants, empty parents will look like
1666 1676 # they're descendants.
1667 1677 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1668 1678 n = self.node(r)
1669 1679 isdescendant = False
1670 1680 if lowestrev == nullrev: # Everybody is a descendant of nullid
1671 1681 isdescendant = True
1672 1682 elif n in descendants:
1673 1683 # n is already a descendant
1674 1684 isdescendant = True
1675 1685 # This check only needs to be done here because all the roots
1676 1686 # will start being marked as descendants before the loop.
1677 1687 if n in roots:
1678 1688 # If n was a root, check if it's a 'real' root.
1679 1689 p = tuple(self.parents(n))
1680 1690 # If any of its parents are descendants, it's not a root.
1681 1691 if (p[0] in descendants) or (p[1] in descendants):
1682 1692 roots.remove(n)
1683 1693 else:
1684 1694 p = tuple(self.parents(n))
1685 1695 # A node is a descendant if either of its parents are
1686 1696 # descendants. (We seeded the descendants set with the roots
1687 1697 # up there, remember?)
1688 1698 if (p[0] in descendants) or (p[1] in descendants):
1689 1699 descendants.add(n)
1690 1700 isdescendant = True
1691 1701 if isdescendant and ((ancestors is None) or (n in ancestors)):
1692 1702 # Only include nodes that are both descendants and ancestors.
1693 1703 orderedout.append(n)
1694 1704 if (ancestors is not None) and (n in heads):
1695 1705 # We're trying to figure out which heads are reachable
1696 1706 # from roots.
1697 1707 # Mark this head as having been reached
1698 1708 heads[n] = True
1699 1709 elif ancestors is None:
1700 1710 # Otherwise, we're trying to discover the heads.
1701 1711 # Assume this is a head because if it isn't, the next step
1702 1712 # will eventually remove it.
1703 1713 heads[n] = True
1704 1714 # But, obviously its parents aren't.
1705 1715 for p in self.parents(n):
1706 1716 heads.pop(p, None)
1707 1717 heads = [head for head, flag in heads.items() if flag]
1708 1718 roots = list(roots)
1709 1719 assert orderedout
1710 1720 assert roots
1711 1721 assert heads
1712 1722 return (orderedout, roots, heads)
1713 1723
1714 1724 def headrevs(self, revs=None):
1715 1725 if revs is None:
1716 1726 try:
1717 1727 return self.index.headrevs()
1718 1728 except AttributeError:
1719 1729 return self._headrevs()
1720 1730 if rustdagop is not None and self.index.rust_ext_compat:
1721 1731 return rustdagop.headrevs(self.index, revs)
1722 1732 return dagop.headrevs(revs, self._uncheckedparentrevs)
1723 1733
1724 1734 def computephases(self, roots):
1725 1735 return self.index.computephasesmapsets(roots)
1726 1736
1727 1737 def _headrevs(self):
1728 1738 count = len(self)
1729 1739 if not count:
1730 1740 return [nullrev]
1731 1741 # we won't iterate over filtered revs, so nobody is a head at the start
1732 1742 ishead = [0] * (count + 1)
1733 1743 index = self.index
1734 1744 for r in self:
1735 1745 ishead[r] = 1 # I may be a head
1736 1746 e = index[r]
1737 1747 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1738 1748 return [r for r, val in enumerate(ishead) if val]
1739 1749
1740 1750 def heads(self, start=None, stop=None):
1741 1751 """return the list of all nodes that have no children
1742 1752
1743 1753 if start is specified, only heads that are descendants of
1744 1754 start will be returned
1745 1755 if stop is specified, it will consider all the revs from stop
1746 1756 as if they had no children
1747 1757 """
1748 1758 if start is None and stop is None:
1749 1759 if not len(self):
1750 1760 return [self.nullid]
1751 1761 return [self.node(r) for r in self.headrevs()]
1752 1762
1753 1763 if start is None:
1754 1764 start = nullrev
1755 1765 else:
1756 1766 start = self.rev(start)
1757 1767
1758 1768 stoprevs = {self.rev(n) for n in stop or []}
1759 1769
1760 1770 revs = dagop.headrevssubset(
1761 1771 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1762 1772 )
1763 1773
1764 1774 return [self.node(rev) for rev in revs]
1765 1775
1766 1776 def children(self, node):
1767 1777 """find the children of a given node"""
1768 1778 c = []
1769 1779 p = self.rev(node)
1770 1780 for r in self.revs(start=p + 1):
1771 1781 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1772 1782 if prevs:
1773 1783 for pr in prevs:
1774 1784 if pr == p:
1775 1785 c.append(self.node(r))
1776 1786 elif p == nullrev:
1777 1787 c.append(self.node(r))
1778 1788 return c
1779 1789
1780 1790 def commonancestorsheads(self, a, b):
1781 1791 """calculate all the heads of the common ancestors of nodes a and b"""
1782 1792 a, b = self.rev(a), self.rev(b)
1783 1793 ancs = self._commonancestorsheads(a, b)
1784 1794 return pycompat.maplist(self.node, ancs)
1785 1795
1786 1796 def _commonancestorsheads(self, *revs):
1787 1797 """calculate all the heads of the common ancestors of revs"""
1788 1798 try:
1789 1799 ancs = self.index.commonancestorsheads(*revs)
1790 1800 except (AttributeError, OverflowError): # C implementation failed
1791 1801 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1792 1802 return ancs
1793 1803
1794 1804 def isancestor(self, a, b):
1795 1805 """return True if node a is an ancestor of node b
1796 1806
1797 1807 A revision is considered an ancestor of itself."""
1798 1808 a, b = self.rev(a), self.rev(b)
1799 1809 return self.isancestorrev(a, b)
1800 1810
1801 1811 def isancestorrev(self, a, b):
1802 1812 """return True if revision a is an ancestor of revision b
1803 1813
1804 1814 A revision is considered an ancestor of itself.
1805 1815
1806 1816 The implementation of this is trivial but the use of
1807 1817 reachableroots is not."""
1808 1818 if a == nullrev:
1809 1819 return True
1810 1820 elif a == b:
1811 1821 return True
1812 1822 elif a > b:
1813 1823 return False
1814 1824 return bool(self.reachableroots(a, [b], [a], includepath=False))
1815 1825
1816 1826 def reachableroots(self, minroot, heads, roots, includepath=False):
1817 1827 """return (heads(::(<roots> and <roots>::<heads>)))
1818 1828
1819 1829 If includepath is True, return (<roots>::<heads>)."""
1820 1830 try:
1821 1831 return self.index.reachableroots2(
1822 1832 minroot, heads, roots, includepath
1823 1833 )
1824 1834 except AttributeError:
1825 1835 return dagop._reachablerootspure(
1826 1836 self.parentrevs, minroot, roots, heads, includepath
1827 1837 )
1828 1838
1829 1839 def ancestor(self, a, b):
1830 1840 """calculate the "best" common ancestor of nodes a and b"""
1831 1841
1832 1842 a, b = self.rev(a), self.rev(b)
1833 1843 try:
1834 1844 ancs = self.index.ancestors(a, b)
1835 1845 except (AttributeError, OverflowError):
1836 1846 ancs = ancestor.ancestors(self.parentrevs, a, b)
1837 1847 if ancs:
1838 1848 # choose a consistent winner when there's a tie
1839 1849 return min(map(self.node, ancs))
1840 1850 return self.nullid
1841 1851
1842 1852 def _match(self, id):
1843 1853 if isinstance(id, int):
1844 1854 # rev
1845 1855 return self.node(id)
1846 1856 if len(id) == self.nodeconstants.nodelen:
1847 1857 # possibly a binary node
1848 1858 # odds of a binary node being all hex in ASCII are 1 in 10**25
1849 1859 try:
1850 1860 node = id
1851 1861 self.rev(node) # quick search the index
1852 1862 return node
1853 1863 except error.LookupError:
1854 1864 pass # may be partial hex id
1855 1865 try:
1856 1866 # str(rev)
1857 1867 rev = int(id)
1858 1868 if b"%d" % rev != id:
1859 1869 raise ValueError
1860 1870 if rev < 0:
1861 1871 rev = len(self) + rev
1862 1872 if rev < 0 or rev >= len(self):
1863 1873 raise ValueError
1864 1874 return self.node(rev)
1865 1875 except (ValueError, OverflowError):
1866 1876 pass
1867 1877 if len(id) == 2 * self.nodeconstants.nodelen:
1868 1878 try:
1869 1879 # a full hex nodeid?
1870 1880 node = bin(id)
1871 1881 self.rev(node)
1872 1882 return node
1873 1883 except (binascii.Error, error.LookupError):
1874 1884 pass
1875 1885
1876 1886 def _partialmatch(self, id):
1877 1887 # we don't care about wdirfilenodeids as they should always be full hashes
1878 1888 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1879 1889 ambiguous = False
1880 1890 try:
1881 1891 partial = self.index.partialmatch(id)
1882 1892 if partial and self.hasnode(partial):
1883 1893 if maybewdir:
1884 1894 # single 'ff...' match in radix tree, ambiguous with wdir
1885 1895 ambiguous = True
1886 1896 else:
1887 1897 return partial
1888 1898 elif maybewdir:
1889 1899 # no 'ff...' match in radix tree, wdir identified
1890 1900 raise error.WdirUnsupported
1891 1901 else:
1892 1902 return None
1893 1903 except error.RevlogError:
1894 1904 # parsers.c radix tree lookup gave multiple matches
1895 1905 # fast path: for unfiltered changelog, radix tree is accurate
1896 1906 if not getattr(self, 'filteredrevs', None):
1897 1907 ambiguous = True
1898 1908 # fall through to slow path that filters hidden revisions
1899 1909 except (AttributeError, ValueError):
1900 1910 # we are pure python, or key is not hex
1901 1911 pass
1902 1912 if ambiguous:
1903 1913 raise error.AmbiguousPrefixLookupError(
1904 1914 id, self.display_id, _(b'ambiguous identifier')
1905 1915 )
1906 1916
1907 1917 if id in self._pcache:
1908 1918 return self._pcache[id]
1909 1919
1910 1920 if len(id) <= 40:
1911 1921 # hex(node)[:...]
1912 1922 l = len(id) // 2 * 2 # grab an even number of digits
1913 1923 try:
1914 1924 # we're dropping the last digit, so let's check that it's hex,
1915 1925 # to avoid the expensive computation below if it's not
1916 1926 if len(id) % 2 > 0:
1917 1927 if not (id[-1] in hexdigits):
1918 1928 return None
1919 1929 prefix = bin(id[:l])
1920 1930 except binascii.Error:
1921 1931 pass
1922 1932 else:
1923 1933 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1924 1934 nl = [
1925 1935 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1926 1936 ]
1927 1937 if self.nodeconstants.nullhex.startswith(id):
1928 1938 nl.append(self.nullid)
1929 1939 if len(nl) > 0:
1930 1940 if len(nl) == 1 and not maybewdir:
1931 1941 self._pcache[id] = nl[0]
1932 1942 return nl[0]
1933 1943 raise error.AmbiguousPrefixLookupError(
1934 1944 id, self.display_id, _(b'ambiguous identifier')
1935 1945 )
1936 1946 if maybewdir:
1937 1947 raise error.WdirUnsupported
1938 1948 return None
1939 1949
1940 1950 def lookup(self, id):
1941 1951 """locate a node based on:
1942 1952 - revision number or str(revision number)
1943 1953 - nodeid or subset of hex nodeid
1944 1954 """
1945 1955 n = self._match(id)
1946 1956 if n is not None:
1947 1957 return n
1948 1958 n = self._partialmatch(id)
1949 1959 if n:
1950 1960 return n
1951 1961
1952 1962 raise error.LookupError(id, self.display_id, _(b'no match found'))
1953 1963
1954 1964 def shortest(self, node, minlength=1):
1955 1965 """Find the shortest unambiguous prefix that matches node."""
1956 1966
1957 1967 def isvalid(prefix):
1958 1968 try:
1959 1969 matchednode = self._partialmatch(prefix)
1960 1970 except error.AmbiguousPrefixLookupError:
1961 1971 return False
1962 1972 except error.WdirUnsupported:
1963 1973 # single 'ff...' match
1964 1974 return True
1965 1975 if matchednode is None:
1966 1976 raise error.LookupError(node, self.display_id, _(b'no node'))
1967 1977 return True
1968 1978
1969 1979 def maybewdir(prefix):
1970 1980 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1971 1981
1972 1982 hexnode = hex(node)
1973 1983
1974 1984 def disambiguate(hexnode, minlength):
1975 1985 """Disambiguate against wdirid."""
1976 1986 for length in range(minlength, len(hexnode) + 1):
1977 1987 prefix = hexnode[:length]
1978 1988 if not maybewdir(prefix):
1979 1989 return prefix
1980 1990
1981 1991 if not getattr(self, 'filteredrevs', None):
1982 1992 try:
1983 1993 length = max(self.index.shortest(node), minlength)
1984 1994 return disambiguate(hexnode, length)
1985 1995 except error.RevlogError:
1986 1996 if node != self.nodeconstants.wdirid:
1987 1997 raise error.LookupError(
1988 1998 node, self.display_id, _(b'no node')
1989 1999 )
1990 2000 except AttributeError:
1991 2001 # Fall through to pure code
1992 2002 pass
1993 2003
1994 2004 if node == self.nodeconstants.wdirid:
1995 2005 for length in range(minlength, len(hexnode) + 1):
1996 2006 prefix = hexnode[:length]
1997 2007 if isvalid(prefix):
1998 2008 return prefix
1999 2009
2000 2010 for length in range(minlength, len(hexnode) + 1):
2001 2011 prefix = hexnode[:length]
2002 2012 if isvalid(prefix):
2003 2013 return disambiguate(hexnode, length)
2004 2014
2005 2015 def cmp(self, node, text):
2006 2016 """compare text with a given file revision
2007 2017
2008 2018 returns True if text is different than what is stored.
2009 2019 """
2010 2020 p1, p2 = self.parents(node)
2011 2021 return storageutil.hashrevisionsha1(text, p1, p2) != node
2012 2022
2013 2023 def _getsegmentforrevs(self, startrev, endrev):
2014 2024 """Obtain a segment of raw data corresponding to a range of revisions.
2015 2025
2016 2026 Accepts the start and end revisions of the range.
2019 2029
2020 2030 Requests for data may be satisfied by a cache.
2021 2031
2022 2032 Returns a 2-tuple of (offset, data) for the requested range of
2023 2033 revisions. Offset is the integer offset from the beginning of the
2024 2034 revlog and data is a str or buffer of the raw byte data.
2025 2035
2026 2036 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2027 2037 to determine where each revision's data begins and ends.
2028 2038 """
2029 2039 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2030 2040 # (functions are expensive).
2031 2041 index = self.index
2032 2042 istart = index[startrev]
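# entry[0] packs (data offset << 16) | flags, so shift right by 16 to get the byte offset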
2033 2043 start = int(istart[0] >> 16)
2034 2044 if startrev == endrev:
2035 2045 end = start + istart[1]
2036 2046 else:
2037 2047 iend = index[endrev]
2038 2048 end = int(iend[0] >> 16) + iend[1]
2039 2049
2040 2050 if self._inline:
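# in an inline revlog, index entries and data chunks are interleaved in the
# .i file, so skip past the index entries that precede the requested data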
2041 2051 start += (startrev + 1) * self.index.entry_size
2042 2052 end += (endrev + 1) * self.index.entry_size
2043 2053 length = end - start
2044 2054
2045 2055 return start, self._segmentfile.read_chunk(start, length)
2046 2056
2047 2057 def _chunk(self, rev):
2048 2058 """Obtain a single decompressed chunk for a revision.
2049 2059
2050 2060 Accepts an integer revision.
2053 2063
2054 2064 Returns a str holding uncompressed data for the requested revision.
2055 2065 """
2056 2066 compression_mode = self.index[rev][10]
2057 2067 data = self._getsegmentforrevs(rev, rev)[1]
2058 2068 if compression_mode == COMP_MODE_PLAIN:
2059 2069 return data
2060 2070 elif compression_mode == COMP_MODE_DEFAULT:
2061 2071 return self._decompressor(data)
2062 2072 elif compression_mode == COMP_MODE_INLINE:
2063 2073 return self.decompress(data)
2064 2074 else:
2065 2075 msg = b'unknown compression mode %d'
2066 2076 msg %= compression_mode
2067 2077 raise error.RevlogError(msg)
2068 2078
2069 2079 def _chunks(self, revs, targetsize=None):
2070 2080 """Obtain decompressed chunks for the specified revisions.
2071 2081
2072 2082 Accepts an iterable of numeric revisions that are assumed to be in
2073 2083 ascending order. Also accepts an optional already-open file handle
2074 2084 to be used for reading. If used, the seek position of the file will
2075 2085 not be preserved.
2076 2086
2077 2087 This function is similar to calling ``self._chunk()`` multiple times,
2078 2088 but is faster.
2079 2089
2080 2090 Returns a list with decompressed data for each requested revision.
2081 2091 """
2082 2092 if not revs:
2083 2093 return []
2084 2094 start = self.start
2085 2095 length = self.length
2086 2096 inline = self._inline
2087 2097 iosize = self.index.entry_size
2088 2098 buffer = util.buffer
2089 2099
2090 2100 l = []
2091 2101 ladd = l.append
2092 2102
2093 2103 if not self.data_config.with_sparse_read:
2094 2104 slicedchunks = (revs,)
2095 2105 else:
2096 2106 slicedchunks = deltautil.slicechunk(
2097 2107 self, revs, targetsize=targetsize
2098 2108 )
2099 2109
2100 2110 for revschunk in slicedchunks:
2101 2111 firstrev = revschunk[0]
2102 2112 # Skip trailing revisions with empty diff
2103 2113 for lastrev in revschunk[::-1]:
2104 2114 if length(lastrev) != 0:
2105 2115 break
2106 2116
2107 2117 try:
2108 2118 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2109 2119 except OverflowError:
2110 2120 # issue4215 - we can't cache a run of chunks greater than
2111 2121 # 2G on Windows
2112 2122 return [self._chunk(rev) for rev in revschunk]
2113 2123
2114 2124 decomp = self.decompress
2115 2125 # self._decompressor might be None, but will not be used in that case
2116 2126 def_decomp = self._decompressor
2117 2127 for rev in revschunk:
2118 2128 chunkstart = start(rev)
2119 2129 if inline:
2120 2130 chunkstart += (rev + 1) * iosize
2121 2131 chunklength = length(rev)
2122 2132 comp_mode = self.index[rev][10]
2123 2133 c = buffer(data, chunkstart - offset, chunklength)
2124 2134 if comp_mode == COMP_MODE_PLAIN:
2125 2135 ladd(c)
2126 2136 elif comp_mode == COMP_MODE_INLINE:
2127 2137 ladd(decomp(c))
2128 2138 elif comp_mode == COMP_MODE_DEFAULT:
2129 2139 ladd(def_decomp(c))
2130 2140 else:
2131 2141 msg = b'unknown compression mode %d'
2132 2142 msg %= comp_mode
2133 2143 raise error.RevlogError(msg)
2134 2144
2135 2145 return l
2136 2146
2137 2147 def deltaparent(self, rev):
2138 2148 """return deltaparent of the given revision"""
2139 2149 base = self.index[rev][3]
2140 2150 if base == rev:
2141 2151 return nullrev
2142 2152 elif self.delta_config.general_delta:
2143 2153 return base
2144 2154 else:
2145 2155 return rev - 1
2146 2156
2147 2157 def issnapshot(self, rev):
2148 2158 """tells whether rev is a snapshot"""
2149 2159 if not self.delta_config.sparse_revlog:
2150 2160 return self.deltaparent(rev) == nullrev
2151 2161 elif hasattr(self.index, 'issnapshot'):
2152 2162 # directly assign the method to cache the testing and access
2153 2163 self.issnapshot = self.index.issnapshot
2154 2164 return self.issnapshot(rev)
2155 2165 if rev == nullrev:
2156 2166 return True
2157 2167 entry = self.index[rev]
2158 2168 base = entry[3]
2159 2169 if base == rev:
2160 2170 return True
2161 2171 if base == nullrev:
2162 2172 return True
2163 2173 p1 = entry[5]
2164 2174 while self.length(p1) == 0:
2165 2175 b = self.deltaparent(p1)
2166 2176 if b == p1:
2167 2177 break
2168 2178 p1 = b
2169 2179 p2 = entry[6]
2170 2180 while self.length(p2) == 0:
2171 2181 b = self.deltaparent(p2)
2172 2182 if b == p2:
2173 2183 break
2174 2184 p2 = b
2175 2185 if base == p1 or base == p2:
2176 2186 return False
2177 2187 return self.issnapshot(base)
2178 2188
2179 2189 def snapshotdepth(self, rev):
2180 2190 """number of snapshot in the chain before this one"""
2181 2191 if not self.issnapshot(rev):
2182 2192 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2183 2193 return len(self._deltachain(rev)[0]) - 1
2184 2194
2185 2195 def revdiff(self, rev1, rev2):
2186 2196 """return or calculate a delta between two revisions
2187 2197
2188 2198 The delta calculated is in binary form and is intended to be written to
2189 2199 revlog data directly. So this function needs raw revision data.
2190 2200 """
2191 2201 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2192 2202 return bytes(self._chunk(rev2))
2193 2203
2194 2204 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2195 2205
2196 2206 def revision(self, nodeorrev):
2197 2207 """return an uncompressed revision of a given node or revision
2198 2208 number.
2199 2209 """
2200 2210 return self._revisiondata(nodeorrev)
2201 2211
2202 2212 def sidedata(self, nodeorrev):
2203 2213 """a map of extra data related to the changeset but not part of the hash
2204 2214
2205 2215 This function currently returns a dictionary. However, a more advanced
2206 2216 mapping object will likely be used in the future for more
2207 2217 efficient/lazy code.
2208 2218 """
2209 2219 # deal with <nodeorrev> argument type
2210 2220 if isinstance(nodeorrev, int):
2211 2221 rev = nodeorrev
2212 2222 else:
2213 2223 rev = self.rev(nodeorrev)
2214 2224 return self._sidedata(rev)
2215 2225
2216 2226 def _revisiondata(self, nodeorrev, raw=False):
2217 2227 # deal with <nodeorrev> argument type
2218 2228 if isinstance(nodeorrev, int):
2219 2229 rev = nodeorrev
2220 2230 node = self.node(rev)
2221 2231 else:
2222 2232 node = nodeorrev
2223 2233 rev = None
2224 2234
2225 2235 # fast path the special `nullid` rev
2226 2236 if node == self.nullid:
2227 2237 return b""
2228 2238
2229 2239 # ``rawtext`` is the text as stored inside the revlog. Might be the
2230 2240 # revision or might need to be processed to retrieve the revision.
2231 2241 rev, rawtext, validated = self._rawtext(node, rev)
2232 2242
2233 2243 if raw and validated:
2234 2244 # if we don't want to process the raw text and that raw
2235 2245 # text is cached, we can exit early.
2236 2246 return rawtext
2237 2247 if rev is None:
2238 2248 rev = self.rev(node)
2239 2249 # the revlog's flag for this revision
2240 2250 # (usually alter its state or content)
2241 2251 flags = self.flags(rev)
2242 2252
2243 2253 if validated and flags == REVIDX_DEFAULT_FLAGS:
2244 2254 # no extra flags set, no flag processor runs, text = rawtext
2245 2255 return rawtext
2246 2256
2247 2257 if raw:
2248 2258 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2249 2259 text = rawtext
2250 2260 else:
2251 2261 r = flagutil.processflagsread(self, rawtext, flags)
2252 2262 text, validatehash = r
2253 2263 if validatehash:
2254 2264 self.checkhash(text, node, rev=rev)
2255 2265 if not validated:
2256 2266 self._revisioncache = (node, rev, rawtext)
2257 2267
2258 2268 return text
2259 2269
2260 2270 def _rawtext(self, node, rev):
2261 2271 """return the possibly unvalidated rawtext for a revision
2262 2272
2263 2273 returns (rev, rawtext, validated)
2264 2274 """
2265 2275
2266 2276 # revision in the cache (could be useful to apply delta)
2267 2277 cachedrev = None
2268 2278 # An intermediate text to apply deltas to
2269 2279 basetext = None
2270 2280
2271 2281 # Check if we have the entry in cache
2272 2282 # The cache entry looks like (node, rev, rawtext)
2273 2283 if self._revisioncache:
2274 2284 if self._revisioncache[0] == node:
2275 2285 return (rev, self._revisioncache[2], True)
2276 2286 cachedrev = self._revisioncache[1]
2277 2287
2278 2288 if rev is None:
2279 2289 rev = self.rev(node)
2280 2290
2281 2291 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2282 2292 if stopped:
2283 2293 basetext = self._revisioncache[2]
2284 2294
2285 2295 # drop cache to save memory, the caller is expected to
2286 2296 # update self._revisioncache after validating the text
2287 2297 self._revisioncache = None
2288 2298
2289 2299 targetsize = None
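# index field 2 is the expected uncompressed size; use roughly 4x that as a
# read-size target when slicing the delta chain (heuristic inferred from the code below)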
2290 2300 rawsize = self.index[rev][2]
2291 2301 if 0 <= rawsize:
2292 2302 targetsize = 4 * rawsize
2293 2303
2294 2304 bins = self._chunks(chain, targetsize=targetsize)
2295 2305 if basetext is None:
2296 2306 basetext = bytes(bins[0])
2297 2307 bins = bins[1:]
2298 2308
2299 2309 rawtext = mdiff.patches(basetext, bins)
2300 2310 del basetext # let us have a chance to free memory early
2301 2311 return (rev, rawtext, False)
2302 2312
2303 2313 def _sidedata(self, rev):
2304 2314 """Return the sidedata for a given revision number."""
2305 2315 index_entry = self.index[rev]
2306 2316 sidedata_offset = index_entry[8]
2307 2317 sidedata_size = index_entry[9]
2308 2318
2309 2319 if self._inline:
2310 2320 sidedata_offset += self.index.entry_size * (1 + rev)
2311 2321 if sidedata_size == 0:
2312 2322 return {}
2313 2323
2314 2324 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2315 2325 filename = self._sidedatafile
2316 2326 end = self._docket.sidedata_end
2317 2327 offset = sidedata_offset
2318 2328 length = sidedata_size
2319 2329 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2320 2330 raise error.RevlogError(m)
2321 2331
2322 2332 comp_segment = self._segmentfile_sidedata.read_chunk(
2323 2333 sidedata_offset, sidedata_size
2324 2334 )
2325 2335
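# index entry field 11 holds the sidedata compression mode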
2326 2336 comp = self.index[rev][11]
2327 2337 if comp == COMP_MODE_PLAIN:
2328 2338 segment = comp_segment
2329 2339 elif comp == COMP_MODE_DEFAULT:
2330 2340 segment = self._decompressor(comp_segment)
2331 2341 elif comp == COMP_MODE_INLINE:
2332 2342 segment = self.decompress(comp_segment)
2333 2343 else:
2334 2344 msg = b'unknown compression mode %d'
2335 2345 msg %= comp
2336 2346 raise error.RevlogError(msg)
2337 2347
2338 2348 sidedata = sidedatautil.deserialize_sidedata(segment)
2339 2349 return sidedata
2340 2350
2341 2351 def rawdata(self, nodeorrev):
2342 2352 """return an uncompressed raw data of a given node or revision number."""
2343 2353 return self._revisiondata(nodeorrev, raw=True)
2344 2354
2345 2355 def hash(self, text, p1, p2):
2346 2356 """Compute a node hash.
2347 2357
2348 2358 Available as a function so that subclasses can replace the hash
2349 2359 as needed.
2350 2360 """
2351 2361 return storageutil.hashrevisionsha1(text, p1, p2)
2352 2362
2353 2363 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2354 2364 """Check node hash integrity.
2355 2365
2356 2366 Available as a function so that subclasses can extend hash mismatch
2357 2367 behaviors as needed.
2358 2368 """
2359 2369 try:
2360 2370 if p1 is None and p2 is None:
2361 2371 p1, p2 = self.parents(node)
2362 2372 if node != self.hash(text, p1, p2):
2363 2373 # Clear the revision cache on hash failure. The revision cache
2364 2374 # only stores the raw revision and clearing the cache does have
2365 2375 # the side-effect that we won't have a cache hit when the raw
2366 2376 # revision data is accessed. But this case should be rare and
2367 2377 # it is extra work to teach the cache about the hash
2368 2378 # verification state.
2369 2379 if self._revisioncache and self._revisioncache[0] == node:
2370 2380 self._revisioncache = None
2371 2381
2372 2382 revornode = rev
2373 2383 if revornode is None:
2374 2384 revornode = templatefilters.short(hex(node))
2375 2385 raise error.RevlogError(
2376 2386 _(b"integrity check failed on %s:%s")
2377 2387 % (self.display_id, pycompat.bytestr(revornode))
2378 2388 )
2379 2389 except error.RevlogError:
2380 2390 if self.feature_config.censorable and storageutil.iscensoredtext(
2381 2391 text
2382 2392 ):
2383 2393 raise error.CensoredNodeError(self.display_id, node, text)
2384 2394 raise
2385 2395
2386 2396 @property
2387 2397 def _split_index_file(self):
2388 2398 """the path where to expect the index of an ongoing splitting operation
2389 2399
2390 2400 The file will only exist if a splitting operation is in progress, but
2391 2401 it is always expected at the same location."""
2392 2402 parts = self.radix.split(b'/')
2393 2403 if len(parts) > 1:
2394 2404 # adds a '-s' marker to the `data/` or `meta/` base directory
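# e.g. a radix of b'data/foo.txt' yields b'data-s/foo.txt.i' (illustrative)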
2395 2405 head = parts[0] + b'-s'
2396 2406 mids = parts[1:-1]
2397 2407 tail = parts[-1] + b'.i'
2398 2408 pieces = [head] + mids + [tail]
2399 2409 return b'/'.join(pieces)
2400 2410 else:
2401 2411 # the revlog is stored at the root of the store (changelog or
2402 2412 # manifest), no risk of collision.
2403 2413 return self.radix + b'.i.s'
2404 2414
2405 2415 def _enforceinlinesize(self, tr, side_write=True):
2406 2416 """Check if the revlog is too big for inline and convert if so.
2407 2417
2408 2418 This should be called after revisions are added to the revlog. If the
2409 2419 revlog has grown too large to be an inline revlog, it will convert it
2410 2420 to use multiple index and data files.
2411 2421 """
2412 2422 tiprev = len(self) - 1
2413 2423 total_size = self.start(tiprev) + self.length(tiprev)
2414 2424 if not self._inline or total_size < _maxinline:
2415 2425 return
2416 2426
2417 2427 troffset = tr.findoffset(self._indexfile)
2418 2428 if troffset is None:
2419 2429 raise error.RevlogError(
2420 2430 _(b"%s not found in the transaction") % self._indexfile
2421 2431 )
2422 2432 if troffset:
2423 2433 tr.addbackup(self._indexfile, for_offset=True)
2424 2434 tr.add(self._datafile, 0)
2425 2435
2426 2436 existing_handles = False
2427 2437 if self._writinghandles is not None:
2428 2438 existing_handles = True
2429 2439 fp = self._writinghandles[0]
2430 2440 fp.flush()
2431 2441 fp.close()
2432 2442 # We can't use the cached file handle after close(). So prevent
2433 2443 # its usage.
2434 2444 self._writinghandles = None
2435 2445 self._segmentfile.writing_handle = None
2436 2446 # No need to deal with the sidedata writing handle as it is only
2437 2447 # relevant for revlog-v2, which is never inline and therefore never
2438 2448 # reaches this code
2439 2449 if side_write:
2440 2450 old_index_file_path = self._indexfile
2441 2451 new_index_file_path = self._split_index_file
2442 2452 opener = self.opener
2443 2453 weak_self = weakref.ref(self)
2444 2454
2445 2455 # the "split" index replace the real index when the transaction is finalized
2446 2456 def finalize_callback(tr):
2447 2457 opener.rename(
2448 2458 new_index_file_path,
2449 2459 old_index_file_path,
2450 2460 checkambig=True,
2451 2461 )
2452 2462 maybe_self = weak_self()
2453 2463 if maybe_self is not None:
2454 2464 maybe_self._indexfile = old_index_file_path
2455 2465
2456 2466 def abort_callback(tr):
2457 2467 maybe_self = weak_self()
2458 2468 if maybe_self is not None:
2459 2469 maybe_self._indexfile = old_index_file_path
2460 2470
2461 2471 tr.registertmp(new_index_file_path)
2462 2472 if self.target[1] is not None:
2463 2473 callback_id = b'000-revlog-split-%d-%s' % self.target
2464 2474 else:
2465 2475 callback_id = b'000-revlog-split-%d' % self.target[0]
2466 2476 tr.addfinalize(callback_id, finalize_callback)
2467 2477 tr.addabort(callback_id, abort_callback)
2468 2478
2469 2479 new_dfh = self._datafp(b'w+')
2470 2480 new_dfh.truncate(0) # drop any potentially existing data
2471 2481 try:
2472 2482 with self.reading():
2473 2483 for r in self:
2474 2484 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2475 2485 new_dfh.flush()
2476 2486
2477 2487 if side_write:
2478 2488 self._indexfile = new_index_file_path
2479 2489 with self.__index_new_fp() as fp:
2480 2490 self._format_flags &= ~FLAG_INLINE_DATA
2481 2491 self._inline = False
2482 2492 for i in self:
2483 2493 e = self.index.entry_binary(i)
2484 2494 if i == 0 and self._docket is None:
2485 2495 header = self._format_flags | self._format_version
2486 2496 header = self.index.pack_header(header)
2487 2497 e = header + e
2488 2498 fp.write(e)
2489 2499 if self._docket is not None:
2490 2500 self._docket.index_end = fp.tell()
2491 2501
2492 2502 # If we don't use side-write, the temp file replaces the real
2493 2503 # index when we exit the context manager
2494 2504
2495 2505 nodemaputil.setup_persistent_nodemap(tr, self)
2496 2506 self._segmentfile = randomaccessfile.randomaccessfile(
2497 2507 self.opener,
2498 2508 self._datafile,
2499 2509 self.data_config.chunk_cache_size,
2500 2510 )
2501 2511
2502 2512 if existing_handles:
2503 2513 # switched from inline to conventional reopen the index
2504 2514 ifh = self.__index_write_fp()
2505 2515 self._writinghandles = (ifh, new_dfh, None)
2506 2516 self._segmentfile.writing_handle = new_dfh
2507 2517 new_dfh = None
2508 2518 # No need to deal with the sidedata writing handle as it is only
2509 2519 # relevant for revlog-v2, which is never inline and therefore never
2510 2520 # reaches this code
2511 2521 finally:
2512 2522 if new_dfh is not None:
2513 2523 new_dfh.close()
2514 2524
2515 2525 def _nodeduplicatecallback(self, transaction, node):
2516 2526 """called when trying to add a node already stored."""
2517 2527
2518 2528 @contextlib.contextmanager
2519 2529 def reading(self):
2520 2530 """Context manager that keeps data and sidedata files open for reading"""
2521 2531 if len(self.index) == 0:
2522 2532 yield # nothing to be read
2523 2533 else:
2524 2534 with self._segmentfile.reading():
2525 2535 with self._segmentfile_sidedata.reading():
2526 2536 yield
2527 2537
2528 2538 @contextlib.contextmanager
2529 2539 def _writing(self, transaction):
2530 2540 if self._trypending:
2531 2541 msg = b'try to write in a `trypending` revlog: %s'
2532 2542 msg %= self.display_id
2533 2543 raise error.ProgrammingError(msg)
2534 2544 if self._writinghandles is not None:
2535 2545 yield
2536 2546 else:
2537 2547 ifh = dfh = sdfh = None
2538 2548 try:
2539 2549 r = len(self)
2540 2550 # opening the data file.
2541 2551 dsize = 0
2542 2552 if r:
2543 2553 dsize = self.end(r - 1)
2544 2554 dfh = None
2545 2555 if not self._inline:
2546 2556 try:
2547 2557 dfh = self._datafp(b"r+")
2548 2558 if self._docket is None:
2549 2559 dfh.seek(0, os.SEEK_END)
2550 2560 else:
2551 2561 dfh.seek(self._docket.data_end, os.SEEK_SET)
2552 2562 except FileNotFoundError:
2553 2563 dfh = self._datafp(b"w+")
2554 2564 transaction.add(self._datafile, dsize)
2555 2565 if self._sidedatafile is not None:
2556 2566 # revlog-v2 does not inline, help Pytype
2557 2567 assert dfh is not None
2558 2568 try:
2559 2569 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2560 2570 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2561 2571 except FileNotFoundError:
2562 2572 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2563 2573 transaction.add(
2564 2574 self._sidedatafile, self._docket.sidedata_end
2565 2575 )
2566 2576
2567 2577 # opening the index file.
2568 2578 isize = r * self.index.entry_size
2569 2579 ifh = self.__index_write_fp()
2570 2580 if self._inline:
2571 2581 transaction.add(self._indexfile, dsize + isize)
2572 2582 else:
2573 2583 transaction.add(self._indexfile, isize)
2574 2584 # exposing all file handle for writing.
2575 2585 self._writinghandles = (ifh, dfh, sdfh)
2576 2586 self._segmentfile.writing_handle = ifh if self._inline else dfh
2577 2587 self._segmentfile_sidedata.writing_handle = sdfh
2578 2588 yield
2579 2589 if self._docket is not None:
2580 2590 self._write_docket(transaction)
2581 2591 finally:
2582 2592 self._writinghandles = None
2583 2593 self._segmentfile.writing_handle = None
2584 2594 self._segmentfile_sidedata.writing_handle = None
2585 2595 if dfh is not None:
2586 2596 dfh.close()
2587 2597 if sdfh is not None:
2588 2598 sdfh.close()
2589 2599 # closing the index file last to avoid exposing references to
2590 2600 # potentially unflushed data content.
2591 2601 if ifh is not None:
2592 2602 ifh.close()
2593 2603
2594 2604 def _write_docket(self, transaction):
2595 2605 """write the current docket on disk
2596 2606
2597 2607 Exists as a method to help the changelog implement transaction logic
2598 2608
2599 2609 We could also imagine using the same transaction logic for all revlogs
2600 2610 since dockets are cheap.
2601 2611 self._docket.write(transaction)
2602 2612
2603 2613 def addrevision(
2604 2614 self,
2605 2615 text,
2606 2616 transaction,
2607 2617 link,
2608 2618 p1,
2609 2619 p2,
2610 2620 cachedelta=None,
2611 2621 node=None,
2612 2622 flags=REVIDX_DEFAULT_FLAGS,
2613 2623 deltacomputer=None,
2614 2624 sidedata=None,
2615 2625 ):
2616 2626 """add a revision to the log
2617 2627
2618 2628 text - the revision data to add
2619 2629 transaction - the transaction object used for rollback
2620 2630 link - the linkrev data to add
2621 2631 p1, p2 - the parent nodeids of the revision
2622 2632 cachedelta - an optional precomputed delta
2623 2633 node - nodeid of revision; typically node is not specified, and it is
2624 2634 computed by default as hash(text, p1, p2), however subclasses might
2625 2635 use different hashing method (and override checkhash() in such case)
2626 2636 flags - the known flags to set on the revision
2627 2637 deltacomputer - an optional deltacomputer instance shared between
2628 2638 multiple calls
2629 2639 """
2630 2640 if link == nullrev:
2631 2641 raise error.RevlogError(
2632 2642 _(b"attempted to add linkrev -1 to %s") % self.display_id
2633 2643 )
2634 2644
2635 2645 if sidedata is None:
2636 2646 sidedata = {}
2637 2647 elif sidedata and not self.feature_config.has_side_data:
2638 2648 raise error.ProgrammingError(
2639 2649 _(b"trying to add sidedata to a revlog who don't support them")
2640 2650 )
2641 2651
2642 2652 if flags:
2643 2653 node = node or self.hash(text, p1, p2)
2644 2654
2645 2655 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2646 2656
2647 2657 # If the flag processor modifies the revision data, ignore any provided
2648 2658 # cachedelta.
2649 2659 if rawtext != text:
2650 2660 cachedelta = None
2651 2661
2652 2662 if len(rawtext) > _maxentrysize:
2653 2663 raise error.RevlogError(
2654 2664 _(
2655 2665 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2656 2666 )
2657 2667 % (self.display_id, len(rawtext))
2658 2668 )
2659 2669
2660 2670 node = node or self.hash(rawtext, p1, p2)
2661 2671 rev = self.index.get_rev(node)
2662 2672 if rev is not None:
2663 2673 return rev
2664 2674
2665 2675 if validatehash:
2666 2676 self.checkhash(rawtext, node, p1=p1, p2=p2)
2667 2677
2668 2678 return self.addrawrevision(
2669 2679 rawtext,
2670 2680 transaction,
2671 2681 link,
2672 2682 p1,
2673 2683 p2,
2674 2684 node,
2675 2685 flags,
2676 2686 cachedelta=cachedelta,
2677 2687 deltacomputer=deltacomputer,
2678 2688 sidedata=sidedata,
2679 2689 )
2680 2690
2681 2691 def addrawrevision(
2682 2692 self,
2683 2693 rawtext,
2684 2694 transaction,
2685 2695 link,
2686 2696 p1,
2687 2697 p2,
2688 2698 node,
2689 2699 flags,
2690 2700 cachedelta=None,
2691 2701 deltacomputer=None,
2692 2702 sidedata=None,
2693 2703 ):
2694 2704 """add a raw revision with known flags, node and parents
2695 2705 useful when reusing a revision not stored in this revlog (e.g. received
2696 2706 over the wire, or read from an external bundle).
2697 2707 """
2698 2708 with self._writing(transaction):
2699 2709 return self._addrevision(
2700 2710 node,
2701 2711 rawtext,
2702 2712 transaction,
2703 2713 link,
2704 2714 p1,
2705 2715 p2,
2706 2716 flags,
2707 2717 cachedelta,
2708 2718 deltacomputer=deltacomputer,
2709 2719 sidedata=sidedata,
2710 2720 )
2711 2721
2712 2722 def compress(self, data):
2713 2723 """Generate a possibly-compressed representation of data."""
2714 2724 if not data:
2715 2725 return b'', data
2716 2726
2717 2727 compressed = self._compressor.compress(data)
2718 2728
2719 2729 if compressed:
2720 2730 # The revlog compressor added the header in the returned data.
2721 2731 return b'', compressed
2722 2732
2723 2733 if data[0:1] == b'\0':
2724 2734 return b'', data
2725 2735 return b'u', data
2726 2736
2727 2737 def decompress(self, data):
2728 2738 """Decompress a revlog chunk.
2729 2739
2730 2740 The chunk is expected to begin with a header identifying the
2731 2741 format type so it can be routed to an appropriate decompressor.
2732 2742 """
2733 2743 if not data:
2734 2744 return data
2735 2745
2736 2746 # Revlogs are read much more frequently than they are written and many
2737 2747 # chunks only take microseconds to decompress, so performance is
2738 2748 # important here.
2739 2749 #
2740 2750 # We can make a few assumptions about revlogs:
2741 2751 #
2742 2752 # 1) the majority of chunks will be compressed (as opposed to inline
2743 2753 # raw data).
2744 2754 # 2) decompressing *any* data will likely be at least 10x slower than
2745 2755 # returning raw inline data.
2746 2756 # 3) we want to prioritize common and officially supported compression
2747 2757 # engines
2748 2758 #
2749 2759 # It follows that we want to optimize for "decompress compressed data
2750 2760 # when encoded with common and officially supported compression engines"
2751 2761 # case over "raw data" and "data encoded by less common or non-official
2752 2762 # compression engines." That is why we have the inline lookup first
2753 2763 # followed by the compengines lookup.
2754 2764 #
2755 2765 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2756 2766 # compressed chunks. And this matters for changelog and manifest reads.
2757 2767 t = data[0:1]
2758 2768
2759 2769 if t == b'x':
2760 2770 try:
2761 2771 return _zlibdecompress(data)
2762 2772 except zlib.error as e:
2763 2773 raise error.RevlogError(
2764 2774 _(b'revlog decompress error: %s')
2765 2775 % stringutil.forcebytestr(e)
2766 2776 )
2767 2777 # '\0' is more common than 'u' so it goes first.
2768 2778 elif t == b'\0':
2769 2779 return data
2770 2780 elif t == b'u':
2771 2781 return util.buffer(data, 1)
2772 2782
2773 2783 compressor = self._get_decompressor(t)
2774 2784
2775 2785 return compressor.decompress(data)
2776 2786
2777 2787 def _addrevision(
2778 2788 self,
2779 2789 node,
2780 2790 rawtext,
2781 2791 transaction,
2782 2792 link,
2783 2793 p1,
2784 2794 p2,
2785 2795 flags,
2786 2796 cachedelta,
2787 2797 alwayscache=False,
2788 2798 deltacomputer=None,
2789 2799 sidedata=None,
2790 2800 ):
2791 2801 """internal function to add revisions to the log
2792 2802
2793 2803 see addrevision for argument descriptions.
2794 2804
2795 2805 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2796 2806
2797 2807 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2798 2808 be used.
2799 2809
2800 2810 invariants:
2801 2811 - rawtext is optional (can be None); if not set, cachedelta must be set.
2802 2812 if both are set, they must correspond to each other.
2803 2813 """
2804 2814 if node == self.nullid:
2805 2815 raise error.RevlogError(
2806 2816 _(b"%s: attempt to add null revision") % self.display_id
2807 2817 )
2808 2818 if (
2809 2819 node == self.nodeconstants.wdirid
2810 2820 or node in self.nodeconstants.wdirfilenodeids
2811 2821 ):
2812 2822 raise error.RevlogError(
2813 2823 _(b"%s: attempt to add wdir revision") % self.display_id
2814 2824 )
2815 2825 if self._writinghandles is None:
2816 2826 msg = b'adding revision outside `revlog._writing` context'
2817 2827 raise error.ProgrammingError(msg)
2818 2828
2819 2829 btext = [rawtext]
2820 2830
2821 2831 curr = len(self)
2822 2832 prev = curr - 1
2823 2833
2824 2834 offset = self._get_data_offset(prev)
2825 2835
2826 2836 if self._concurrencychecker:
2827 2837 ifh, dfh, sdfh = self._writinghandles
2828 2838 # XXX no checking for the sidedata file
2829 2839 if self._inline:
2830 2840 # offset is "as if" it were in the .d file, so we need to add on
2831 2841 # the size of the entry metadata.
2832 2842 self._concurrencychecker(
2833 2843 ifh, self._indexfile, offset + curr * self.index.entry_size
2834 2844 )
2835 2845 else:
2836 2846 # Entries in the .i are a consistent size.
2837 2847 self._concurrencychecker(
2838 2848 ifh, self._indexfile, curr * self.index.entry_size
2839 2849 )
2840 2850 self._concurrencychecker(dfh, self._datafile, offset)
2841 2851
2842 2852 p1r, p2r = self.rev(p1), self.rev(p2)
2843 2853
2844 2854 # full versions are inserted when the needed deltas
2845 2855 # become comparable to the uncompressed text
2846 2856 if rawtext is None:
2847 2857 # need rawtext size, before changed by flag processors, which is
2848 2858 # the non-raw size. use revlog explicitly to avoid filelog's extra
2849 2859 # logic that might remove metadata size.
2850 2860 textlen = mdiff.patchedsize(
2851 2861 revlog.size(self, cachedelta[0]), cachedelta[1]
2852 2862 )
2853 2863 else:
2854 2864 textlen = len(rawtext)
2855 2865
2856 2866 if deltacomputer is None:
2857 2867 write_debug = None
2858 2868 if self.delta_config.debug_delta:
2859 2869 write_debug = transaction._report
2860 2870 deltacomputer = deltautil.deltacomputer(
2861 2871 self, write_debug=write_debug
2862 2872 )
2863 2873
2864 2874 if cachedelta is not None and len(cachedelta) == 2:
2865 2875 # If the cached delta has no information about how it should be
2866 2876 # reused, add the default reuse instruction according to the
2867 2877 # revlog's configuration.
2868 2878 if (
2869 2879 self.delta_config.general_delta
2870 2880 and self.delta_config.lazy_delta_base
2871 2881 ):
2872 2882 delta_base_reuse = DELTA_BASE_REUSE_TRY
2873 2883 else:
2874 2884 delta_base_reuse = DELTA_BASE_REUSE_NO
2875 2885 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2876 2886
2877 2887 revinfo = revlogutils.revisioninfo(
2878 2888 node,
2879 2889 p1,
2880 2890 p2,
2881 2891 btext,
2882 2892 textlen,
2883 2893 cachedelta,
2884 2894 flags,
2885 2895 )
2886 2896
2887 2897 deltainfo = deltacomputer.finddeltainfo(revinfo)
2888 2898
2889 2899 compression_mode = COMP_MODE_INLINE
2890 2900 if self._docket is not None:
2891 2901 default_comp = self._docket.default_compression_header
2892 2902 r = deltautil.delta_compression(default_comp, deltainfo)
2893 2903 compression_mode, deltainfo = r
2894 2904
2895 2905 sidedata_compression_mode = COMP_MODE_INLINE
2896 2906 if sidedata and self.feature_config.has_side_data:
2897 2907 sidedata_compression_mode = COMP_MODE_PLAIN
2898 2908 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2899 2909 sidedata_offset = self._docket.sidedata_end
2900 2910 h, comp_sidedata = self.compress(serialized_sidedata)
2901 2911 if (
2902 2912 h != b'u'
2903 2913 and comp_sidedata[0:1] != b'\0'
2904 2914 and len(comp_sidedata) < len(serialized_sidedata)
2905 2915 ):
2906 2916 assert not h
2907 2917 if (
2908 2918 comp_sidedata[0:1]
2909 2919 == self._docket.default_compression_header
2910 2920 ):
2911 2921 sidedata_compression_mode = COMP_MODE_DEFAULT
2912 2922 serialized_sidedata = comp_sidedata
2913 2923 else:
2914 2924 sidedata_compression_mode = COMP_MODE_INLINE
2915 2925 serialized_sidedata = comp_sidedata
2916 2926 else:
2917 2927 serialized_sidedata = b""
2918 2928 # Don't store the offset if the sidedata is empty; that way
2919 2929 # empty sidedata is easy to detect and is no different
2920 2930 # from sidedata we add manually.
2921 2931 sidedata_offset = 0
2922 2932
2923 2933 rank = RANK_UNKNOWN
2924 2934 if self.feature_config.compute_rank:
2925 2935 if (p1r, p2r) == (nullrev, nullrev):
2926 2936 rank = 1
2927 2937 elif p1r != nullrev and p2r == nullrev:
2928 2938 rank = 1 + self.fast_rank(p1r)
2929 2939 elif p1r == nullrev and p2r != nullrev:
2930 2940 rank = 1 + self.fast_rank(p2r)
2931 2941 else: # merge node
2932 2942 if rustdagop is not None and self.index.rust_ext_compat:
2933 2943 rank = rustdagop.rank(self.index, p1r, p2r)
2934 2944 else:
2935 2945 pmin, pmax = sorted((p1r, p2r))
2936 2946 rank = 1 + self.fast_rank(pmax)
2937 2947 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2938 2948
2939 2949 e = revlogutils.entry(
2940 2950 flags=flags,
2941 2951 data_offset=offset,
2942 2952 data_compressed_length=deltainfo.deltalen,
2943 2953 data_uncompressed_length=textlen,
2944 2954 data_compression_mode=compression_mode,
2945 2955 data_delta_base=deltainfo.base,
2946 2956 link_rev=link,
2947 2957 parent_rev_1=p1r,
2948 2958 parent_rev_2=p2r,
2949 2959 node_id=node,
2950 2960 sidedata_offset=sidedata_offset,
2951 2961 sidedata_compressed_length=len(serialized_sidedata),
2952 2962 sidedata_compression_mode=sidedata_compression_mode,
2953 2963 rank=rank,
2954 2964 )
2955 2965
2956 2966 self.index.append(e)
2957 2967 entry = self.index.entry_binary(curr)
2958 2968 if curr == 0 and self._docket is None:
2959 2969 header = self._format_flags | self._format_version
2960 2970 header = self.index.pack_header(header)
2961 2971 entry = header + entry
2962 2972 self._writeentry(
2963 2973 transaction,
2964 2974 entry,
2965 2975 deltainfo.data,
2966 2976 link,
2967 2977 offset,
2968 2978 serialized_sidedata,
2969 2979 sidedata_offset,
2970 2980 )
2971 2981
2972 2982 rawtext = btext[0]
2973 2983
2974 2984 if alwayscache and rawtext is None:
2975 2985 rawtext = deltacomputer.buildtext(revinfo)
2976 2986
2977 2987 if type(rawtext) == bytes: # only accept immutable objects
2978 2988 self._revisioncache = (node, curr, rawtext)
2979 2989 self._chainbasecache[curr] = deltainfo.chainbase
2980 2990 return curr
2981 2991
2982 2992 def _get_data_offset(self, prev):
2983 2993 """Returns the current offset in the (in-transaction) data file.
2984 2994 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2985 2995 file to store that information: since sidedata can be rewritten to the
2986 2996 end of the data file within a transaction, you can have cases where, for
2987 2997 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2988 2998 to `n - 1`'s sidedata being written after `n`'s data.
2989 2999
2990 3000 TODO cache this in a docket file before getting out of experimental."""
2991 3001 if self._docket is None:
2992 3002 return self.end(prev)
2993 3003 else:
2994 3004 return self._docket.data_end
2995 3005
2996 3006 def _writeentry(
2997 3007 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2998 3008 ):
2999 3009 # Files opened in a+ mode have inconsistent behavior on various
3000 3010 # platforms. Windows requires that a file positioning call be made
3001 3011 # when the file handle transitions between reads and writes. See
3002 3012 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3003 3013 # platforms, Python or the platform itself can be buggy. Some versions
3004 3014 # of Solaris have been observed to not append at the end of the file
3005 3015 # if the file was seeked to before the end. See issue4943 for more.
3006 3016 #
3007 3017 # We work around this issue by inserting a seek() before writing.
3008 3018 # Note: This is likely not necessary on Python 3. However, because
3009 3019 # the file handle is reused for reads and may be seeked there, we need
3010 3020 # to be careful before changing this.
3011 3021 if self._writinghandles is None:
3012 3022 msg = b'adding revision outside `revlog._writing` context'
3013 3023 raise error.ProgrammingError(msg)
3014 3024 ifh, dfh, sdfh = self._writinghandles
3015 3025 if self._docket is None:
3016 3026 ifh.seek(0, os.SEEK_END)
3017 3027 else:
3018 3028 ifh.seek(self._docket.index_end, os.SEEK_SET)
3019 3029 if dfh:
3020 3030 if self._docket is None:
3021 3031 dfh.seek(0, os.SEEK_END)
3022 3032 else:
3023 3033 dfh.seek(self._docket.data_end, os.SEEK_SET)
3024 3034 if sdfh:
3025 3035 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3026 3036
3027 3037 curr = len(self) - 1
3028 3038 if not self._inline:
3029 3039 transaction.add(self._datafile, offset)
3030 3040 if self._sidedatafile:
3031 3041 transaction.add(self._sidedatafile, sidedata_offset)
3032 3042 transaction.add(self._indexfile, curr * len(entry))
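# `data` is the (header, payload) pair produced by compress(); the header may be empty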
3033 3043 if data[0]:
3034 3044 dfh.write(data[0])
3035 3045 dfh.write(data[1])
3036 3046 if sidedata:
3037 3047 sdfh.write(sidedata)
3038 3048 ifh.write(entry)
3039 3049 else:
3040 3050 offset += curr * self.index.entry_size
3041 3051 transaction.add(self._indexfile, offset)
3042 3052 ifh.write(entry)
3043 3053 ifh.write(data[0])
3044 3054 ifh.write(data[1])
3045 3055 assert not sidedata
3046 3056 self._enforceinlinesize(transaction)
3047 3057 if self._docket is not None:
3048 3058 # revlog-v2 always has 3 writing handles, help Pytype
3049 3059 wh1 = self._writinghandles[0]
3050 3060 wh2 = self._writinghandles[1]
3051 3061 wh3 = self._writinghandles[2]
3052 3062 assert wh1 is not None
3053 3063 assert wh2 is not None
3054 3064 assert wh3 is not None
3055 3065 self._docket.index_end = wh1.tell()
3056 3066 self._docket.data_end = wh2.tell()
3057 3067 self._docket.sidedata_end = wh3.tell()
3058 3068
3059 3069 nodemaputil.setup_persistent_nodemap(transaction, self)
3060 3070
3061 3071 def addgroup(
3062 3072 self,
3063 3073 deltas,
3064 3074 linkmapper,
3065 3075 transaction,
3066 3076 alwayscache=False,
3067 3077 addrevisioncb=None,
3068 3078 duplicaterevisioncb=None,
3069 3079 debug_info=None,
3070 3080 delta_base_reuse_policy=None,
3071 3081 ):
3072 3082 """
3073 3083 add a delta group
3074 3084
3075 3085 given a set of deltas, add them to the revision log. the
3076 3086 first delta is against its parent, which should be in our
3077 3087 log, the rest are against the previous delta.
3078 3088
3079 3089 If ``addrevisioncb`` is defined, it will be called with arguments of
3080 3090 this revlog and the node that was added.
3081 3091 """
3082 3092
3083 3093 if self._adding_group:
3084 3094 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3085 3095
3086 3096 # read the default delta-base reuse policy from revlog config if the
3087 3097 # group did not specify one.
3088 3098 if delta_base_reuse_policy is None:
3089 3099 if (
3090 3100 self.delta_config.general_delta
3091 3101 and self.delta_config.lazy_delta_base
3092 3102 ):
3093 3103 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3094 3104 else:
3095 3105 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3096 3106
3097 3107 self._adding_group = True
3098 3108 empty = True
3099 3109 try:
3100 3110 with self._writing(transaction):
3101 3111 write_debug = None
3102 3112 if self.delta_config.debug_delta:
3103 3113 write_debug = transaction._report
3104 3114 deltacomputer = deltautil.deltacomputer(
3105 3115 self,
3106 3116 write_debug=write_debug,
3107 3117 debug_info=debug_info,
3108 3118 )
3109 3119 # loop through our set of deltas
3110 3120 for data in deltas:
3111 3121 (
3112 3122 node,
3113 3123 p1,
3114 3124 p2,
3115 3125 linknode,
3116 3126 deltabase,
3117 3127 delta,
3118 3128 flags,
3119 3129 sidedata,
3120 3130 ) = data
3121 3131 link = linkmapper(linknode)
3122 3132 flags = flags or REVIDX_DEFAULT_FLAGS
3123 3133
3124 3134 rev = self.index.get_rev(node)
3125 3135 if rev is not None:
3126 3136 # this can happen if two branches make the same change
3127 3137 self._nodeduplicatecallback(transaction, rev)
3128 3138 if duplicaterevisioncb:
3129 3139 duplicaterevisioncb(self, rev)
3130 3140 empty = False
3131 3141 continue
3132 3142
3133 3143 for p in (p1, p2):
3134 3144 if not self.index.has_node(p):
3135 3145 raise error.LookupError(
3136 3146 p, self.radix, _(b'unknown parent')
3137 3147 )
3138 3148
3139 3149 if not self.index.has_node(deltabase):
3140 3150 raise error.LookupError(
3141 3151 deltabase, self.display_id, _(b'unknown delta base')
3142 3152 )
3143 3153
3144 3154 baserev = self.rev(deltabase)
3145 3155
3146 3156 if baserev != nullrev and self.iscensored(baserev):
3147 3157 # if base is censored, delta must be full replacement in a
3148 3158 # single patch operation
3149 3159 hlen = struct.calcsize(b">lll")
3150 3160 oldlen = self.rawsize(baserev)
3151 3161 newlen = len(delta) - hlen
3152 3162 if delta[:hlen] != mdiff.replacediffheader(
3153 3163 oldlen, newlen
3154 3164 ):
3155 3165 raise error.CensoredBaseError(
3156 3166 self.display_id, self.node(baserev)
3157 3167 )
3158 3168
3159 3169 if not flags and self._peek_iscensored(baserev, delta):
3160 3170 flags |= REVIDX_ISCENSORED
3161 3171
3162 3172 # We assume consumers of addrevisioncb will want to retrieve
3163 3173 # the added revision, which will require a call to
3164 3174 # revision(). revision() will fast path if there is a cache
3165 3175 # hit. So, we tell _addrevision() to always cache in this case.
3166 3176 # We're only using addgroup() in the context of changegroup
3167 3177 # generation so the revision data can always be handled as raw
3168 3178 # by the flagprocessor.
3169 3179 rev = self._addrevision(
3170 3180 node,
3171 3181 None,
3172 3182 transaction,
3173 3183 link,
3174 3184 p1,
3175 3185 p2,
3176 3186 flags,
3177 3187 (baserev, delta, delta_base_reuse_policy),
3178 3188 alwayscache=alwayscache,
3179 3189 deltacomputer=deltacomputer,
3180 3190 sidedata=sidedata,
3181 3191 )
3182 3192
3183 3193 if addrevisioncb:
3184 3194 addrevisioncb(self, rev)
3185 3195 empty = False
3186 3196 finally:
3187 3197 self._adding_group = False
3188 3198 return not empty
3189 3199
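Each entry consumed by addgroup() above is an 8-tuple, and a censored delta base forces the incoming delta to be a full replacement of the base text. The sketch below is illustrative only and not part of revlog.py; it assumes that mdiff.replacediffheader(oldlen, newlen) packs (0, oldlen, newlen) as three big-endian 32-bit integers, which is what a whole-text replacement looks like in the bdiff format.

    import struct

    # shape of one delta-group entry:
    # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    def is_full_replacement(delta, oldlen):
        """True if `delta` rewrites the whole base text of size `oldlen`."""
        hlen = struct.calcsize(b">lll")
        if len(delta) < hlen:
            return False
        start, end, newlen = struct.unpack(b">lll", delta[:hlen])
        return (start, end, newlen) == (0, oldlen, len(delta) - hlen)

    newtext = b"tombstone or replacement text"
    delta = struct.pack(b">lll", 0, 42, len(newtext)) + newtext
    assert is_full_replacement(delta, oldlen=42)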
3190 3200 def iscensored(self, rev):
3191 3201 """Check if a file revision is censored."""
3192 3202 if not self.feature_config.censorable:
3193 3203 return False
3194 3204
3195 3205 return self.flags(rev) & REVIDX_ISCENSORED
3196 3206
3197 3207 def _peek_iscensored(self, baserev, delta):
3198 3208 """Quickly check if a delta produces a censored revision."""
3199 3209 if not self.feature_config.censorable:
3200 3210 return False
3201 3211
3202 3212 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3203 3213
3204 3214 def getstrippoint(self, minlink):
3205 3215 """find the minimum rev that must be stripped to strip the linkrev
3206 3216
3207 3217 Returns a tuple containing the minimum rev and a set of all revs that
3208 3218 have linkrevs that will be broken by this strip.
3209 3219 """
3210 3220 return storageutil.resolvestripinfo(
3211 3221 minlink,
3212 3222 len(self) - 1,
3213 3223 self.headrevs(),
3214 3224 self.linkrev,
3215 3225 self.parentrevs,
3216 3226 )
3217 3227
3218 3228 def strip(self, minlink, transaction):
3219 3229 """truncate the revlog on the first revision with a linkrev >= minlink
3220 3230
3221 3231 This function is called when we're stripping revision minlink and
3222 3232 its descendants from the repository.
3223 3233
3224 3234 We have to remove all revisions with linkrev >= minlink, because
3225 3235 the equivalent changelog revisions will be renumbered after the
3226 3236 strip.
3227 3237
3228 3238 So we truncate the revlog on the first of these revisions, and
3229 3239 trust that the caller has saved the revisions that shouldn't be
3230 3240 removed and that it'll re-add them after this truncation.
3231 3241 """
3232 3242 if len(self) == 0:
3233 3243 return
3234 3244
3235 3245 rev, _ = self.getstrippoint(minlink)
3236 3246 if rev == len(self):
3237 3247 return
3238 3248
3239 3249 # first truncate the files on disk
3240 3250 data_end = self.start(rev)
3241 3251 if not self._inline:
3242 3252 transaction.add(self._datafile, data_end)
3243 3253 end = rev * self.index.entry_size
3244 3254 else:
3245 3255 end = data_end + (rev * self.index.entry_size)
3246 3256
3247 3257 if self._sidedatafile:
3248 3258 sidedata_end = self.sidedata_cut_off(rev)
3249 3259 transaction.add(self._sidedatafile, sidedata_end)
3250 3260
3251 3261 transaction.add(self._indexfile, end)
3252 3262 if self._docket is not None:
3253 3263 # XXX we could leverage the docket while stripping. However, it is
3254 3264 # not powerful enough at the time of this comment
3255 3265 self._docket.index_end = end
3256 3266 self._docket.data_end = data_end
3257 3267 self._docket.sidedata_end = sidedata_end
3258 3268 self._docket.write(transaction, stripping=True)
3259 3269
3260 3270 # then reset internal state in memory to forget those revisions
3261 3271 self._revisioncache = None
3262 3272 self._chaininfocache = util.lrucachedict(500)
3263 3273 self._segmentfile.clear_cache()
3264 3274 self._segmentfile_sidedata.clear_cache()
3265 3275
3266 3276 del self.index[rev:-1]
3267 3277
3268 3278 def checksize(self):
3269 3279 """Check size of index and data files
3270 3280
3271 3281 return a (dd, di) tuple.
3272 3282 - dd: extra bytes for the "data" file
3273 3283 - di: extra bytes for the "index" file
3274 3284
3275 3285 A healthy revlog will return (0, 0).
3276 3286 """
3277 3287 expected = 0
3278 3288 if len(self):
3279 3289 expected = max(0, self.end(len(self) - 1))
3280 3290
3281 3291 try:
3282 3292 with self._datafp() as f:
3283 3293 f.seek(0, io.SEEK_END)
3284 3294 actual = f.tell()
3285 3295 dd = actual - expected
3286 3296 except FileNotFoundError:
3287 3297 dd = 0
3288 3298
3289 3299 try:
3290 3300 f = self.opener(self._indexfile)
3291 3301 f.seek(0, io.SEEK_END)
3292 3302 actual = f.tell()
3293 3303 f.close()
3294 3304 s = self.index.entry_size
3295 3305 i = max(0, actual // s)
3296 3306 di = actual - (i * s)
3297 3307 if self._inline:
3298 3308 databytes = 0
3299 3309 for r in self:
3300 3310 databytes += max(0, self.length(r))
3301 3311 dd = 0
3302 3312 di = actual - len(self) * s - databytes
3303 3313 except FileNotFoundError:
3304 3314 di = 0
3305 3315
3306 3316 return (dd, di)
3307 3317
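As a rough illustration of the non-inline ``di`` computation above (hypothetical numbers; 64 bytes is the v1 index entry size), the "extra" index bytes are simply whatever is left over after a whole number of index entries:

    def extra_index_bytes(index_size, entry_size=64):
        # bytes beyond a whole number of fixed-size index entries
        whole = max(0, index_size // entry_size)
        return index_size - whole * entry_size

    assert extra_index_bytes(640) == 0   # ten complete entries: healthy
    assert extra_index_bytes(645) == 5   # 5 trailing bytes -> di == 5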
3308 3318 def files(self):
3309 3319 """return list of files that compose this revlog"""
3310 3320 res = [self._indexfile]
3311 3321 if self._docket_file is None:
3312 3322 if not self._inline:
3313 3323 res.append(self._datafile)
3314 3324 else:
3315 3325 res.append(self._docket_file)
3316 3326 res.extend(self._docket.old_index_filepaths(include_empty=False))
3317 3327 if self._docket.data_end:
3318 3328 res.append(self._datafile)
3319 3329 res.extend(self._docket.old_data_filepaths(include_empty=False))
3320 3330 if self._docket.sidedata_end:
3321 3331 res.append(self._sidedatafile)
3322 3332 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3323 3333 return res
3324 3334
3325 3335 def emitrevisions(
3326 3336 self,
3327 3337 nodes,
3328 3338 nodesorder=None,
3329 3339 revisiondata=False,
3330 3340 assumehaveparentrevisions=False,
3331 3341 deltamode=repository.CG_DELTAMODE_STD,
3332 3342 sidedata_helpers=None,
3333 3343 debug_info=None,
3334 3344 ):
3335 3345 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3336 3346 raise error.ProgrammingError(
3337 3347 b'unhandled value for nodesorder: %s' % nodesorder
3338 3348 )
3339 3349
3340 3350 if nodesorder is None and not self.delta_config.general_delta:
3341 3351 nodesorder = b'storage'
3342 3352
3343 3353 if (
3344 3354 not self._storedeltachains
3345 3355 and deltamode != repository.CG_DELTAMODE_PREV
3346 3356 ):
3347 3357 deltamode = repository.CG_DELTAMODE_FULL
3348 3358
3349 3359 return storageutil.emitrevisions(
3350 3360 self,
3351 3361 nodes,
3352 3362 nodesorder,
3353 3363 revlogrevisiondelta,
3354 3364 deltaparentfn=self.deltaparent,
3355 3365 candeltafn=self._candelta,
3356 3366 rawsizefn=self.rawsize,
3357 3367 revdifffn=self.revdiff,
3358 3368 flagsfn=self.flags,
3359 3369 deltamode=deltamode,
3360 3370 revisiondata=revisiondata,
3361 3371 assumehaveparentrevisions=assumehaveparentrevisions,
3362 3372 sidedata_helpers=sidedata_helpers,
3363 3373 debug_info=debug_info,
3364 3374 )
3365 3375
3366 3376 DELTAREUSEALWAYS = b'always'
3367 3377 DELTAREUSESAMEREVS = b'samerevs'
3368 3378 DELTAREUSENEVER = b'never'
3369 3379
3370 3380 DELTAREUSEFULLADD = b'fulladd'
3371 3381
3372 3382 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3373 3383
3374 3384 def clone(
3375 3385 self,
3376 3386 tr,
3377 3387 destrevlog,
3378 3388 addrevisioncb=None,
3379 3389 deltareuse=DELTAREUSESAMEREVS,
3380 3390 forcedeltabothparents=None,
3381 3391 sidedata_helpers=None,
3382 3392 ):
3383 3393 """Copy this revlog to another, possibly with format changes.
3384 3394
3385 3395 The destination revlog will contain the same revisions and nodes.
3386 3396 However, it may not be bit-for-bit identical due to e.g. delta encoding
3387 3397 differences.
3388 3398
3389 3399 The ``deltareuse`` argument controls how deltas from the existing revlog
3390 3400 are preserved in the destination revlog. The argument can have the
3391 3401 following values:
3392 3402
3393 3403 DELTAREUSEALWAYS
3394 3404 Deltas will always be reused (if possible), even if the destination
3395 3405 revlog would not select the same revisions for the delta. This is the
3396 3406 fastest mode of operation.
3397 3407 DELTAREUSESAMEREVS
3398 3408 Deltas will be reused if the destination revlog would pick the same
3399 3409 revisions for the delta. This mode strikes a balance between speed
3400 3410 and optimization.
3401 3411 DELTAREUSENEVER
3402 3412 Deltas will never be reused. This is the slowest mode of execution.
3403 3413 This mode can be used to recompute deltas (e.g. if the diff/delta
3404 3414 algorithm changes).
3405 3415 DELTAREUSEFULLADD
3406 3416 Revisions will be re-added as if they were new content. This is
3407 3417 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3408 3418 e.g. large file detection and handling.
3409 3419
3410 3420 Delta computation can be slow, so the choice of delta reuse policy can
3411 3421 significantly affect run time.
3412 3422
3413 3423 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3414 3424 two extremes. Deltas will be reused if they are appropriate. But if the
3415 3425 delta could choose a better revision, it will do so. This means if you
3416 3426 are converting a non-generaldelta revlog to a generaldelta revlog,
3417 3427 deltas will be recomputed if the delta's parent isn't a parent of the
3418 3428 revision.
3419 3429
3420 3430 In addition to the delta policy, the ``forcedeltabothparents``
3421 3431 argument controls whether to force computing deltas against both parents
3422 3432 for merges. By default, the destination revlog's existing setting is used.
3423 3433
3424 3434 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3425 3435 `sidedata_helpers`.
3426 3436 """
3427 3437 if deltareuse not in self.DELTAREUSEALL:
3428 3438 raise ValueError(
3429 3439 _(b'value for deltareuse invalid: %s') % deltareuse
3430 3440 )
3431 3441
3432 3442 if len(destrevlog):
3433 3443 raise ValueError(_(b'destination revlog is not empty'))
3434 3444
3435 3445 if getattr(self, 'filteredrevs', None):
3436 3446 raise ValueError(_(b'source revlog has filtered revisions'))
3437 3447 if getattr(destrevlog, 'filteredrevs', None):
3438 3448 raise ValueError(_(b'destination revlog has filtered revisions'))
3439 3449
3440 3450 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3441 3451 # if possible.
3442 3452 old_delta_config = destrevlog.delta_config
3443 3453 destrevlog.delta_config = destrevlog.delta_config.copy()
3444 3454
3445 3455 try:
3446 3456 if deltareuse == self.DELTAREUSEALWAYS:
3447 3457 destrevlog.delta_config.lazy_delta_base = True
3448 3458 destrevlog.delta_config.lazy_delta = True
3449 3459 elif deltareuse == self.DELTAREUSESAMEREVS:
3450 3460 destrevlog.delta_config.lazy_delta_base = False
3451 3461 destrevlog.delta_config.lazy_delta = True
3452 3462 elif deltareuse == self.DELTAREUSENEVER:
3453 3463 destrevlog.delta_config.lazy_delta_base = False
3454 3464 destrevlog.delta_config.lazy_delta = False
3455 3465
3456 3466 delta_both_parents = (
3457 3467 forcedeltabothparents or old_delta_config.delta_both_parents
3458 3468 )
3459 3469 destrevlog.delta_config.delta_both_parents = delta_both_parents
3460 3470
3461 3471 with self.reading(), destrevlog._writing(tr):
3462 3472 self._clone(
3463 3473 tr,
3464 3474 destrevlog,
3465 3475 addrevisioncb,
3466 3476 deltareuse,
3467 3477 forcedeltabothparents,
3468 3478 sidedata_helpers,
3469 3479 )
3470 3480
3471 3481 finally:
3472 3482 destrevlog.delta_config = old_delta_config
3473 3483
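Illustrative recap, not part of the class: how the three non-fulladd policies handled in the try-block above translate into the destination revlog's lazy-delta switches (``DELTAREUSEFULLADD`` bypasses them and goes through ``addrevision`` instead).

    POLICY_TO_LAZY_FLAGS = {
        b'always':   dict(lazy_delta=True,  lazy_delta_base=True),
        b'samerevs': dict(lazy_delta=True,  lazy_delta_base=False),
        b'never':    dict(lazy_delta=False, lazy_delta_base=False),
    }

    for policy, switches in sorted(POLICY_TO_LAZY_FLAGS.items()):
        print(policy, switches)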
3474 3484 def _clone(
3475 3485 self,
3476 3486 tr,
3477 3487 destrevlog,
3478 3488 addrevisioncb,
3479 3489 deltareuse,
3480 3490 forcedeltabothparents,
3481 3491 sidedata_helpers,
3482 3492 ):
3483 3493 """perform the core duty of `revlog.clone` after parameter processing"""
3484 3494 write_debug = None
3485 3495 if self.delta_config.debug_delta:
3486 3496 write_debug = tr._report
3487 3497 deltacomputer = deltautil.deltacomputer(
3488 3498 destrevlog,
3489 3499 write_debug=write_debug,
3490 3500 )
3491 3501 index = self.index
3492 3502 for rev in self:
3493 3503 entry = index[rev]
3494 3504
3495 3505 # Some classes override linkrev to take filtered revs into
3496 3506 # account. Use raw entry from index.
3497 3507 flags = entry[0] & 0xFFFF
3498 3508 linkrev = entry[4]
3499 3509 p1 = index[entry[5]][7]
3500 3510 p2 = index[entry[6]][7]
3501 3511 node = entry[7]
3502 3512
3503 3513 # (Possibly) reuse the delta from the revlog if allowed and
3504 3514 # the revlog chunk is a delta.
3505 3515 cachedelta = None
3506 3516 rawtext = None
3507 3517 if deltareuse == self.DELTAREUSEFULLADD:
3508 3518 text = self._revisiondata(rev)
3509 3519 sidedata = self.sidedata(rev)
3510 3520
3511 3521 if sidedata_helpers is not None:
3512 3522 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3513 3523 self, sidedata_helpers, sidedata, rev
3514 3524 )
3515 3525 flags = flags | new_flags[0] & ~new_flags[1]
3516 3526
3517 3527 destrevlog.addrevision(
3518 3528 text,
3519 3529 tr,
3520 3530 linkrev,
3521 3531 p1,
3522 3532 p2,
3523 3533 cachedelta=cachedelta,
3524 3534 node=node,
3525 3535 flags=flags,
3526 3536 deltacomputer=deltacomputer,
3527 3537 sidedata=sidedata,
3528 3538 )
3529 3539 else:
3530 3540 if destrevlog.delta_config.lazy_delta:
3531 3541 dp = self.deltaparent(rev)
3532 3542 if dp != nullrev:
3533 3543 cachedelta = (dp, bytes(self._chunk(rev)))
3534 3544
3535 3545 sidedata = None
3536 3546 if not cachedelta:
3537 3547 try:
3538 3548 rawtext = self._revisiondata(rev)
3539 3549 except error.CensoredNodeError as censored:
3540 3550 assert flags & REVIDX_ISCENSORED
3541 3551 rawtext = censored.tombstone
3542 3552 sidedata = self.sidedata(rev)
3543 3553 if sidedata is None:
3544 3554 sidedata = self.sidedata(rev)
3545 3555
3546 3556 if sidedata_helpers is not None:
3547 3557 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3548 3558 self, sidedata_helpers, sidedata, rev
3549 3559 )
3550 3560 flags = flags | new_flags[0] & ~new_flags[1]
3551 3561
3552 3562 destrevlog._addrevision(
3553 3563 node,
3554 3564 rawtext,
3555 3565 tr,
3556 3566 linkrev,
3557 3567 p1,
3558 3568 p2,
3559 3569 flags,
3560 3570 cachedelta,
3561 3571 deltacomputer=deltacomputer,
3562 3572 sidedata=sidedata,
3563 3573 )
3564 3574
3565 3575 if addrevisioncb:
3566 3576 addrevisioncb(self, rev, node)
3567 3577
3568 3578 def censorrevision(self, tr, censornode, tombstone=b''):
3569 3579 if self._format_version == REVLOGV0:
3570 3580 raise error.RevlogError(
3571 3581 _(b'cannot censor with version %d revlogs')
3572 3582 % self._format_version
3573 3583 )
3574 3584 elif self._format_version == REVLOGV1:
3575 3585 rewrite.v1_censor(self, tr, censornode, tombstone)
3576 3586 else:
3577 3587 rewrite.v2_censor(self, tr, censornode, tombstone)
3578 3588
3579 3589 def verifyintegrity(self, state):
3580 3590 """Verifies the integrity of the revlog.
3581 3591
3582 3592 Yields ``revlogproblem`` instances describing problems that are
3583 3593 found.
3584 3594 """
3585 3595 dd, di = self.checksize()
3586 3596 if dd:
3587 3597 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3588 3598 if di:
3589 3599 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3590 3600
3591 3601 version = self._format_version
3592 3602
3593 3603 # The verifier tells us what version revlog we should be.
3594 3604 if version != state[b'expectedversion']:
3595 3605 yield revlogproblem(
3596 3606 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3597 3607 % (self.display_id, version, state[b'expectedversion'])
3598 3608 )
3599 3609
3600 3610 state[b'skipread'] = set()
3601 3611 state[b'safe_renamed'] = set()
3602 3612
3603 3613 for rev in self:
3604 3614 node = self.node(rev)
3605 3615
3606 3616 # Verify contents. 4 cases to care about:
3607 3617 #
3608 3618 # common: the most common case
3609 3619 # rename: with a rename
3610 3620 # meta: file content starts with b'\1\n', the metadata
3611 3621 # header defined in filelog.py, but without a rename
3612 3622 # ext: content stored externally
3613 3623 #
3614 3624 # More formally, their differences are shown below:
3615 3625 #
3616 3626 # | common | rename | meta | ext
3617 3627 # -------------------------------------------------------
3618 3628 # flags() | 0 | 0 | 0 | not 0
3619 3629 # renamed() | False | True | False | ?
3620 3630 # rawtext[0:2]=='\1\n'| False | True | True | ?
3621 3631 #
3622 3632 # "rawtext" means the raw text stored in revlog data, which
3623 3633 # could be retrieved by "rawdata(rev)". "text"
3624 3634 # mentioned below is "revision(rev)".
3625 3635 #
3626 3636 # There are 3 different lengths stored physically:
3627 3637 # 1. L1: rawsize, stored in revlog index
3628 3638 # 2. L2: len(rawtext), stored in revlog data
3629 3639 # 3. L3: len(text), stored in revlog data if flags==0, or
3630 3640 # possibly somewhere else if flags!=0
3631 3641 #
3632 3642 # L1 should be equal to L2. L3 could be different from them.
3633 3643 # "text" may or may not affect commit hash depending on flag
3634 3644 # processors (see flagutil.addflagprocessor).
3635 3645 #
3636 3646 # | common | rename | meta | ext
3637 3647 # -------------------------------------------------
3638 3648 # rawsize() | L1 | L1 | L1 | L1
3639 3649 # size() | L1 | L2-LM | L1(*) | L1 (?)
3640 3650 # len(rawtext) | L2 | L2 | L2 | L2
3641 3651 # len(text) | L2 | L2 | L2 | L3
3642 3652 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3643 3653 #
3644 3654 # LM: length of metadata, depending on rawtext
3645 3655 # (*): not ideal, see comment in filelog.size
3646 3656 # (?): could be "- len(meta)" if the resolved content has
3647 3657 # rename metadata
3648 3658 #
3649 3659 # Checks needed to be done:
3650 3660 # 1. length check: L1 == L2, in all cases.
3651 3661 # 2. hash check: depending on flag processor, we may need to
3652 3662 # use either "text" (external), or "rawtext" (in revlog).
3653 3663
3654 3664 try:
3655 3665 skipflags = state.get(b'skipflags', 0)
3656 3666 if skipflags:
3657 3667 skipflags &= self.flags(rev)
3658 3668
3659 3669 _verify_revision(self, skipflags, state, node)
3660 3670
3661 3671 l1 = self.rawsize(rev)
3662 3672 l2 = len(self.rawdata(node))
3663 3673
3664 3674 if l1 != l2:
3665 3675 yield revlogproblem(
3666 3676 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3667 3677 node=node,
3668 3678 )
3669 3679
3670 3680 except error.CensoredNodeError:
3671 3681 if state[b'erroroncensored']:
3672 3682 yield revlogproblem(
3673 3683 error=_(b'censored file data'), node=node
3674 3684 )
3675 3685 state[b'skipread'].add(node)
3676 3686 except Exception as e:
3677 3687 yield revlogproblem(
3678 3688 error=_(b'unpacking %s: %s')
3679 3689 % (short(node), stringutil.forcebytestr(e)),
3680 3690 node=node,
3681 3691 )
3682 3692 state[b'skipread'].add(node)
3683 3693
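A small, self-contained illustration of the "L2 - LM" cases in the verifyintegrity comment table above, using a hypothetical raw text: the ``\1\n`` framing is the filelog metadata header mentioned in that comment, so the logical content returned by read() is the raw text minus that header.

    rawtext = b"\x01\ncopy: foo\ncopyrev: 0123456789abcdef\n\x01\nfile content\n"
    assert rawtext.startswith(b"\x01\n")
    meta_end = rawtext.index(b"\x01\n", 2) + 2   # end of the metadata header
    text = rawtext[meta_end:]                    # what read() would return
    LM = meta_end                                # length of the metadata header
    assert len(text) == len(rawtext) - LM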
3684 3694 def storageinfo(
3685 3695 self,
3686 3696 exclusivefiles=False,
3687 3697 sharedfiles=False,
3688 3698 revisionscount=False,
3689 3699 trackedsize=False,
3690 3700 storedsize=False,
3691 3701 ):
3692 3702 d = {}
3693 3703
3694 3704 if exclusivefiles:
3695 3705 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3696 3706 if not self._inline:
3697 3707 d[b'exclusivefiles'].append((self.opener, self._datafile))
3698 3708
3699 3709 if sharedfiles:
3700 3710 d[b'sharedfiles'] = []
3701 3711
3702 3712 if revisionscount:
3703 3713 d[b'revisionscount'] = len(self)
3704 3714
3705 3715 if trackedsize:
3706 3716 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3707 3717
3708 3718 if storedsize:
3709 3719 d[b'storedsize'] = sum(
3710 3720 self.opener.stat(path).st_size for path in self.files()
3711 3721 )
3712 3722
3713 3723 return d
3714 3724
3715 3725 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3716 3726 if not self.feature_config.has_side_data:
3717 3727 return
3718 3728 # revlog formats with sidedata support do not support inline
3719 3729 assert not self._inline
3720 3730 if not helpers[1] and not helpers[2]:
3721 3731 # Nothing to generate or remove
3722 3732 return
3723 3733
3724 3734 new_entries = []
3725 3735 # append the new sidedata
3726 3736 with self._writing(transaction):
3727 3737 ifh, dfh, sdfh = self._writinghandles
3728 3738 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3729 3739
3730 3740 current_offset = sdfh.tell()
3731 3741 for rev in range(startrev, endrev + 1):
3732 3742 entry = self.index[rev]
3733 3743 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3734 3744 store=self,
3735 3745 sidedata_helpers=helpers,
3736 3746 sidedata={},
3737 3747 rev=rev,
3738 3748 )
3739 3749
3740 3750 serialized_sidedata = sidedatautil.serialize_sidedata(
3741 3751 new_sidedata
3742 3752 )
3743 3753
3744 3754 sidedata_compression_mode = COMP_MODE_INLINE
3745 3755 if serialized_sidedata and self.feature_config.has_side_data:
3746 3756 sidedata_compression_mode = COMP_MODE_PLAIN
3747 3757 h, comp_sidedata = self.compress(serialized_sidedata)
3748 3758 if (
3749 3759 h != b'u'
3750 3760 and comp_sidedata[0] != b'\0'
3751 3761 and len(comp_sidedata) < len(serialized_sidedata)
3752 3762 ):
3753 3763 assert not h
3754 3764 if (
3755 3765 comp_sidedata[0]
3756 3766 == self._docket.default_compression_header
3757 3767 ):
3758 3768 sidedata_compression_mode = COMP_MODE_DEFAULT
3759 3769 serialized_sidedata = comp_sidedata
3760 3770 else:
3761 3771 sidedata_compression_mode = COMP_MODE_INLINE
3762 3772 serialized_sidedata = comp_sidedata
3763 3773 if entry[8] != 0 or entry[9] != 0:
3764 3774 # rewriting entries that already have sidedata is not
3765 3775 # supported yet, because it introduces garbage data in the
3766 3776 # revlog.
3767 3777 msg = b"rewriting existing sidedata is not supported yet"
3768 3778 raise error.Abort(msg)
3769 3779
3770 3780 # Apply (potential) flags to add and to remove after running
3771 3781 # the sidedata helpers
3772 3782 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3773 3783 entry_update = (
3774 3784 current_offset,
3775 3785 len(serialized_sidedata),
3776 3786 new_offset_flags,
3777 3787 sidedata_compression_mode,
3778 3788 )
3779 3789
3780 3790 # the sidedata computation might have moved the file cursors around
3781 3791 sdfh.seek(current_offset, os.SEEK_SET)
3782 3792 sdfh.write(serialized_sidedata)
3783 3793 new_entries.append(entry_update)
3784 3794 current_offset += len(serialized_sidedata)
3785 3795 self._docket.sidedata_end = sdfh.tell()
3786 3796
3787 3797 # rewrite the new index entries
3788 3798 ifh.seek(startrev * self.index.entry_size)
3789 3799 for i, e in enumerate(new_entries):
3790 3800 rev = startrev + i
3791 3801 self.index.replace_sidedata_info(rev, *e)
3792 3802 packed = self.index.entry_binary(rev)
3793 3803 if rev == 0 and self._docket is None:
3794 3804 header = self._format_flags | self._format_version
3795 3805 header = self.index.pack_header(header)
3796 3806 packed = header + packed
3797 3807 ifh.write(packed)
@@ -1,1630 +1,1632 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17
18 18 from .constants import (
19 19 COMP_MODE_DEFAULT,
20 20 COMP_MODE_INLINE,
21 21 COMP_MODE_PLAIN,
22 22 DELTA_BASE_REUSE_FORCE,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
45 45 class _testrevlog:
46 46 """minimalist fake revlog to use in doctests"""
47 47
48 48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 49 """data is a list of revision payload boundaries"""
50 50 from .. import revlog
51 51
52 52 self._data = data
53 53 self.data_config = revlog.DataConfig()
54 54 self.data_config.sr_density_threshold = density
55 55 self.data_config.sr_min_gap_size = mingap
56 56 self.delta_config = revlog.DeltaConfig()
57 57 self.feature_config = revlog.FeatureConfig()
58 58 self._snapshot = set(snapshot)
59 59 self.index = None
60 60
61 61 def start(self, rev):
62 62 if rev == nullrev:
63 63 return 0
64 64 if rev == 0:
65 65 return 0
66 66 return self._data[rev - 1]
67 67
68 68 def end(self, rev):
69 69 if rev == nullrev:
70 70 return 0
71 71 return self._data[rev]
72 72
73 73 def length(self, rev):
74 74 return self.end(rev) - self.start(rev)
75 75
76 76 def __len__(self):
77 77 return len(self._data)
78 78
79 79 def issnapshot(self, rev):
80 80 if rev == nullrev:
81 81 return True
82 82 return rev in self._snapshot
83 83
84 84
85 85 def slicechunk(revlog, revs, targetsize=None):
86 86 """slice revs to reduce the amount of unrelated data to be read from disk.
87 87
88 88 ``revs`` is sliced into groups that should be read in one go.
89 89 Assume that revs are sorted.
90 90
91 91 The initial chunk is sliced until the overall density (payload/chunks-span
92 92 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
93 93 than `revlog.data_config.sr_min_gap_size` is skipped.
94 94
95 95 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
96 96 For consistency with other slicing choices, this limit won't go lower than
97 97 `revlog.data_config.sr_min_gap_size`.
98 98
99 99 If individual revision chunks are larger than this limit, they will still
100 100 be yielded individually.
101 101
102 102 >>> data = [
103 103 ... 5, #00 (5)
104 104 ... 10, #01 (5)
105 105 ... 12, #02 (2)
106 106 ... 12, #03 (empty)
107 107 ... 27, #04 (15)
108 108 ... 31, #05 (4)
109 109 ... 31, #06 (empty)
110 110 ... 42, #07 (11)
111 111 ... 47, #08 (5)
112 112 ... 47, #09 (empty)
113 113 ... 48, #10 (1)
114 114 ... 51, #11 (3)
115 115 ... 74, #12 (23)
116 116 ... 85, #13 (11)
117 117 ... 86, #14 (1)
118 118 ... 91, #15 (5)
119 119 ... ]
120 120 >>> revlog = _testrevlog(data, snapshot=range(16))
121 121
122 122 >>> list(slicechunk(revlog, list(range(16))))
123 123 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
124 124 >>> list(slicechunk(revlog, [0, 15]))
125 125 [[0], [15]]
126 126 >>> list(slicechunk(revlog, [0, 11, 15]))
127 127 [[0], [11], [15]]
128 128 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
129 129 [[0], [11, 13, 15]]
130 130 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
131 131 [[1, 2], [5, 8, 10, 11], [14]]
132 132
133 133 Slicing with a maximum chunk size
134 134 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
135 135 [[0], [11], [13], [15]]
136 136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
137 137 [[0], [11], [13, 15]]
138 138
139 139 Slicing involving nullrev
140 140 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
141 141 [[-1, 0], [11], [13, 15]]
142 142 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
143 143 [[-1], [13], [15]]
144 144 """
145 145 if targetsize is not None:
146 146 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
147 147 # targetsize should not be specified when evaluating delta candidates:
148 148 # * targetsize is used to ensure we stay within specification when reading,
149 149 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
150 150 if densityslicing is None:
151 151 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
152 152 for chunk in densityslicing(
153 153 revs,
154 154 revlog.data_config.sr_density_threshold,
155 155 revlog.data_config.sr_min_gap_size,
156 156 ):
157 157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
158 158 yield subchunk
159 159
160 160
161 161 def _slicechunktosize(revlog, revs, targetsize=None):
162 162 """slice revs to match the target size
163 163
164 164 This is intended to be used on chunks that density slicing selected but that
165 165 are still too large compared to the read guarantee of revlogs. This might
166 166 happen when the "minimal gap size" interrupted the slicing or when chains are
167 167 built in a way that creates large blocks next to each other.
168 168
169 169 >>> data = [
170 170 ... 3, #0 (3)
171 171 ... 5, #1 (2)
172 172 ... 6, #2 (1)
173 173 ... 8, #3 (2)
174 174 ... 8, #4 (empty)
175 175 ... 11, #5 (3)
176 176 ... 12, #6 (1)
177 177 ... 13, #7 (1)
178 178 ... 14, #8 (1)
179 179 ... ]
180 180
181 181 == All snapshots cases ==
182 182 >>> revlog = _testrevlog(data, snapshot=range(9))
183 183
184 184 Cases where chunk is already small enough
185 185 >>> list(_slicechunktosize(revlog, [0], 3))
186 186 [[0]]
187 187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
188 188 [[6, 7]]
189 189 >>> list(_slicechunktosize(revlog, [0], None))
190 190 [[0]]
191 191 >>> list(_slicechunktosize(revlog, [6, 7], None))
192 192 [[6, 7]]
193 193
194 194 cases where we need actual slicing
195 195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
196 196 [[0], [1]]
197 197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
198 198 [[1], [3]]
199 199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
200 200 [[1, 2], [3]]
201 201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
202 202 [[3], [5]]
203 203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
204 204 [[3], [5]]
205 205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
206 206 [[5], [6, 7, 8]]
207 207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
208 208 [[0], [1, 2], [3], [5], [6, 7, 8]]
209 209
210 210 Case with too large individual chunk (must return valid chunk)
211 211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
212 212 [[0], [1]]
213 213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
214 214 [[1], [3]]
215 215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
216 216 [[3], [5]]
217 217
218 218 == No Snapshot cases ==
219 219 >>> revlog = _testrevlog(data)
220 220
221 221 Cases where chunk is already small enough
222 222 >>> list(_slicechunktosize(revlog, [0], 3))
223 223 [[0]]
224 224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
225 225 [[6, 7]]
226 226 >>> list(_slicechunktosize(revlog, [0], None))
227 227 [[0]]
228 228 >>> list(_slicechunktosize(revlog, [6, 7], None))
229 229 [[6, 7]]
230 230
231 231 cases where we need actual slicing
232 232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
233 233 [[0], [1]]
234 234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
235 235 [[1], [3]]
236 236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
237 237 [[1], [2, 3]]
238 238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
239 239 [[3], [5]]
240 240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
241 241 [[3], [4, 5]]
242 242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
243 243 [[5], [6, 7, 8]]
244 244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
245 245 [[0], [1, 2], [3], [5], [6, 7, 8]]
246 246
247 247 Case with too large individual chunk (must return valid chunk)
248 248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
249 249 [[0], [1]]
250 250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
251 251 [[1], [3]]
252 252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
253 253 [[3], [5]]
254 254
255 255 == mixed case ==
256 256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
257 257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
258 258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
259 259 """
260 260 assert targetsize is None or 0 <= targetsize
261 261 startdata = revlog.start(revs[0])
262 262 enddata = revlog.end(revs[-1])
263 263 fullspan = enddata - startdata
264 264 if targetsize is None or fullspan <= targetsize:
265 265 yield revs
266 266 return
267 267
268 268 startrevidx = 0
269 269 endrevidx = 1
270 270 iterrevs = enumerate(revs)
271 271 next(iterrevs) # skip first rev.
272 272 # first step: get snapshots out of the way
273 273 for idx, r in iterrevs:
274 274 span = revlog.end(r) - startdata
275 275 snapshot = revlog.issnapshot(r)
276 276 if span <= targetsize and snapshot:
277 277 endrevidx = idx + 1
278 278 else:
279 279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
280 280 if chunk:
281 281 yield chunk
282 282 startrevidx = idx
283 283 startdata = revlog.start(r)
284 284 endrevidx = idx + 1
285 285 if not snapshot:
286 286 break
287 287
288 288 # for the others, we use binary slicing to quickly converge toward valid
289 289 # chunks (otherwise, we might end up looking for start/end of many
290 290 # revisions). This logic is not looking for the perfect slicing point, it
291 291 # focuses on quickly converging toward valid chunks.
292 292 nbitem = len(revs)
293 293 while (enddata - startdata) > targetsize:
294 294 endrevidx = nbitem
295 295 if nbitem - startrevidx <= 1:
296 296 break # protect against individual chunk larger than limit
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 while span > targetsize:
300 300 if endrevidx - startrevidx <= 1:
301 301 break # protect against individual chunk larger than limit
302 302 endrevidx -= (endrevidx - startrevidx) // 2
303 303 localenddata = revlog.end(revs[endrevidx - 1])
304 304 span = localenddata - startdata
305 305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
306 306 if chunk:
307 307 yield chunk
308 308 startrevidx = endrevidx
309 309 startdata = revlog.start(revs[startrevidx])
310 310
311 311 chunk = _trimchunk(revlog, revs, startrevidx)
312 312 if chunk:
313 313 yield chunk
314 314
315 315
316 316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
317 317 """slice revs to reduce the amount of unrelated data to be read from disk.
318 318
319 319 ``revs`` is sliced into groups that should be read in one go.
320 320 Assume that revs are sorted.
321 321
322 322 The initial chunk is sliced until the overall density (payload/chunks-span
323 323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
324 324 skipped.
325 325
326 326 >>> revlog = _testrevlog([
327 327 ... 5, #00 (5)
328 328 ... 10, #01 (5)
329 329 ... 12, #02 (2)
330 330 ... 12, #03 (empty)
331 331 ... 27, #04 (15)
332 332 ... 31, #05 (4)
333 333 ... 31, #06 (empty)
334 334 ... 42, #07 (11)
335 335 ... 47, #08 (5)
336 336 ... 47, #09 (empty)
337 337 ... 48, #10 (1)
338 338 ... 51, #11 (3)
339 339 ... 74, #12 (23)
340 340 ... 85, #13 (11)
341 341 ... 86, #14 (1)
342 342 ... 91, #15 (5)
343 343 ... ])
344 344
345 345 >>> list(_slicechunktodensity(revlog, list(range(16))))
346 346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [0, 15]))
348 348 [[0], [15]]
349 349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
350 350 [[0], [11], [15]]
351 351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
352 352 [[0], [11, 13, 15]]
353 353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
354 354 [[1, 2], [5, 8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... mingapsize=20))
357 357 [[1, 2, 3, 5, 8, 10, 11], [14]]
358 358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
359 359 ... targetdensity=0.95))
360 360 [[1, 2], [5], [8, 10, 11], [14]]
361 361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
362 362 ... targetdensity=0.95, mingapsize=12))
363 363 [[1, 2], [5, 8, 10, 11], [14]]
364 364 """
365 365 start = revlog.start
366 366 length = revlog.length
367 367
368 368 if len(revs) <= 1:
369 369 yield revs
370 370 return
371 371
372 372 deltachainspan = segmentspan(revlog, revs)
373 373
374 374 if deltachainspan < mingapsize:
375 375 yield revs
376 376 return
377 377
378 378 readdata = deltachainspan
379 379 chainpayload = sum(length(r) for r in revs)
380 380
381 381 if deltachainspan:
382 382 density = chainpayload / float(deltachainspan)
383 383 else:
384 384 density = 1.0
385 385
386 386 if density >= targetdensity:
387 387 yield revs
388 388 return
389 389
390 390 # Collect the gaps so they can later be sorted by size
391 391 gaps = []
392 392 prevend = None
393 393 for i, rev in enumerate(revs):
394 394 revstart = start(rev)
395 395 revlen = length(rev)
396 396
397 397 # Skip empty revisions to form larger holes
398 398 if revlen == 0:
399 399 continue
400 400
401 401 if prevend is not None:
402 402 gapsize = revstart - prevend
403 403 # only consider holes that are large enough
404 404 if gapsize > mingapsize:
405 405 gaps.append((gapsize, i))
406 406
407 407 prevend = revstart + revlen
408 408 # sort the gaps so we can pop them from largest to smallest
409 409 gaps.sort()
410 410
411 411 # Collect the indices of the largest holes until the density is acceptable
412 412 selected = []
413 413 while gaps and density < targetdensity:
414 414 gapsize, gapidx = gaps.pop()
415 415
416 416 selected.append(gapidx)
417 417
418 418 # skipping this gap means that much less unrelated data will be read,
419 419 # which improves the density
420 420 readdata -= gapsize
421 421 if readdata > 0:
422 422 density = chainpayload / float(readdata)
423 423 else:
424 424 density = 1.0
425 425 selected.sort()
426 426
427 427 # Cut the revs at collected indices
428 428 previdx = 0
429 429 for idx in selected:
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx, idx)
432 432 if chunk:
433 433 yield chunk
434 434
435 435 previdx = idx
436 436
437 437 chunk = _trimchunk(revlog, revs, previdx)
438 438 if chunk:
439 439 yield chunk
440 440
441 441
442 442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 443 """returns revs[startidx:endidx] without empty trailing revs
444 444
445 445 Doctest Setup
446 446 >>> revlog = _testrevlog([
447 447 ... 5, #0
448 448 ... 10, #1
449 449 ... 12, #2
450 450 ... 12, #3 (empty)
451 451 ... 17, #4
452 452 ... 21, #5
453 453 ... 21, #6 (empty)
454 454 ... ])
455 455
456 456 Contiguous cases:
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 458 [0, 1, 2, 3, 4, 5]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 460 [0, 1, 2, 3, 4]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 462 [0, 1, 2]
463 463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 464 [2]
465 465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 466 [3, 4, 5]
467 467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 468 [3, 4]
469 469
470 470 Discontiguous cases:
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 472 [1, 3, 5]
473 473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 474 [1]
475 475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 476 [3, 5]
477 477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 478 [3, 5]
479 479 """
480 480 length = revlog.length
481 481
482 482 if endidx is None:
483 483 endidx = len(revs)
484 484
485 485 # If we have a non-empty delta candidate, there is nothing to trim
486 486 if revs[endidx - 1] < len(revlog):
487 487 # Trim empty revs at the end, except the very first revision of a chain
488 488 while (
489 489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 490 ):
491 491 endidx -= 1
492 492
493 493 return revs[startidx:endidx]
494 494
495 495
496 496 def segmentspan(revlog, revs):
497 497 """Get the byte span of a segment of revisions
498 498
499 499 revs is a sorted array of revision numbers
500 500
501 501 >>> revlog = _testrevlog([
502 502 ... 5, #0
503 503 ... 10, #1
504 504 ... 12, #2
505 505 ... 12, #3 (empty)
506 506 ... 17, #4
507 507 ... ])
508 508
509 509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
510 510 17
511 511 >>> segmentspan(revlog, [0, 4])
512 512 17
513 513 >>> segmentspan(revlog, [3, 4])
514 514 5
515 515 >>> segmentspan(revlog, [1, 2, 3,])
516 516 7
517 517 >>> segmentspan(revlog, [1, 3])
518 518 7
519 519 """
520 520 if not revs:
521 521 return 0
522 522 end = revlog.end(revs[-1])
523 523 return end - revlog.start(revs[0])
524 524
525 525
526 526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
527 527 """build full text from a (base, delta) pair and other metadata"""
528 528 # special case deltas which replace entire base; no need to decode
529 529 # base revision. this neatly avoids censored bases, which throw when
530 530 # they're decoded.
531 531 hlen = struct.calcsize(b">lll")
532 532 if delta[:hlen] == mdiff.replacediffheader(
533 533 revlog.rawsize(baserev), len(delta) - hlen
534 534 ):
535 535 fulltext = delta[hlen:]
536 536 else:
537 537 # deltabase is rawtext before changed by flag processors, which is
538 538 # equivalent to non-raw text
539 539 basetext = revlog.revision(baserev)
540 540 fulltext = mdiff.patch(basetext, delta)
541 541
542 542 try:
543 543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
544 544 if validatehash:
545 545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
546 546 if flags & REVIDX_ISCENSORED:
547 547 raise error.StorageError(
548 548 _(b'node %s is not censored') % expectednode
549 549 )
550 550 except error.CensoredNodeError:
551 551 # must pass the censored index flag to add censored revisions
552 552 if not flags & REVIDX_ISCENSORED:
553 553 raise
554 554 return fulltext
555 555
556 556
557 557 @attr.s(slots=True, frozen=True)
558 558 class _deltainfo:
559 559 distance = attr.ib()
560 560 deltalen = attr.ib()
561 561 data = attr.ib()
562 562 base = attr.ib()
563 563 chainbase = attr.ib()
564 564 chainlen = attr.ib()
565 565 compresseddeltalen = attr.ib()
566 566 snapshotdepth = attr.ib()
567 567
568 568
569 569 def drop_u_compression(delta):
570 570 """turn a "u" (no-compression) delta into no-compression without header
571 571
572 572 This is useful for revlog formats that have a better compression method.
573 573 """
574 574 assert delta.data[0] == b'u', delta.data[0]
575 575 return _deltainfo(
576 576 delta.distance,
577 577 delta.deltalen - 1,
578 578 (b'', delta.data[1]),
579 579 delta.base,
580 580 delta.chainbase,
581 581 delta.chainlen,
582 582 delta.compresseddeltalen,
583 583 delta.snapshotdepth,
584 584 )
585 585
586 586
587 587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 588 """Returns True if the given delta is good. Good means that it is within
589 589 the disk span, disk size, and chain length bounds that we know to be
590 590 performant."""
591 591 if deltainfo is None:
592 592 return False
593 593
594 594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
595 595 # we should never end up asking such a question. Adding the assert as a
596 596 # safe-guard to detect anything that would be fishy in this regard.
597 597 assert (
598 598 revinfo.cachedelta is None
599 599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 600 or not revlog.delta_config.general_delta
601 601 )
602 602
603 603 # - 'deltainfo.distance' is the distance from the base revision --
604 604 # bounding it limits the amount of I/O we need to do.
605 605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 606 # deltas we need to apply -- bounding it limits the amount of CPU
607 607 # we consume.
608 608
609 609 textlen = revinfo.textlen
610 610 defaultmax = textlen * 4
611 611 maxdist = revlog.delta_config.max_deltachain_span
612 612 if not maxdist:
613 613 maxdist = deltainfo.distance # ensure the conditional pass
614 614 maxdist = max(maxdist, defaultmax)
615 615
616 616 # Bad delta from read span:
617 617 #
618 618 # If the span of data read is larger than the maximum allowed.
619 619 #
620 620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 621 # to avoid issue related to the span of data. In theory, it would be
622 622 # possible to build pathological revlog where delta pattern would lead
623 623 # to too many reads. However, they do not happen in practice at all. So
624 624 # we skip the span check entirely.
625 625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
626 626 return False
627 627
628 628 # Bad delta from new delta size:
629 629 #
630 630 # If the delta size is larger than the target text, storing the
631 631 # delta will be inefficient.
632 632 if textlen < deltainfo.deltalen:
633 633 return False
634 634
635 635 # Bad delta from cumulated payload size:
636 636 #
637 637 # If the sum of deltas gets larger than K * target text length.
638 638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 639 return False
640 640
641 641 # Bad delta from chain length:
642 642 #
643 643 # If the number of deltas in the chain gets too high.
644 644 if (
645 645 revlog.delta_config.max_chain_len
646 646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
647 647 ):
648 648 return False
649 649
650 650 # bad delta from intermediate snapshot size limit
651 651 #
652 652 # If an intermediate snapshot size is higher than the limit. The
653 653 # limit exists to prevent endless chains of intermediate deltas from
654 654 # being created.
655 655 if (
656 656 deltainfo.snapshotdepth is not None
657 657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
658 658 ):
659 659 return False
660 660
661 661 # bad delta if new intermediate snapshot is larger than the previous
662 662 # snapshot
663 663 if (
664 664 deltainfo.snapshotdepth
665 665 and revlog.length(deltainfo.base) < deltainfo.deltalen
666 666 ):
667 667 return False
668 668
669 669 return True
670 670
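A simplified, standalone mirror of the size bounds checked above, with illustrative numbers only; the real check also handles sparse revlogs, chain length, and snapshot depth.

    LIMIT_DELTA2TEXT = 2

    def fits_size_bounds(textlen, deltalen, compresseddeltalen, distance,
                         max_deltachain_span=0):
        maxdist = max_deltachain_span or distance  # 0 disables the span limit
        maxdist = max(maxdist, textlen * 4)
        if maxdist < distance:
            return False                 # would read too much unrelated data
        if textlen < deltalen:
            return False                 # delta larger than the text itself
        return textlen * LIMIT_DELTA2TEXT >= compresseddeltalen

    assert fits_size_bounds(1000, 300, 900, 2500)       # within all bounds
    assert not fits_size_bounds(1000, 1200, 900, 2500)  # delta > full text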
671 671
672 672 # If a revision's full text is that much bigger than a base candidate full
673 673 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 674 # consider these candidates.
675 675 LIMIT_BASE2TEXT = 500
676 676
677 677
678 678 def _candidategroups(
679 679 revlog,
680 680 textlen,
681 681 p1,
682 682 p2,
683 683 cachedelta,
684 684 excluded_bases=None,
685 685 target_rev=None,
686 686 snapshot_cache=None,
687 687 ):
688 688 """Provides group of revision to be tested as delta base
689 689
690 690 This top level function focuses on emitting groups with unique and worthwhile
691 691 content. See _rawgroups for details about the group order.
692 692 """
693 693 # should we try to build a delta?
694 694 if not (len(revlog) and revlog._storedeltachains):
695 695 yield None
696 696 return
697 697
698 698 if target_rev is None:
699 699 target_rev = len(revlog)
700 700
701 701 if not revlog.delta_config.general_delta:
702 702 # before general delta, there is only one possible delta base
703 703 yield (target_rev - 1,)
704 704 yield None
705 705 return
706 706
707 707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
708 708 # we should never end up asking such a question. Adding the assert as a
709 709 # safe-guard to detect anything that would be fishy in this regard.
710 710 assert (
711 711 cachedelta is None
712 712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
713 713 or not revlog.delta_config.general_delta
714 714 )
715 715
716 716 deltalength = revlog.length
717 717 deltaparent = revlog.deltaparent
718 718 sparse = revlog.delta_config.sparse_revlog
719 719 good = None
720 720
721 721 deltas_limit = textlen * LIMIT_DELTA2TEXT
722 722 group_chunk_size = revlog.delta_config.candidate_group_chunk_size
723 723
724 724 tested = {nullrev}
725 725 candidates = _refinedgroups(
726 726 revlog,
727 727 p1,
728 728 p2,
729 729 cachedelta,
730 730 snapshot_cache=snapshot_cache,
731 731 )
732 732 while True:
733 733 temptative = candidates.send(good)
734 734 if temptative is None:
735 735 break
736 736 group = []
737 737 for rev in temptative:
738 738 # skip over empty deltas (no need to include them in a chain)
739 739 while not (rev == nullrev or rev in tested or deltalength(rev)):
740 740 tested.add(rev)
741 741 rev = deltaparent(rev)
742 742 # no need to try a delta against nullrev, this will be done as a
743 743 # last resort.
744 744 if rev == nullrev:
745 745 continue
746 746 # filter out revisions we tested already
747 747 if rev in tested:
748 748 continue
749 749
750 750 # a higher authority deemed the base unworthy (e.g. censored)
751 751 if excluded_bases is not None and rev in excluded_bases:
752 752 tested.add(rev)
753 753 continue
754 754 # We are in some recomputation cases and that rev is too high in
755 755 # the revlog
756 756 if target_rev is not None and rev >= target_rev:
757 757 tested.add(rev)
758 758 continue
759 759 # filter out delta bases that will never produce a good delta
760 760 if deltas_limit < revlog.length(rev):
761 761 tested.add(rev)
762 762 continue
763 763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
764 764 tested.add(rev)
765 765 continue
766 766 # no delta for rawtext-changing revs (see "candelta" for why)
767 767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
768 768 tested.add(rev)
769 769 continue
770 770
771 771 # If we reach here, we are about to build and test a delta.
772 772 # The delta building process will compute the chaininfo in all
773 773 # cases; since that computation is cached, it is fine to access it
774 774 # here too.
775 775 chainlen, chainsize = revlog._chaininfo(rev)
776 776 # if chain will be too long, skip base
777 777 if (
778 778 revlog.delta_config.max_chain_len
779 779 and chainlen >= revlog.delta_config.max_chain_len
780 780 ):
781 781 tested.add(rev)
782 782 continue
783 783 # if the chain already has too much data, skip base
784 784 if deltas_limit < chainsize:
785 785 tested.add(rev)
786 786 continue
787 if sparse and revlog.upperboundcomp is not None:
788 maxcomp = revlog.upperboundcomp
787 if sparse and revlog.delta_config.upper_bound_comp is not None:
788 maxcomp = revlog.delta_config.upper_bound_comp
789 789 basenotsnap = (p1, p2, nullrev)
790 790 if rev not in basenotsnap and revlog.issnapshot(rev):
791 791 snapshotdepth = revlog.snapshotdepth(rev)
792 792 # If text is significantly larger than the base, we can
793 793 # expect the resulting delta to be proportional to the size
794 794 # difference
795 795 revsize = revlog.rawsize(rev)
796 796 rawsizedistance = max(textlen - revsize, 0)
797 797 # use an estimate of the compression upper bound.
798 798 lowestrealisticdeltalen = rawsizedistance // maxcomp
799 799
800 800 # check the absolute constraint on the delta size
801 801 snapshotlimit = textlen >> snapshotdepth
802 802 if snapshotlimit < lowestrealisticdeltalen:
803 803 # delta lower bound is larger than accepted upper bound
804 804 tested.add(rev)
805 805 continue
806 806
807 807 # check the relative constraint on the delta size
808 808 revlength = revlog.length(rev)
809 809 if revlength < lowestrealisticdeltalen:
810 810 # delta probable lower bound is larger than target base
811 811 tested.add(rev)
812 812 continue
813 813
814 814 group.append(rev)
815 815 if group:
816 816 # When the size of the candidate group is big, it can result in a
817 817 # quite significant performance impact. To reduce this, we can send
818 818 # them in smaller batches until the new batch does not provide any
819 819 # improvements.
820 820 #
821 821 # This might reduce the overall efficiency of the compression in
822 822 # some corner cases, but that should also prevent very pathological
823 823 # cases from being an issue. (eg. 20 000 candidates).
824 824 #
825 825 # XXX note that the ordering of the group becomes important as it
826 826 # now impacts the final result. The current order is unprocessed
827 827 # and can be improved.
828 828 if group_chunk_size == 0:
829 829 tested.update(group)
830 830 good = yield tuple(group)
831 831 else:
832 832 prev_good = good
833 833 for start in range(0, len(group), group_chunk_size):
834 834 sub_group = group[start : start + group_chunk_size]
835 835 tested.update(sub_group)
836 836 good = yield tuple(sub_group)
837 837 if prev_good == good:
838 838 break
839 839
840 840 yield None
841 841
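Back-of-the-envelope version of the ``upper_bound_comp`` filter used above, with illustrative numbers (10 stands in for a configured compression upper bound): even a perfect delta cannot be smaller than the raw size difference divided by that ratio, so candidate snapshots whose limits fall below that estimate are skipped without computing a delta at all.

    maxcomp = 10                # assumed upper bound on the compression ratio
    textlen = 100_000           # size of the revision we are storing
    revsize = 20_000            # raw size of the candidate snapshot base
    snapshotdepth = 2           # depth the new intermediate snapshot would have

    rawsizedistance = max(textlen - revsize, 0)            # 80_000
    lowestrealisticdeltalen = rawsizedistance // maxcomp   # 8_000

    snapshotlimit = textlen >> snapshotdepth               # 25_000
    assert snapshotlimit >= lowestrealisticdeltalen        # absolute limit ok

    revlength = 15_000          # stored (compressed) size of the candidate
    assert revlength >= lowestrealisticdeltalen            # relative limit ok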
842 842
843 843 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
844 844 good = None
845 845 # First we try to reuse the delta contained in the bundle.
846 846 # (or from the source revlog)
847 847 #
848 848 # This logic only applies to general delta repositories and can be disabled
849 849 # through configuration. Disabling source delta reuse is useful when
850 850 # we want to make sure we recompute "optimal" deltas.
851 851 debug_info = None
852 852 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
853 853 # Assume what we received from the server is a good choice
854 854 # the delta building code will reuse the cache
855 855 if debug_info is not None:
856 856 debug_info['cached-delta.tested'] += 1
857 857 good = yield (cachedelta[0],)
858 858 if good is not None:
859 859 if debug_info is not None:
860 860 debug_info['cached-delta.accepted'] += 1
861 861 yield None
862 862 return
863 863 if snapshot_cache is None:
864 864 snapshot_cache = SnapshotCache()
865 865 groups = _rawgroups(
866 866 revlog,
867 867 p1,
868 868 p2,
869 869 cachedelta,
870 870 snapshot_cache,
871 871 )
872 872 for candidates in groups:
873 873 good = yield candidates
874 874 if good is not None:
875 875 break
876 876
877 877 # If sparse revlog is enabled, we can try to refine the available deltas
878 878 if not revlog.delta_config.sparse_revlog:
879 879 yield None
880 880 return
881 881
882 882 # if we have a refinable value, try to refine it
883 883 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
884 884 # refine snapshot down
885 885 previous = None
886 886 while previous != good:
887 887 previous = good
888 888 base = revlog.deltaparent(good)
889 889 if base == nullrev:
890 890 break
891 891 good = yield (base,)
892 892 # refine snapshot up
893 893 if not snapshot_cache.snapshots:
894 894 snapshot_cache.update(revlog, good + 1)
895 895 previous = None
896 896 while good != previous:
897 897 previous = good
898 898 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
899 899 good = yield children
900 900
901 901 if debug_info is not None:
902 902 if good is None:
903 903 debug_info['no-solution'] += 1
904 904
905 905 yield None
906 906
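Both ``_candidategroups`` and ``_refinedgroups`` above rely on the same generator protocol: the caller sends back the base it accepted (or None) and the generator uses that feedback to decide what to offer next, with a final ``None`` as sentinel. A toy model of that contract, detached from any revlog:

    def candidate_groups(groups):
        good = None
        for group in groups:
            good = yield group
            if good is not None:
                break                      # the consumer is satisfied
        yield None                         # sentinel: nothing left to offer

    def pick_base(groups, is_good):
        gen = candidate_groups(groups)
        good = None
        group = next(gen)
        while group is not None:
            good = next((rev for rev in group if is_good(rev)), None)
            group = gen.send(good)         # report back what we found
        return good

    # revisions 12 and 7 are offered first; only 7 passes the quality check
    assert pick_base([(12, 7), (3,)], is_good=lambda rev: rev == 7) == 7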
907 907
908 908 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
909 909 """Provides group of revision to be tested as delta base
910 910
911 911 This lower level function focuses on emitting theoretically interesting deltas
912 912 without looking at any practical details.
913 913
914 914 The group order aims at providing fast or small candidates first.
915 915 """
916 916 # Why search for a delta base if we cannot use a delta base?
917 917 assert revlog.delta_config.general_delta
918 918 # also see issue6056
919 919 sparse = revlog.delta_config.sparse_revlog
920 920 curr = len(revlog)
921 921 prev = curr - 1
922 922 deltachain = lambda rev: revlog._deltachain(rev)[0]
923 923
924 924 # exclude already lazy tested base if any
925 925 parents = [p for p in (p1, p2) if p != nullrev]
926 926
927 927 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
928 928 parents.sort()
929 929 # To minimize the chance of having to build a fulltext,
930 930 # pick first whichever parent is closest to us (max rev)
931 931 yield (parents[1],)
932 932 # then the other one (min rev) if the first did not fit
933 933 yield (parents[0],)
934 934 elif len(parents) > 0:
935 935 # Test all parents (1 or 2), and keep the best candidate
936 936 yield parents
937 937
938 938 if sparse and parents:
939 939 if snapshot_cache is None:
940 940 # map: base-rev: [snapshot-revs]
941 941 snapshot_cache = SnapshotCache()
942 942 # See if there is an existing snapshot in the parent chains we can use
943 943 # as a base for a new intermediate snapshot
944 944 #
945 945 # search for snapshot in parents delta chain
946 946 # map: snapshot-level: snapshot-rev
947 947 parents_snaps = collections.defaultdict(set)
948 948 candidate_chains = [deltachain(p) for p in parents]
949 949 for chain in candidate_chains:
950 950 for idx, s in enumerate(chain):
951 951 if not revlog.issnapshot(s):
952 952 break
953 953 parents_snaps[idx].add(s)
954 954 snapfloor = min(parents_snaps[0]) + 1
955 955 snapshot_cache.update(revlog, snapfloor)
956 956 # search for the highest "unrelated" revision
957 957 #
958 958 # Adding snapshots used by an "unrelated" revision increases the odds that we
959 959 # reuse an independent, yet better, snapshot chain.
960 960 #
961 961 # XXX instead of building a set of revisions, we could lazily enumerate
962 962 # over the chains. That would be more efficient, however we stick to
963 963 # simple code for now.
964 964 all_revs = set()
965 965 for chain in candidate_chains:
966 966 all_revs.update(chain)
967 967 other = None
968 968 for r in revlog.revs(prev, snapfloor):
969 969 if r not in all_revs:
970 970 other = r
971 971 break
972 972 if other is not None:
973 973 # To avoid unfair competition, we won't use unrelated intermediate
974 974 # snapshots that are deeper than the ones from the parent delta
975 975 # chain.
976 976 max_depth = max(parents_snaps.keys())
977 977 chain = deltachain(other)
978 978 for depth, s in enumerate(chain):
979 979 if s < snapfloor:
980 980 continue
981 981 if max_depth < depth:
982 982 break
983 983 if not revlog.issnapshot(s):
984 984 break
985 985 parents_snaps[depth].add(s)
986 986 # Test them as possible intermediate snapshot bases.
987 987 # We test them from the highest to the lowest level. High level ones are more
988 988 # likely to result in small deltas.
989 989 floor = None
990 990 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
991 991 siblings = set()
992 992 for s in snaps:
993 993 siblings.update(snapshot_cache.snapshots[s])
994 994 # Before considering making a new intermediate snapshot, we check
995 995 # whether an existing snapshot, a child of a base we consider, would be
996 996 # suitable.
997 997 #
998 998 # It gives a chance to reuse a delta chain "unrelated" to the
999 999 # current revision instead of starting our own. Without such
1000 1000 # re-use, topological branches would keep reopening new chains,
1001 1001 # creating more and more snapshots as the repository grows.
1002 1002
1003 1003 if floor is not None:
1004 1004 # We only do this for siblings created after the one in our
1005 1005 # parent's delta chain. Those created before have fewer chances
1006 1006 # to be a valid base since our ancestors had to create a new
1007 1007 # snapshot.
1008 1008 siblings = [r for r in siblings if floor < r]
1009 1009 yield tuple(sorted(siblings))
1010 1010 # then test the base from our parent's delta chain.
1011 1011 yield tuple(sorted(snaps))
1012 1012 floor = min(snaps)
1013 1013 # No suitable base found in the parent chain; check whether any full
1014 1014 # snapshot emitted since the parents' base would be a suitable base for an
1015 1015 # intermediate snapshot.
1016 1016 #
1017 1017 # It gives a chance to reuse a delta chain unrelated to the current
1018 1018 # revision instead of starting our own. Without such re-use,
1019 1019 # topological branches would keep reopening new full chains, creating
1020 1020 # more and more snapshots as the repository grows.
1021 1021 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1022 1022 yield tuple(sorted(full))
1023 1023
1024 1024 if not sparse:
1025 1025 # other approaches failed; try against prev to hopefully save us a
1026 1026 # fulltext.
1027 1027 yield (prev,)
1028 1028
1029 1029
1030 1030 class SnapshotCache:
1031 1031 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1032 1032
1033 1033 def __init__(self):
1034 1034 self.snapshots = collections.defaultdict(set)
1035 1035 self._start_rev = None
1036 1036 self._end_rev = None
1037 1037
1038 1038 def update(self, revlog, start_rev=0):
1039 1039 """find snapshots from start_rev to tip"""
1040 1040 nb_revs = len(revlog)
1041 1041 end_rev = nb_revs - 1
1042 1042 if start_rev > end_rev:
1043 1043 return # range is empty
1044 1044
1045 1045 if self._start_rev is None:
1046 1046 assert self._end_rev is None
1047 1047 self._update(revlog, start_rev, end_rev)
1048 1048 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1049 1049 if start_rev < self._start_rev:
1050 1050 self._update(revlog, start_rev, self._start_rev - 1)
1051 1051 if self._end_rev < end_rev:
1052 1052 self._update(revlog, self._end_rev + 1, end_rev)
1053 1053
1054 1054 if self._start_rev is None:
1055 1055 assert self._end_rev is None
1056 1056 self._end_rev = end_rev
1057 1057 self._start_rev = start_rev
1058 1058 else:
1059 1059 self._start_rev = min(self._start_rev, start_rev)
1060 1060 self._end_rev = max(self._end_rev, end_rev)
1061 1061 assert self._start_rev <= self._end_rev, (
1062 1062 self._start_rev,
1063 1063 self._end_rev,
1064 1064 )
1065 1065
1066 1066 def _update(self, revlog, start_rev, end_rev):
1067 1067 """internal method that actually does the content update"""
1068 1068 assert self._start_rev is None or (
1069 1069 start_rev < self._start_rev or start_rev > self._end_rev
1070 1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1071 1071 assert self._start_rev is None or (
1072 1072 end_rev < self._start_rev or end_rev > self._end_rev
1073 1073 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1074 1074 cache = self.snapshots
1075 1075 if hasattr(revlog.index, 'findsnapshots'):
1076 1076 revlog.index.findsnapshots(cache, start_rev, end_rev)
1077 1077 else:
1078 1078 deltaparent = revlog.deltaparent
1079 1079 issnapshot = revlog.issnapshot
1080 1080 for rev in revlog.revs(start_rev, end_rev):
1081 1081 if issnapshot(rev):
1082 1082 cache[deltaparent(rev)].add(rev)
1083 1083
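# A small sketch of how this cache is typically used; `rl` stands for a
# revlog assumed to exist in the caller's context:
#
#     cache = SnapshotCache()
#     cache.update(rl)                  # scan the whole revlog once
#     children = cache.snapshots[base]  # snapshot revisions whose delta
#                                       # parent is `base` (nullrev keys
#                                       # the full snapshots)
#     cache.update(rl)                  # later calls only scan revisions
#                                       # outside the already-covered range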
1084 1084
1085 1085 class deltacomputer:
1086 1086 def __init__(
1087 1087 self,
1088 1088 revlog,
1089 1089 write_debug=None,
1090 1090 debug_search=False,
1091 1091 debug_info=None,
1092 1092 ):
1093 1093 self.revlog = revlog
1094 1094 self._write_debug = write_debug
1095 1095 if write_debug is None:
1096 1096 self._debug_search = False
1097 1097 else:
1098 1098 self._debug_search = debug_search
1099 1099 self._debug_info = debug_info
1100 1100 self._snapshot_cache = SnapshotCache()
1101 1101
1102 1102 @property
1103 1103 def _gather_debug(self):
1104 1104 return self._write_debug is not None or self._debug_info is not None
1105 1105
1106 1106 def buildtext(self, revinfo):
1107 1107 """Builds a fulltext version of a revision
1108 1108
1109 1109 revinfo: revisioninfo instance that contains all needed info
1110 1110 """
1111 1111 btext = revinfo.btext
1112 1112 if btext[0] is not None:
1113 1113 return btext[0]
1114 1114
1115 1115 revlog = self.revlog
1116 1116 cachedelta = revinfo.cachedelta
1117 1117 baserev = cachedelta[0]
1118 1118 delta = cachedelta[1]
1119 1119
1120 1120 fulltext = btext[0] = _textfromdelta(
1121 1121 revlog,
1122 1122 baserev,
1123 1123 delta,
1124 1124 revinfo.p1,
1125 1125 revinfo.p2,
1126 1126 revinfo.flags,
1127 1127 revinfo.node,
1128 1128 )
1129 1129 return fulltext
1130 1130
1131 1131 def _builddeltadiff(self, base, revinfo):
1132 1132 revlog = self.revlog
1133 1133 t = self.buildtext(revinfo)
1134 1134 if revlog.iscensored(base):
1135 1135 # deltas based on a censored revision must replace the
1136 1136 # full content in one patch, so delta works everywhere
1137 1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1138 1138 delta = header + t
1139 1139 else:
1140 1140 ptext = revlog.rawdata(base)
1141 1141 delta = mdiff.textdiff(ptext, t)
1142 1142
1143 1143 return delta
1144 1144
1145 1145 def _builddeltainfo(self, revinfo, base, target_rev=None):
1146 1146 # can we use the cached delta?
1147 1147 revlog = self.revlog
1148 1148 chainbase = revlog.chainbase(base)
1149 1149 if revlog.delta_config.general_delta:
1150 1150 deltabase = base
1151 1151 else:
1152 1152 if target_rev is not None and base != target_rev - 1:
1153 1153 msg = (
1154 1154 b'general delta cannot use delta for something else '
1155 1155 b'than `prev`: %d<-%d'
1156 1156 )
1157 1157 msg %= (base, target_rev)
1158 1158 raise error.ProgrammingError(msg)
1159 1159 deltabase = chainbase
1160 1160 snapshotdepth = None
1161 1161 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1162 1162 snapshotdepth = 0
1163 1163 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1164 1164 # A delta chain should always be one full snapshot,
1165 1165 # zero or more semi-snapshots, and zero or more deltas
1166 1166 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1167 1167 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1168 1168 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1169 1169 delta = None
1170 1170 if revinfo.cachedelta:
1171 1171 cachebase = revinfo.cachedelta[0]
1172 1172 # check if the diff still applies
1173 1173 currentbase = cachebase
1174 1174 while (
1175 1175 currentbase != nullrev
1176 1176 and currentbase != base
1177 1177 and self.revlog.length(currentbase) == 0
1178 1178 ):
1179 1179 currentbase = self.revlog.deltaparent(currentbase)
1180 1180 if self.revlog.delta_config.lazy_delta and currentbase == base:
1181 1181 delta = revinfo.cachedelta[1]
1182 1182 if delta is None:
1183 1183 delta = self._builddeltadiff(base, revinfo)
1184 1184 if self._debug_search:
1185 1185 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1186 1186 msg %= len(delta)
1187 1187 self._write_debug(msg)
1188 1188 # snapshotdepth needs to be neither None nor a 0-level snapshot
1189 if revlog.upperboundcomp is not None and snapshotdepth:
1190 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1189 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1190 lowestrealisticdeltalen = (
1191 len(delta) // revlog.delta_config.upper_bound_comp
1192 )
1191 1193 snapshotlimit = revinfo.textlen >> snapshotdepth
1192 1194 if self._debug_search:
1193 1195 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1194 1196 msg %= lowestrealisticdeltalen
1195 1197 self._write_debug(msg)
1196 1198 if snapshotlimit < lowestrealisticdeltalen:
1197 1199 if self._debug_search:
1198 1200 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1199 1201 self._write_debug(msg)
1200 1202 return None
1201 1203 if revlog.length(base) < lowestrealisticdeltalen:
1202 1204 if self._debug_search:
1203 1205 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1204 1206 self._write_debug(msg)
1205 1207 return None
1206 1208 header, data = revlog.compress(delta)
1207 1209 deltalen = len(header) + len(data)
1208 1210 offset = revlog.end(len(revlog) - 1)
1209 1211 dist = deltalen + offset - revlog.start(chainbase)
1210 1212 chainlen, compresseddeltalen = revlog._chaininfo(base)
1211 1213 chainlen += 1
1212 1214 compresseddeltalen += deltalen
1213 1215
1214 1216 return _deltainfo(
1215 1217 dist,
1216 1218 deltalen,
1217 1219 (header, data),
1218 1220 deltabase,
1219 1221 chainbase,
1220 1222 chainlen,
1221 1223 compresseddeltalen,
1222 1224 snapshotdepth,
1223 1225 )
1224 1226
1225 1227 def _fullsnapshotinfo(self, revinfo, curr):
1226 1228 rawtext = self.buildtext(revinfo)
1227 1229 data = self.revlog.compress(rawtext)
1228 1230 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1229 1231 deltabase = chainbase = curr
1230 1232 snapshotdepth = 0
1231 1233 chainlen = 1
1232 1234
1233 1235 return _deltainfo(
1234 1236 dist,
1235 1237 deltalen,
1236 1238 data,
1237 1239 deltabase,
1238 1240 chainbase,
1239 1241 chainlen,
1240 1242 compresseddeltalen,
1241 1243 snapshotdepth,
1242 1244 )
1243 1245
1244 1246 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1245 1247 """Find an acceptable delta against a candidate revision
1246 1248
1247 1249 revinfo: information about the revision (instance of _revisioninfo)
1248 1250
1249 1251 Returns the first acceptable candidate revision, as ordered by
1250 1252 _candidategroups
1251 1253
1252 1254 If no suitable deltabase is found, we return delta info for a full
1253 1255 snapshot.
1254 1256
1255 1257 `excluded_bases` is an optional set of revisions that cannot be used as
1256 1258 a delta base. Use this to recompute deltas suitable in a censor or strip
1257 1259 context.
1258 1260 """
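# The overall flow below is roughly:
#   1. empty revisions and flag-processed revisions are stored as full
#      snapshots right away,
#   2. a cached delta carrying DELTA_BASE_REUSE_FORCE is reused as-is
#      (unless its base is excluded),
#   3. otherwise the candidate groups from _candidategroups are walked,
#      keeping the smallest acceptable delta, with a full snapshot as the
#      final fallback.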
1259 1261 if target_rev is None:
1260 1262 target_rev = len(self.revlog)
1261 1263
1262 1264 gather_debug = self._gather_debug
1263 1265 cachedelta = revinfo.cachedelta
1264 1266 revlog = self.revlog
1265 1267 p1r = p2r = None
1266 1268
1267 1269 if excluded_bases is None:
1268 1270 excluded_bases = set()
1269 1271
1270 1272 if gather_debug:
1271 1273 start = util.timer()
1272 1274 dbg = self._one_dbg_data()
1273 1275 dbg['revision'] = target_rev
1274 1276 target_revlog = b"UNKNOWN"
1275 1277 target_type = self.revlog.target[0]
1276 1278 target_key = self.revlog.target[1]
1277 1279 if target_type == KIND_CHANGELOG:
1278 1280 target_revlog = b'CHANGELOG:'
1279 1281 elif target_type == KIND_MANIFESTLOG:
1280 1282 target_revlog = b'MANIFESTLOG:'
1281 1283 if target_key:
1282 1284 target_revlog += b'%s:' % target_key
1283 1285 elif target_type == KIND_FILELOG:
1284 1286 target_revlog = b'FILELOG:'
1285 1287 if target_key:
1286 1288 target_revlog += b'%s:' % target_key
1287 1289 dbg['target-revlog'] = target_revlog
1288 1290 p1r = revlog.rev(revinfo.p1)
1289 1291 p2r = revlog.rev(revinfo.p2)
1290 1292 if p1r != nullrev:
1291 1293 p1_chain_len = revlog._chaininfo(p1r)[0]
1292 1294 else:
1293 1295 p1_chain_len = -1
1294 1296 if p2r != nullrev:
1295 1297 p2_chain_len = revlog._chaininfo(p2r)[0]
1296 1298 else:
1297 1299 p2_chain_len = -1
1298 1300 dbg['p1-chain-len'] = p1_chain_len
1299 1301 dbg['p2-chain-len'] = p2_chain_len
1300 1302
1301 1303 # 1) if the revision is empty, no amount of delta can beat it
1302 1304 #
1303 1305 # 2) no delta for flag processor revision (see "candelta" for why)
1304 1306 # not calling candelta since only one revision needs testing, also to
1305 1307 # avoid the overhead of fetching flags again.
1306 1308 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1307 1309 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1308 1310 if gather_debug:
1309 1311 end = util.timer()
1310 1312 dbg['duration'] = end - start
1311 1313 dbg[
1312 1314 'delta-base'
1313 1315 ] = deltainfo.base # pytype: disable=attribute-error
1314 1316 dbg['search_round_count'] = 0
1315 1317 dbg['using-cached-base'] = False
1316 1318 dbg['delta_try_count'] = 0
1317 1319 dbg['type'] = b"full"
1318 1320 dbg['snapshot-depth'] = 0
1319 1321 self._dbg_process_data(dbg)
1320 1322 return deltainfo
1321 1323
1322 1324 deltainfo = None
1323 1325
1324 1326 # If this source delta is to be forcibly reused, let us comply early.
1325 1327 if (
1326 1328 revlog.delta_config.general_delta
1327 1329 and revinfo.cachedelta is not None
1328 1330 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1329 1331 ):
1330 1332 base = revinfo.cachedelta[0]
1331 1333 if base == nullrev:
1332 1334 dbg_type = b"full"
1333 1335 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1334 1336 if gather_debug:
1335 1337 snapshotdepth = 0
1336 1338 elif base not in excluded_bases:
1337 1339 delta = revinfo.cachedelta[1]
1338 1340 header, data = revlog.compress(delta)
1339 1341 deltalen = len(header) + len(data)
1340 1342 if gather_debug:
1341 1343 offset = revlog.end(len(revlog) - 1)
1342 1344 chainbase = revlog.chainbase(base)
1343 1345 distance = deltalen + offset - revlog.start(chainbase)
1344 1346 chainlen, compresseddeltalen = revlog._chaininfo(base)
1345 1347 chainlen += 1
1346 1348 compresseddeltalen += deltalen
1347 1349 if base == p1r or base == p2r:
1348 1350 dbg_type = b"delta"
1349 1351 snapshotdepth = None
1350 1352 elif not revlog.issnapshot(base):
1351 1353 snapshotdepth = None
1352 1354 else:
1353 1355 dbg_type = b"snapshot"
1354 1356 snapshotdepth = revlog.snapshotdepth(base) + 1
1355 1357 else:
1356 1358 distance = None
1357 1359 chainbase = None
1358 1360 chainlen = None
1359 1361 compresseddeltalen = None
1360 1362 snapshotdepth = None
1361 1363 deltainfo = _deltainfo(
1362 1364 distance=distance,
1363 1365 deltalen=deltalen,
1364 1366 data=(header, data),
1365 1367 base=base,
1366 1368 chainbase=chainbase,
1367 1369 chainlen=chainlen,
1368 1370 compresseddeltalen=compresseddeltalen,
1369 1371 snapshotdepth=snapshotdepth,
1370 1372 )
1371 1373
1372 1374 if deltainfo is not None:
1373 1375 if gather_debug:
1374 1376 end = util.timer()
1375 1377 dbg['duration'] = end - start
1376 1378 dbg[
1377 1379 'delta-base'
1378 1380 ] = deltainfo.base # pytype: disable=attribute-error
1379 1381 dbg['search_round_count'] = 0
1380 1382 dbg['using-cached-base'] = True
1381 1383 dbg['delta_try_count'] = 0
1382 1384 dbg['type'] = b"full"
1383 1385 if snapshotdepth is None:
1384 1386 dbg['snapshot-depth'] = 0
1385 1387 else:
1386 1388 dbg['snapshot-depth'] = snapshotdepth
1387 1389 self._dbg_process_data(dbg)
1388 1390 return deltainfo
1389 1391
1390 1392 # count the number of different deltas we tried (for debug purposes)
1391 1393 dbg_try_count = 0
1392 1394 # count the number of "search rounds" we did (for debug purposes)
1393 1395 dbg_try_rounds = 0
1394 1396 dbg_type = b'unknown'
1395 1397
1396 1398 if p1r is None:
1397 1399 p1r = revlog.rev(revinfo.p1)
1398 1400 p2r = revlog.rev(revinfo.p2)
1399 1401
1400 1402 if self._debug_search:
1401 1403 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1402 1404 msg %= target_rev
1403 1405 self._write_debug(msg)
1404 1406
1405 1407 groups = _candidategroups(
1406 1408 self.revlog,
1407 1409 revinfo.textlen,
1408 1410 p1r,
1409 1411 p2r,
1410 1412 cachedelta,
1411 1413 excluded_bases,
1412 1414 target_rev,
1413 1415 snapshot_cache=self._snapshot_cache,
1414 1416 )
1415 1417 candidaterevs = next(groups)
1416 1418 while candidaterevs is not None:
1417 1419 dbg_try_rounds += 1
1418 1420 if self._debug_search:
1419 1421 prev = None
1420 1422 if deltainfo is not None:
1421 1423 prev = deltainfo.base
1422 1424
1423 1425 if (
1424 1426 cachedelta is not None
1425 1427 and len(candidaterevs) == 1
1426 1428 and cachedelta[0] in candidaterevs
1427 1429 ):
1428 1430 round_type = b"cached-delta"
1429 1431 elif p1r in candidaterevs or p2r in candidaterevs:
1430 1432 round_type = b"parents"
1431 1433 elif prev is not None and all(c < prev for c in candidaterevs):
1432 1434 round_type = b"refine-down"
1433 1435 elif prev is not None and all(c > prev for c in candidaterevs):
1434 1436 round_type = b"refine-up"
1435 1437 else:
1436 1438 round_type = b"search-down"
1437 1439 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1438 1440 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1439 1441 self._write_debug(msg)
1440 1442 nominateddeltas = []
1441 1443 if deltainfo is not None:
1442 1444 if self._debug_search:
1443 1445 msg = (
1444 1446 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1445 1447 )
1446 1448 msg %= (deltainfo.base, deltainfo.deltalen)
1447 1449 self._write_debug(msg)
1448 1450 # if we already found a good delta,
1449 1451 # challenge it against refined candidates
1450 1452 nominateddeltas.append(deltainfo)
1451 1453 for candidaterev in candidaterevs:
1452 1454 if self._debug_search:
1453 1455 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1454 1456 msg %= candidaterev
1455 1457 self._write_debug(msg)
1456 1458 candidate_type = None
1457 1459 if candidaterev == p1r:
1458 1460 candidate_type = b"p1"
1459 1461 elif candidaterev == p2r:
1460 1462 candidate_type = b"p2"
1461 1463 elif self.revlog.issnapshot(candidaterev):
1462 1464 candidate_type = b"snapshot-%d"
1463 1465 candidate_type %= self.revlog.snapshotdepth(
1464 1466 candidaterev
1465 1467 )
1466 1468
1467 1469 if candidate_type is not None:
1468 1470 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1469 1471 msg %= candidate_type
1470 1472 self._write_debug(msg)
1471 1473 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1472 1474 msg %= self.revlog.length(candidaterev)
1473 1475 self._write_debug(msg)
1474 1476 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1475 1477 msg %= self.revlog.deltaparent(candidaterev)
1476 1478 self._write_debug(msg)
1477 1479
1478 1480 dbg_try_count += 1
1479 1481
1480 1482 if self._debug_search:
1481 1483 delta_start = util.timer()
1482 1484 candidatedelta = self._builddeltainfo(
1483 1485 revinfo,
1484 1486 candidaterev,
1485 1487 target_rev=target_rev,
1486 1488 )
1487 1489 if self._debug_search:
1488 1490 delta_end = util.timer()
1489 1491 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1490 1492 msg %= delta_end - delta_start
1491 1493 self._write_debug(msg)
1492 1494 if candidatedelta is not None:
1493 1495 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1494 1496 if self._debug_search:
1495 1497 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1496 1498 msg %= candidatedelta.deltalen
1497 1499 self._write_debug(msg)
1498 1500 nominateddeltas.append(candidatedelta)
1499 1501 elif self._debug_search:
1500 1502 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1501 1503 msg %= candidatedelta.deltalen
1502 1504 self._write_debug(msg)
1503 1505 elif self._debug_search:
1504 1506 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1505 1507 self._write_debug(msg)
1506 1508 if nominateddeltas:
1507 1509 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1508 1510 if deltainfo is not None:
1509 1511 candidaterevs = groups.send(deltainfo.base)
1510 1512 else:
1511 1513 candidaterevs = next(groups)
1512 1514
1513 1515 if deltainfo is None:
1514 1516 dbg_type = b"full"
1515 1517 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1516 1518 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1517 1519 dbg_type = b"snapshot"
1518 1520 else:
1519 1521 dbg_type = b"delta"
1520 1522
1521 1523 if gather_debug:
1522 1524 end = util.timer()
1523 1525 if dbg_type == b'full':
1524 1526 used_cached = (
1525 1527 cachedelta is not None
1526 1528 and dbg_try_rounds == 0
1527 1529 and dbg_try_count == 0
1528 1530 and cachedelta[0] == nullrev
1529 1531 )
1530 1532 else:
1531 1533 used_cached = (
1532 1534 cachedelta is not None
1533 1535 and dbg_try_rounds == 1
1534 1536 and dbg_try_count == 1
1535 1537 and deltainfo.base == cachedelta[0]
1536 1538 )
1537 1539 dbg['duration'] = end - start
1538 1540 dbg[
1539 1541 'delta-base'
1540 1542 ] = deltainfo.base # pytype: disable=attribute-error
1541 1543 dbg['search_round_count'] = dbg_try_rounds
1542 1544 dbg['using-cached-base'] = used_cached
1543 1545 dbg['delta_try_count'] = dbg_try_count
1544 1546 dbg['type'] = dbg_type
1545 1547 if (
1546 1548 deltainfo.snapshotdepth # pytype: disable=attribute-error
1547 1549 is not None
1548 1550 ):
1549 1551 dbg[
1550 1552 'snapshot-depth'
1551 1553 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1552 1554 else:
1553 1555 dbg['snapshot-depth'] = 0
1554 1556 self._dbg_process_data(dbg)
1555 1557 return deltainfo
1556 1558
1557 1559 def _one_dbg_data(self):
1558 1560 return {
1559 1561 'duration': None,
1560 1562 'revision': None,
1561 1563 'delta-base': None,
1562 1564 'search_round_count': None,
1563 1565 'using-cached-base': None,
1564 1566 'delta_try_count': None,
1565 1567 'type': None,
1566 1568 'p1-chain-len': None,
1567 1569 'p2-chain-len': None,
1568 1570 'snapshot-depth': None,
1569 1571 'target-revlog': None,
1570 1572 }
1571 1573
1572 1574 def _dbg_process_data(self, dbg):
1573 1575 if self._debug_info is not None:
1574 1576 self._debug_info.append(dbg)
1575 1577
1576 1578 if self._write_debug is not None:
1577 1579 msg = (
1578 1580 b"DBG-DELTAS:"
1579 1581 b" %-12s"
1580 1582 b" rev=%d:"
1581 1583 b" delta-base=%d"
1582 1584 b" is-cached=%d"
1583 1585 b" - search-rounds=%d"
1584 1586 b" try-count=%d"
1585 1587 b" - delta-type=%-6s"
1586 1588 b" snap-depth=%d"
1587 1589 b" - p1-chain-length=%d"
1588 1590 b" p2-chain-length=%d"
1589 1591 b" - duration=%f"
1590 1592 b"\n"
1591 1593 )
1592 1594 msg %= (
1593 1595 dbg["target-revlog"],
1594 1596 dbg["revision"],
1595 1597 dbg["delta-base"],
1596 1598 dbg["using-cached-base"],
1597 1599 dbg["search_round_count"],
1598 1600 dbg["delta_try_count"],
1599 1601 dbg["type"],
1600 1602 dbg["snapshot-depth"],
1601 1603 dbg["p1-chain-len"],
1602 1604 dbg["p2-chain-len"],
1603 1605 dbg["duration"],
1604 1606 )
1605 1607 self._write_debug(msg)
1606 1608
1607 1609
1608 1610 def delta_compression(default_compression_header, deltainfo):
1609 1611 """return (COMPRESSION_MODE, deltainfo)
1610 1612
1611 1613 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1612 1614 compression.
1613 1615 """
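# The dispatch below keys off the compressed payload produced by
# revlog.compress():
#   - empty header and empty data: COMP_MODE_PLAIN (nothing to store),
#   - no header and data starting with b'\0': COMP_MODE_PLAIN,
#   - no header and data starting with the default engine's own header:
#     COMP_MODE_DEFAULT (the header can be elided),
#   - a b'u' header (explicitly uncompressed): COMP_MODE_PLAIN, with the
#     marker byte dropped via drop_u_compression,
#   - anything else keeps its compression header inline: COMP_MODE_INLINE.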
1614 1616 h, d = deltainfo.data
1615 1617 compression_mode = COMP_MODE_INLINE
1616 1618 if not h and not d:
1617 1619 # no data to store at all... declare them uncompressed
1618 1620 compression_mode = COMP_MODE_PLAIN
1619 1621 elif not h:
1620 1622 t = d[0:1]
1621 1623 if t == b'\0':
1622 1624 compression_mode = COMP_MODE_PLAIN
1623 1625 elif t == default_compression_header:
1624 1626 compression_mode = COMP_MODE_DEFAULT
1625 1627 elif h == b'u':
1626 1628 # we have a more efficient way to declare uncompressed
1627 1629 h = b''
1628 1630 compression_mode = COMP_MODE_PLAIN
1629 1631 deltainfo = drop_u_compression(deltainfo)
1630 1632 return compression_mode, deltainfo