revlog: drop the unused `_sidedatareadfp` method...
marmoute
r51977:9d5efaef default
@@ -1,3807 +1,3798 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
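# A minimal sketch (illustrative values, not taken from the module above) of
# the copy() semantics: _Config.copy() is shallow, so FeatureConfig.copy()
# re-copies its mutable options dict to keep the two instances independent.
_example_a = FeatureConfig(compression_engine_options={b'example-option': 3})
_example_b = _example_a.copy()
_example_b.compression_engine_options[b'example-option'] = 9
assert _example_a.compression_engine_options[b'example-option'] == 3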
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # whether deltas are encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help keep each object
314 314 self-contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
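# A self-contained sketch of the header decoding above, assuming the usual
# 4-byte big-endian header (INDEX_HEADER is ">I") with the version in the
# low 16 bits and feature flags in the high 16 bits; the concrete values
# below are illustrative (1 << 16 matches FLAG_INLINE_DATA, 1 is REVLOGV1).
_example_header = struct.unpack(">I", struct.pack(">I", (1 << 16) | 1))[0]
assert _example_header & 0xFFFF == 1           # _format_version
assert _example_header & ~0xFFFF == (1 << 16)  # _format_flags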
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must reliably be set by normal code, but
425 425 test, debug, or performance measurement code might not set it to an
426 426 accurate value.
427 427 """
428 428
429 429 self.radix = radix
430 430
431 431 self._docket_file = None
432 432 self._indexfile = None
433 433 self._datafile = None
434 434 self._sidedatafile = None
435 435 self._nodemap_file = None
436 436 self.postfix = postfix
437 437 self._trypending = trypending
438 438 self._try_split = try_split
439 439 self.opener = opener
440 440 if persistentnodemap:
441 441 self._nodemap_file = nodemaputil.get_nodemap_file(self)
442 442
443 443 assert target[0] in ALL_KINDS
444 444 assert len(target) == 2
445 445 self.target = target
446 446 if b'feature-config' in self.opener.options:
447 447 self.feature_config = self.opener.options[b'feature-config'].copy()
448 448 else:
449 449 self.feature_config = FeatureConfig()
450 450 self.feature_config.censorable = censorable
451 451 self.feature_config.canonical_parent_order = canonical_parent_order
452 452 if b'data-config' in self.opener.options:
453 453 self.data_config = self.opener.options[b'data-config'].copy()
454 454 else:
455 455 self.data_config = DataConfig()
456 456 self.data_config.check_ambig = checkambig
457 457 self.data_config.mmap_large_index = mmaplargeindex
458 458 if b'delta-config' in self.opener.options:
459 459 self.delta_config = self.opener.options[b'delta-config'].copy()
460 460 else:
461 461 self.delta_config = DeltaConfig()
462 462 self.delta_config.upper_bound_comp = upperboundcomp
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 util.nouideprecwarn(
496 496 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
497 497 )
498 498 return self.delta_config.general_delta
499 499
500 500 @property
501 501 def _checkambig(self):
502 502 """temporary compatibility proxy"""
503 503 util.nouideprecwarn(
504 504 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
505 505 )
506 506 return self.data_config.check_ambig
507 507
508 508 @property
509 509 def _mmaplargeindex(self):
510 510 """temporary compatibility proxy"""
511 511 util.nouideprecwarn(
512 512 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
513 513 )
514 514 return self.data_config.mmap_large_index
515 515
516 516 @property
517 517 def _censorable(self):
518 518 """temporary compatibility proxy"""
519 519 util.nouideprecwarn(
520 520 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
521 521 )
522 522 return self.feature_config.censorable
523 523
524 524 @property
525 525 def _chunkcachesize(self):
526 526 """temporary compatibility proxy"""
527 527 util.nouideprecwarn(
528 528 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
529 529 )
530 530 return self.data_config.chunk_cache_size
531 531
532 532 @property
533 533 def _maxchainlen(self):
534 534 """temporary compatibility proxy"""
535 535 util.nouideprecwarn(
536 536 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
537 537 )
538 538 return self.delta_config.max_chain_len
539 539
540 540 @property
541 541 def _deltabothparents(self):
542 542 """temporary compatibility proxy"""
543 543 util.nouideprecwarn(
544 544 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
545 545 )
546 546 return self.delta_config.delta_both_parents
547 547
548 548 @property
549 549 def _candidate_group_chunk_size(self):
550 550 """temporary compatibility proxy"""
551 551 util.nouideprecwarn(
552 552 b"use revlog.delta_config.candidate_group_chunk_size",
553 553 b"6.6",
554 554 stacklevel=2,
555 555 )
556 556 return self.delta_config.candidate_group_chunk_size
557 557
558 558 @property
559 559 def _debug_delta(self):
560 560 """temporary compatibility proxy"""
561 561 util.nouideprecwarn(
562 562 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
563 563 )
564 564 return self.delta_config.debug_delta
565 565
566 566 @property
567 567 def _compengine(self):
568 568 """temporary compatibility proxy"""
569 569 util.nouideprecwarn(
570 570 b"use revlog.feature_config.compression_engine",
571 571 b"6.6",
572 572 stacklevel=2,
573 573 )
574 574 return self.feature_config.compression_engine
575 575
576 576 @property
577 577 def upperboundcomp(self):
578 578 """temporary compatibility proxy"""
579 579 util.nouideprecwarn(
580 580 b"use revlog.delta_config.upper_bound_comp",
581 581 b"6.6",
582 582 stacklevel=2,
583 583 )
584 584 return self.delta_config.upper_bound_comp
585 585
586 586 @property
587 587 def _compengineopts(self):
588 588 """temporary compatibility proxy"""
589 589 util.nouideprecwarn(
590 590 b"use revlog.feature_config.compression_engine_options",
591 591 b"6.6",
592 592 stacklevel=2,
593 593 )
594 594 return self.feature_config.compression_engine_options
595 595
596 596 @property
597 597 def _maxdeltachainspan(self):
598 598 """temporary compatibility proxy"""
599 599 util.nouideprecwarn(
600 600 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
601 601 )
602 602 return self.delta_config.max_deltachain_span
603 603
604 604 @property
605 605 def _withsparseread(self):
606 606 """temporary compatibility proxy"""
607 607 util.nouideprecwarn(
608 608 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
609 609 )
610 610 return self.data_config.with_sparse_read
611 611
612 612 @property
613 613 def _sparserevlog(self):
614 614 """temporary compatibility proxy"""
615 615 util.nouideprecwarn(
616 616 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
617 617 )
618 618 return self.delta_config.sparse_revlog
619 619
620 620 @property
621 621 def hassidedata(self):
622 622 """temporary compatibility proxy"""
623 623 util.nouideprecwarn(
624 624 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
625 625 )
626 626 return self.feature_config.has_side_data
627 627
628 628 @property
629 629 def _srdensitythreshold(self):
630 630 """temporary compatibility proxy"""
631 631 util.nouideprecwarn(
632 632 b"use revlog.data_config.sr_density_threshold",
633 633 b"6.6",
634 634 stacklevel=2,
635 635 )
636 636 return self.data_config.sr_density_threshold
637 637
638 638 @property
639 639 def _srmingapsize(self):
640 640 """temporary compatibility proxy"""
641 641 util.nouideprecwarn(
642 642 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
643 643 )
644 644 return self.data_config.sr_min_gap_size
645 645
646 646 @property
647 647 def _compute_rank(self):
648 648 """temporary compatibility proxy"""
649 649 util.nouideprecwarn(
650 650 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
651 651 )
652 652 return self.feature_config.compute_rank
653 653
654 654 @property
655 655 def canonical_parent_order(self):
656 656 """temporary compatibility proxy"""
657 657 util.nouideprecwarn(
658 658 b"use revlog.feature_config.canonical_parent_order",
659 659 b"6.6",
660 660 stacklevel=2,
661 661 )
662 662 return self.feature_config.canonical_parent_order
663 663
664 664 @property
665 665 def _lazydelta(self):
666 666 """temporary compatibility proxy"""
667 667 util.nouideprecwarn(
668 668 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
669 669 )
670 670 return self.delta_config.lazy_delta
671 671
672 672 @property
673 673 def _lazydeltabase(self):
674 674 """temporary compatibility proxy"""
675 675 util.nouideprecwarn(
676 676 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
677 677 )
678 678 return self.delta_config.lazy_delta_base
679 679
680 680 def _init_opts(self):
681 681 """process options (from above/config) to setup associated default revlog mode
682 682
683 683 These values might be affected when actually reading on disk information.
684 684
685 685 The relevant values are returned for use in _loadindex().
686 686
687 687 * newversionflags:
688 688 version header to use if we need to create a new revlog
689 689
690 690 * mmapindexthreshold:
691 691 minimal index size at which to start using mmap
692 692
693 693 * force_nodemap:
694 694 force the usage of a "development" version of the nodemap code
695 695 """
696 696 opts = self.opener.options
697 697
698 698 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
699 699 new_header = CHANGELOGV2
700 700 compute_rank = opts.get(b'changelogv2.compute-rank', True)
701 701 self.feature_config.compute_rank = compute_rank
702 702 elif b'revlogv2' in opts:
703 703 new_header = REVLOGV2
704 704 elif b'revlogv1' in opts:
705 705 new_header = REVLOGV1 | FLAG_INLINE_DATA
706 706 if b'generaldelta' in opts:
707 707 new_header |= FLAG_GENERALDELTA
708 708 elif b'revlogv0' in self.opener.options:
709 709 new_header = REVLOGV0
710 710 else:
711 711 new_header = REVLOG_DEFAULT_VERSION
712 712
713 713 mmapindexthreshold = None
714 714 if self.data_config.mmap_large_index:
715 715 mmapindexthreshold = self.data_config.mmap_index_threshold
716 716 if self.feature_config.enable_ellipsis:
717 717 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
718 718
719 719 # revlog v0 doesn't have flag processors
720 720 for flag, processor in opts.get(b'flagprocessors', {}).items():
721 721 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
722 722
723 723 chunk_cache_size = self.data_config.chunk_cache_size
724 724 if chunk_cache_size <= 0:
725 725 raise error.RevlogError(
726 726 _(b'revlog chunk cache size %r is not greater than 0')
727 727 % chunk_cache_size
728 728 )
729 729 elif chunk_cache_size & (chunk_cache_size - 1):
730 730 raise error.RevlogError(
731 731 _(b'revlog chunk cache size %r is not a power of 2')
732 732 % chunk_cache_size
733 733 )
734 734 force_nodemap = opts.get(b'devel-force-nodemap', False)
735 735 return new_header, mmapindexthreshold, force_nodemap
736 736
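# A self-contained sketch of the power-of-two check used above:
# `x & (x - 1)` clears the lowest set bit, so it is zero exactly for
# powers of two (values below are illustrative).
assert 65536 & (65536 - 1) == 0  # accepted chunk cache size
assert 65535 & (65535 - 1) != 0  # rejected: not a power of two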
737 737 def _get_data(self, filepath, mmap_threshold, size=None):
738 738 """return a file content with or without mmap
739 739
740 740 If the file is missing, return the empty string"""
741 741 try:
742 742 with self.opener(filepath) as fp:
743 743 if mmap_threshold is not None:
744 744 file_size = self.opener.fstat(fp).st_size
745 745 if file_size >= mmap_threshold:
746 746 if size is not None:
747 747 # avoid potential mmap crash
748 748 size = min(file_size, size)
749 749 # TODO: should .close() to release resources without
750 750 # relying on Python GC
751 751 if size is None:
752 752 return util.buffer(util.mmapread(fp))
753 753 else:
754 754 return util.buffer(util.mmapread(fp, size))
755 755 if size is None:
756 756 return fp.read()
757 757 else:
758 758 return fp.read(size)
759 759 except FileNotFoundError:
760 760 return b''
761 761
762 762 def get_streams(self, max_linkrev, force_inline=False):
763 763 """return a list of streams that represent this revlog
764 764
765 765 This is used by stream-clone to do bytes to bytes copies of a repository.
766 766
767 767 This streams data for all revisions that refer to a changelog revision up
768 768 to `max_linkrev`.
769 769
770 770 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
771 771
772 772 It returns a list of three-tuples:
773 773
774 774 [
775 775 (filename, bytes_stream, stream_size),
776 776 …
777 777 ]
778 778 """
779 779 n = len(self)
780 780 index = self.index
781 781 while n > 0:
782 782 linkrev = index[n - 1][4]
783 783 if linkrev < max_linkrev:
784 784 break
785 785 # note: this loop will rarely go through multiple iterations, since
786 786 # it only traverses commits created during the current streaming
787 787 # pull operation.
788 788 #
789 789 # If this becomes a problem, using a binary search should cap the
790 790 # runtime of this.
791 791 n = n - 1
792 792 if n == 0:
793 793 # no data to send
794 794 return []
795 795 index_size = n * index.entry_size
796 796 data_size = self.end(n - 1)
797 797
798 798 # XXX we might have been split (or stripped) since the object
799 799 # initialization. We need to close this race too, by having a way to
800 800 # pre-open the files we feed to the revlog and never closing them before
801 801 # we are done streaming.
802 802
803 803 if self._inline:
804 804
805 805 def get_stream():
806 806 with self.opener(self._indexfile, mode=b"r") as fp:
807 807 yield None
808 808 size = index_size + data_size
809 809 if size <= 65536:
810 810 yield fp.read(size)
811 811 else:
812 812 yield from util.filechunkiter(fp, limit=size)
813 813
814 814 inline_stream = get_stream()
815 815 next(inline_stream)
816 816 return [
817 817 (self._indexfile, inline_stream, index_size + data_size),
818 818 ]
819 819 elif force_inline:
820 820
821 821 def get_stream():
822 822 with self.reading():
823 823 yield None
824 824
825 825 for rev in range(n):
826 826 idx = self.index.entry_binary(rev)
827 827 if rev == 0 and self._docket is None:
828 828 # re-inject the inline flag
829 829 header = self._format_flags
830 830 header |= self._format_version
831 831 header |= FLAG_INLINE_DATA
832 832 header = self.index.pack_header(header)
833 833 idx = header + idx
834 834 yield idx
835 835 yield self._getsegmentforrevs(rev, rev)[1]
836 836
837 837 inline_stream = get_stream()
838 838 next(inline_stream)
839 839 return [
840 840 (self._indexfile, inline_stream, index_size + data_size),
841 841 ]
842 842 else:
843 843
844 844 def get_index_stream():
845 845 with self.opener(self._indexfile, mode=b"r") as fp:
846 846 yield None
847 847 if index_size <= 65536:
848 848 yield fp.read(index_size)
849 849 else:
850 850 yield from util.filechunkiter(fp, limit=index_size)
851 851
852 852 def get_data_stream():
853 853 with self._datafp() as fp:
854 854 yield None
855 855 if data_size <= 65536:
856 856 yield fp.read(data_size)
857 857 else:
858 858 yield from util.filechunkiter(fp, limit=data_size)
859 859
860 860 index_stream = get_index_stream()
861 861 next(index_stream)
862 862 data_stream = get_data_stream()
863 863 next(data_stream)
864 864 return [
865 865 (self._datafile, data_stream, data_size),
866 866 (self._indexfile, index_stream, index_size),
867 867 ]
868 868
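# A hedged usage sketch of the get_streams() return value described above.
# `rl`, `max_linkrev` and `write_chunks` are hypothetical stand-ins; each
# returned entry pairs a file name with a generator of byte chunks and the
# total number of bytes that generator will yield.
#
#     for name, stream, size in rl.get_streams(max_linkrev):
#         written = write_chunks(name, stream)
#         assert written == size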
869 869 def _loadindex(self, docket=None):
870 870
871 871 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
872 872
873 873 if self.postfix is not None:
874 874 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
875 875 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
876 876 entry_point = b'%s.i.a' % self.radix
877 877 elif self._try_split and self.opener.exists(self._split_index_file):
878 878 entry_point = self._split_index_file
879 879 else:
880 880 entry_point = b'%s.i' % self.radix
881 881
882 882 if docket is not None:
883 883 self._docket = docket
884 884 self._docket_file = entry_point
885 885 else:
886 886 self._initempty = True
887 887 entry_data = self._get_data(entry_point, mmapindexthreshold)
888 888 if len(entry_data) > 0:
889 889 header = INDEX_HEADER.unpack(entry_data[:4])[0]
890 890 self._initempty = False
891 891 else:
892 892 header = new_header
893 893
894 894 self._format_flags = header & ~0xFFFF
895 895 self._format_version = header & 0xFFFF
896 896
897 897 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
898 898 if supported_flags is None:
899 899 msg = _(b'unknown version (%d) in revlog %s')
900 900 msg %= (self._format_version, self.display_id)
901 901 raise error.RevlogError(msg)
902 902 elif self._format_flags & ~supported_flags:
903 903 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
904 904 display_flag = self._format_flags >> 16
905 905 msg %= (display_flag, self._format_version, self.display_id)
906 906 raise error.RevlogError(msg)
907 907
908 908 features = FEATURES_BY_VERSION[self._format_version]
909 909 self._inline = features[b'inline'](self._format_flags)
910 910 self.delta_config.general_delta = features[b'generaldelta'](
911 911 self._format_flags
912 912 )
913 913 self.feature_config.has_side_data = features[b'sidedata']
914 914
915 915 if not features[b'docket']:
916 916 self._indexfile = entry_point
917 917 index_data = entry_data
918 918 else:
919 919 self._docket_file = entry_point
920 920 if self._initempty:
921 921 self._docket = docketutil.default_docket(self, header)
922 922 else:
923 923 self._docket = docketutil.parse_docket(
924 924 self, entry_data, use_pending=self._trypending
925 925 )
926 926
927 927 if self._docket is not None:
928 928 self._indexfile = self._docket.index_filepath()
929 929 index_data = b''
930 930 index_size = self._docket.index_end
931 931 if index_size > 0:
932 932 index_data = self._get_data(
933 933 self._indexfile, mmapindexthreshold, size=index_size
934 934 )
935 935 if len(index_data) < index_size:
936 936 msg = _(b'too few index data for %s: got %d, expected %d')
937 937 msg %= (self.display_id, len(index_data), index_size)
938 938 raise error.RevlogError(msg)
939 939
940 940 self._inline = False
941 941 # generaldelta implied by version 2 revlogs.
942 942 self.delta_config.general_delta = True
943 943 # the logic for persistent nodemap will be dealt with within the
944 944 # main docket, so disable it for now.
945 945 self._nodemap_file = None
946 946
947 947 if self._docket is not None:
948 948 self._datafile = self._docket.data_filepath()
949 949 self._sidedatafile = self._docket.sidedata_filepath()
950 950 elif self.postfix is None:
951 951 self._datafile = b'%s.d' % self.radix
952 952 else:
953 953 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
954 954
955 955 self.nodeconstants = sha1nodeconstants
956 956 self.nullid = self.nodeconstants.nullid
957 957
958 958 # sparse-revlog can't be on without general-delta (issue6056)
959 959 if not self.delta_config.general_delta:
960 960 self.delta_config.sparse_revlog = False
961 961
962 962 self._storedeltachains = True
963 963
964 964 devel_nodemap = (
965 965 self._nodemap_file
966 966 and force_nodemap
967 967 and parse_index_v1_nodemap is not None
968 968 )
969 969
970 970 use_rust_index = False
971 971 if rustrevlog is not None:
972 972 if self._nodemap_file is not None:
973 973 use_rust_index = True
974 974 else:
975 975 use_rust_index = self.opener.options.get(b'rust.index')
976 976
977 977 self._parse_index = parse_index_v1
978 978 if self._format_version == REVLOGV0:
979 979 self._parse_index = revlogv0.parse_index_v0
980 980 elif self._format_version == REVLOGV2:
981 981 self._parse_index = parse_index_v2
982 982 elif self._format_version == CHANGELOGV2:
983 983 self._parse_index = parse_index_cl_v2
984 984 elif devel_nodemap:
985 985 self._parse_index = parse_index_v1_nodemap
986 986 elif use_rust_index:
987 987 self._parse_index = parse_index_v1_mixed
988 988 try:
989 989 d = self._parse_index(index_data, self._inline)
990 990 index, chunkcache = d
991 991 use_nodemap = (
992 992 not self._inline
993 993 and self._nodemap_file is not None
994 994 and hasattr(index, 'update_nodemap_data')
995 995 )
996 996 if use_nodemap:
997 997 nodemap_data = nodemaputil.persisted_data(self)
998 998 if nodemap_data is not None:
999 999 docket = nodemap_data[0]
1000 1000 if (
1001 1001 len(d[0]) > docket.tip_rev
1002 1002 and d[0][docket.tip_rev][7] == docket.tip_node
1003 1003 ):
1004 1004 # no changelog tampering
1005 1005 self._nodemap_docket = docket
1006 1006 index.update_nodemap_data(*nodemap_data)
1007 1007 except (ValueError, IndexError):
1008 1008 raise error.RevlogError(
1009 1009 _(b"index %s is corrupted") % self.display_id
1010 1010 )
1011 1011 self.index = index
1012 1012 self._segmentfile = randomaccessfile.randomaccessfile(
1013 1013 self.opener,
1014 1014 (self._indexfile if self._inline else self._datafile),
1015 1015 self.data_config.chunk_cache_size,
1016 1016 chunkcache,
1017 1017 )
1018 1018 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1019 1019 self.opener,
1020 1020 self._sidedatafile,
1021 1021 self.data_config.chunk_cache_size,
1022 1022 )
1023 1023 # revnum -> (chain-length, sum-delta-length)
1024 1024 self._chaininfocache = util.lrucachedict(500)
1025 1025 # revlog header -> revlog compressor
1026 1026 self._decompressors = {}
1027 1027
1028 1028 def get_revlog(self):
1029 1029 """simple function to mirror API of other not-really-revlog API"""
1030 1030 return self
1031 1031
1032 1032 @util.propertycache
1033 1033 def revlog_kind(self):
1034 1034 return self.target[0]
1035 1035
1036 1036 @util.propertycache
1037 1037 def display_id(self):
1038 1038 """The public facing "ID" of the revlog that we use in message"""
1039 1039 if self.revlog_kind == KIND_FILELOG:
1040 1040 # Reference the file without the "data/" prefix, so it is familiar
1041 1041 # to the user.
1042 1042 return self.target[1]
1043 1043 else:
1044 1044 return self.radix
1045 1045
1046 1046 def _get_decompressor(self, t):
1047 1047 try:
1048 1048 compressor = self._decompressors[t]
1049 1049 except KeyError:
1050 1050 try:
1051 1051 engine = util.compengines.forrevlogheader(t)
1052 1052 compressor = engine.revlogcompressor(
1053 1053 self.feature_config.compression_engine_options
1054 1054 )
1055 1055 self._decompressors[t] = compressor
1056 1056 except KeyError:
1057 1057 raise error.RevlogError(
1058 1058 _(b'unknown compression type %s') % binascii.hexlify(t)
1059 1059 )
1060 1060 return compressor
1061 1061
1062 1062 @util.propertycache
1063 1063 def _compressor(self):
1064 1064 engine = util.compengines[self.feature_config.compression_engine]
1065 1065 return engine.revlogcompressor(
1066 1066 self.feature_config.compression_engine_options
1067 1067 )
1068 1068
1069 1069 @util.propertycache
1070 1070 def _decompressor(self):
1071 1071 """the default decompressor"""
1072 1072 if self._docket is None:
1073 1073 return None
1074 1074 t = self._docket.default_compression_header
1075 1075 c = self._get_decompressor(t)
1076 1076 return c.decompress
1077 1077
1078 1078 def __index_write_fp(self):
1079 1079 # You should not use this directly; use `_writing` instead
1080 1080 try:
1081 1081 f = self.opener(
1082 1082 self._indexfile,
1083 1083 mode=b"r+",
1084 1084 checkambig=self.data_config.check_ambig,
1085 1085 )
1086 1086 if self._docket is None:
1087 1087 f.seek(0, os.SEEK_END)
1088 1088 else:
1089 1089 f.seek(self._docket.index_end, os.SEEK_SET)
1090 1090 return f
1091 1091 except FileNotFoundError:
1092 1092 return self.opener(
1093 1093 self._indexfile,
1094 1094 mode=b"w+",
1095 1095 checkambig=self.data_config.check_ambig,
1096 1096 )
1097 1097
1098 1098 def __index_new_fp(self):
1099 1099 # You should not use this unless you are upgrading from an inline revlog
1100 1100 return self.opener(
1101 1101 self._indexfile,
1102 1102 mode=b"w",
1103 1103 checkambig=self.data_config.check_ambig,
1104 1104 atomictemp=True,
1105 1105 )
1106 1106
1107 1107 def _datafp(self, mode=b'r'):
1108 1108 """file object for the revlog's data file"""
1109 1109 return self.opener(self._datafile, mode=mode)
1110 1110
1111 @contextlib.contextmanager
1112 def _sidedatareadfp(self):
1113 """file object suitable to read sidedata"""
1114 if self._writinghandles:
1115 yield self._writinghandles[2]
1116 else:
1117 with self.opener(self._sidedatafile) as fp:
1118 yield fp
1119
1120 1111 def tiprev(self):
1121 1112 return len(self.index) - 1
1122 1113
1123 1114 def tip(self):
1124 1115 return self.node(self.tiprev())
1125 1116
1126 1117 def __contains__(self, rev):
1127 1118 return 0 <= rev < len(self)
1128 1119
1129 1120 def __len__(self):
1130 1121 return len(self.index)
1131 1122
1132 1123 def __iter__(self):
1133 1124 return iter(range(len(self)))
1134 1125
1135 1126 def revs(self, start=0, stop=None):
1136 1127 """iterate over all rev in this revlog (from start to stop)"""
1137 1128 return storageutil.iterrevs(len(self), start=start, stop=stop)
1138 1129
1139 1130 def hasnode(self, node):
1140 1131 try:
1141 1132 self.rev(node)
1142 1133 return True
1143 1134 except KeyError:
1144 1135 return False
1145 1136
1146 1137 def _candelta(self, baserev, rev):
1147 1138 """whether two revisions (baserev, rev) can be delta-ed or not"""
1148 1139 # Disable delta if either rev requires a content-changing flag
1149 1140 # processor (ex. LFS). This is because such a flag processor can alter
1150 1141 # the rawtext content that the delta will be based on, and two clients
1151 1142 # could have the same revlog node with different flags (i.e. different
1152 1143 # rawtext contents) and the delta could be incompatible.
1153 1144 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1154 1145 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1155 1146 ):
1156 1147 return False
1157 1148 return True
1158 1149
1159 1150 def update_caches(self, transaction):
1160 1151 """update on disk cache
1161 1152
1162 1153 If a transaction is passed, the update may be delayed to transaction
1163 1154 commit."""
1164 1155 if self._nodemap_file is not None:
1165 1156 if transaction is None:
1166 1157 nodemaputil.update_persistent_nodemap(self)
1167 1158 else:
1168 1159 nodemaputil.setup_persistent_nodemap(transaction, self)
1169 1160
1170 1161 def clearcaches(self):
1171 1162 """Clear in-memory caches"""
1172 1163 self._revisioncache = None
1173 1164 self._chainbasecache.clear()
1174 1165 self._segmentfile.clear_cache()
1175 1166 self._segmentfile_sidedata.clear_cache()
1176 1167 self._pcache = {}
1177 1168 self._nodemap_docket = None
1178 1169 self.index.clearcaches()
1179 1170 # The python code is the one responsible for validating the docket, so we
1180 1171 # end up having to refresh it here.
1181 1172 use_nodemap = (
1182 1173 not self._inline
1183 1174 and self._nodemap_file is not None
1184 1175 and hasattr(self.index, 'update_nodemap_data')
1185 1176 )
1186 1177 if use_nodemap:
1187 1178 nodemap_data = nodemaputil.persisted_data(self)
1188 1179 if nodemap_data is not None:
1189 1180 self._nodemap_docket = nodemap_data[0]
1190 1181 self.index.update_nodemap_data(*nodemap_data)
1191 1182
1192 1183 def rev(self, node):
1193 1184 """return the revision number associated with a <nodeid>"""
1194 1185 try:
1195 1186 return self.index.rev(node)
1196 1187 except TypeError:
1197 1188 raise
1198 1189 except error.RevlogError:
1199 1190 # parsers.c radix tree lookup failed
1200 1191 if (
1201 1192 node == self.nodeconstants.wdirid
1202 1193 or node in self.nodeconstants.wdirfilenodeids
1203 1194 ):
1204 1195 raise error.WdirUnsupported
1205 1196 raise error.LookupError(node, self.display_id, _(b'no node'))
1206 1197
1207 1198 # Accessors for index entries.
1208 1199
1209 1200 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1210 1201 # are flags.
1211 1202 def start(self, rev):
1212 1203 return int(self.index[rev][0] >> 16)
1213 1204
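# A self-contained sketch of the offset/flags packing described above: the
# first index field keeps the data offset in the high 48 bits and the flags
# in the low 16 bits (values below are made up for illustration).
_example_entry0 = (1234 << 16) | 0x0001
assert _example_entry0 >> 16 == 1234       # what start() extracts
assert _example_entry0 & 0xFFFF == 0x0001  # what flags() extracts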
1214 1205 def sidedata_cut_off(self, rev):
1215 1206 sd_cut_off = self.index[rev][8]
1216 1207 if sd_cut_off != 0:
1217 1208 return sd_cut_off
1218 1209 # This is some annoying dance, because entries without sidedata
1219 1210 # currently use 0 as their offset. (instead of previous-offset +
1220 1211 # previous-size)
1221 1212 #
1222 1213 # We should reconsider this sidedata → 0 sidedata_offset policy.
1223 1214 # In the meantime, we need this.
1224 1215 while 0 <= rev:
1225 1216 e = self.index[rev]
1226 1217 if e[9] != 0:
1227 1218 return e[8] + e[9]
1228 1219 rev -= 1
1229 1220 return 0
1230 1221
1231 1222 def flags(self, rev):
1232 1223 return self.index[rev][0] & 0xFFFF
1233 1224
1234 1225 def length(self, rev):
1235 1226 return self.index[rev][1]
1236 1227
1237 1228 def sidedata_length(self, rev):
1238 1229 if not self.feature_config.has_side_data:
1239 1230 return 0
1240 1231 return self.index[rev][9]
1241 1232
1242 1233 def rawsize(self, rev):
1243 1234 """return the length of the uncompressed text for a given revision"""
1244 1235 l = self.index[rev][2]
1245 1236 if l >= 0:
1246 1237 return l
1247 1238
1248 1239 t = self.rawdata(rev)
1249 1240 return len(t)
1250 1241
1251 1242 def size(self, rev):
1252 1243 """length of non-raw text (processed by a "read" flag processor)"""
1253 1244 # fast path: if no "read" flag processor could change the content,
1254 1245 # size is rawsize. note: ELLIPSIS is known to not change the content.
1255 1246 flags = self.flags(rev)
1256 1247 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1257 1248 return self.rawsize(rev)
1258 1249
1259 1250 return len(self.revision(rev))
1260 1251
1261 1252 def fast_rank(self, rev):
1262 1253 """Return the rank of a revision if already known, or None otherwise.
1263 1254
1264 1255 The rank of a revision is the size of the sub-graph it defines as a
1265 1256 head. Equivalently, the rank of a revision `r` is the size of the set
1266 1257 `ancestors(r)`, `r` included.
1267 1258
1268 1259 This method returns the rank retrieved from the revlog in constant
1269 1260 time. It makes no attempt at computing unknown values for versions of
1270 1261 the revlog which do not persist the rank.
1271 1262 """
1272 1263 rank = self.index[rev][ENTRY_RANK]
1273 1264 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1274 1265 return None
1275 1266 if rev == nullrev:
1276 1267 return 0 # convention
1277 1268 return rank
1278 1269
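# A toy illustration of the rank definition above (made-up data, not revlog
# entries): for a linear history 0 <- 1 <- 2, ancestors(2) including 2
# itself has size 3, so the rank of revision 2 is 3.
_toy_parents = {0: [], 1: [0], 2: [1]}
def _toy_rank(rev, parents=_toy_parents):
    seen, todo = set(), [rev]
    while todo:
        r = todo.pop()
        if r not in seen:
            seen.add(r)
            todo.extend(parents[r])
    return len(seen)
assert _toy_rank(2) == 3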
1279 1270 def chainbase(self, rev):
1280 1271 base = self._chainbasecache.get(rev)
1281 1272 if base is not None:
1282 1273 return base
1283 1274
1284 1275 index = self.index
1285 1276 iterrev = rev
1286 1277 base = index[iterrev][3]
1287 1278 while base != iterrev:
1288 1279 iterrev = base
1289 1280 base = index[iterrev][3]
1290 1281
1291 1282 self._chainbasecache[rev] = base
1292 1283 return base
1293 1284
1294 1285 def linkrev(self, rev):
1295 1286 return self.index[rev][4]
1296 1287
1297 1288 def parentrevs(self, rev):
1298 1289 try:
1299 1290 entry = self.index[rev]
1300 1291 except IndexError:
1301 1292 if rev == wdirrev:
1302 1293 raise error.WdirUnsupported
1303 1294 raise
1304 1295
1305 1296 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1306 1297 return entry[6], entry[5]
1307 1298 else:
1308 1299 return entry[5], entry[6]
1309 1300
1310 1301 # fast parentrevs(rev) where rev isn't filtered
1311 1302 _uncheckedparentrevs = parentrevs
1312 1303
1313 1304 def node(self, rev):
1314 1305 try:
1315 1306 return self.index[rev][7]
1316 1307 except IndexError:
1317 1308 if rev == wdirrev:
1318 1309 raise error.WdirUnsupported
1319 1310 raise
1320 1311
1321 1312 # Derived from index values.
1322 1313
1323 1314 def end(self, rev):
1324 1315 return self.start(rev) + self.length(rev)
1325 1316
1326 1317 def parents(self, node):
1327 1318 i = self.index
1328 1319 d = i[self.rev(node)]
1329 1320 # inline node() to avoid function call overhead
1330 1321 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1331 1322 return i[d[6]][7], i[d[5]][7]
1332 1323 else:
1333 1324 return i[d[5]][7], i[d[6]][7]
1334 1325
1335 1326 def chainlen(self, rev):
1336 1327 return self._chaininfo(rev)[0]
1337 1328
1338 1329 def _chaininfo(self, rev):
1339 1330 chaininfocache = self._chaininfocache
1340 1331 if rev in chaininfocache:
1341 1332 return chaininfocache[rev]
1342 1333 index = self.index
1343 1334 generaldelta = self.delta_config.general_delta
1344 1335 iterrev = rev
1345 1336 e = index[iterrev]
1346 1337 clen = 0
1347 1338 compresseddeltalen = 0
1348 1339 while iterrev != e[3]:
1349 1340 clen += 1
1350 1341 compresseddeltalen += e[1]
1351 1342 if generaldelta:
1352 1343 iterrev = e[3]
1353 1344 else:
1354 1345 iterrev -= 1
1355 1346 if iterrev in chaininfocache:
1356 1347 t = chaininfocache[iterrev]
1357 1348 clen += t[0]
1358 1349 compresseddeltalen += t[1]
1359 1350 break
1360 1351 e = index[iterrev]
1361 1352 else:
1362 1353 # Add text length of base since decompressing that also takes
1363 1354 # work. For cache hits the length is already included.
1364 1355 compresseddeltalen += e[1]
1365 1356 r = (clen, compresseddeltalen)
1366 1357 chaininfocache[rev] = r
1367 1358 return r
1368 1359
1369 1360 def _deltachain(self, rev, stoprev=None):
1370 1361 """Obtain the delta chain for a revision.
1371 1362
1372 1363 ``stoprev`` specifies a revision to stop at. If not specified, we
1373 1364 stop at the base of the chain.
1374 1365
1375 1366 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1376 1367 revs in ascending order and ``stopped`` is a bool indicating whether
1377 1368 ``stoprev`` was hit.
1378 1369 """
1379 1370 generaldelta = self.delta_config.general_delta
1380 1371 # Try C implementation.
1381 1372 try:
1382 1373 return self.index.deltachain(rev, stoprev, generaldelta)
1383 1374 except AttributeError:
1384 1375 pass
1385 1376
1386 1377 chain = []
1387 1378
1388 1379 # Alias to prevent attribute lookup in tight loop.
1389 1380 index = self.index
1390 1381
1391 1382 iterrev = rev
1392 1383 e = index[iterrev]
1393 1384 while iterrev != e[3] and iterrev != stoprev:
1394 1385 chain.append(iterrev)
1395 1386 if generaldelta:
1396 1387 iterrev = e[3]
1397 1388 else:
1398 1389 iterrev -= 1
1399 1390 e = index[iterrev]
1400 1391
1401 1392 if iterrev == stoprev:
1402 1393 stopped = True
1403 1394 else:
1404 1395 chain.append(iterrev)
1405 1396 stopped = False
1406 1397
1407 1398 chain.reverse()
1408 1399 return chain, stopped
1409 1400
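# A self-contained sketch of the pure-python fallback above: follow delta
# base pointers (toy values, rev 0 stores a full text) until a revision is
# its own base, then reverse so the chain is in ascending order.
_toy_bases = [0, 0, 1, 2]  # rev -> delta base rev
def _toy_chain(rev, bases=_toy_bases):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev]
    chain.append(rev)
    chain.reverse()
    return chain
assert _toy_chain(3) == [0, 1, 2, 3]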
1410 1401 def ancestors(self, revs, stoprev=0, inclusive=False):
1411 1402 """Generate the ancestors of 'revs' in reverse revision order.
1412 1403 Does not generate revs lower than stoprev.
1413 1404
1414 1405 See the documentation for ancestor.lazyancestors for more details."""
1415 1406
1416 1407 # first, make sure start revisions aren't filtered
1417 1408 revs = list(revs)
1418 1409 checkrev = self.node
1419 1410 for r in revs:
1420 1411 checkrev(r)
1421 1412 # and we're sure ancestors aren't filtered as well
1422 1413
1423 1414 if rustancestor is not None and self.index.rust_ext_compat:
1424 1415 lazyancestors = rustancestor.LazyAncestors
1425 1416 arg = self.index
1426 1417 else:
1427 1418 lazyancestors = ancestor.lazyancestors
1428 1419 arg = self._uncheckedparentrevs
1429 1420 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1430 1421
1431 1422 def descendants(self, revs):
1432 1423 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1433 1424
1434 1425 def findcommonmissing(self, common=None, heads=None):
1435 1426 """Return a tuple of the ancestors of common and the ancestors of heads
1436 1427 that are not ancestors of common. In revset terminology, we return the
1437 1428 tuple:
1438 1429
1439 1430 ::common, (::heads) - (::common)
1440 1431
1441 1432 The list is sorted by revision number, meaning it is
1442 1433 topologically sorted.
1443 1434
1444 1435 'heads' and 'common' are both lists of node IDs. If heads is
1445 1436 not supplied, uses all of the revlog's heads. If common is not
1446 1437 supplied, uses nullid."""
1447 1438 if common is None:
1448 1439 common = [self.nullid]
1449 1440 if heads is None:
1450 1441 heads = self.heads()
1451 1442
1452 1443 common = [self.rev(n) for n in common]
1453 1444 heads = [self.rev(n) for n in heads]
1454 1445
1455 1446 # we want the ancestors, but inclusive
1456 1447 class lazyset:
1457 1448 def __init__(self, lazyvalues):
1458 1449 self.addedvalues = set()
1459 1450 self.lazyvalues = lazyvalues
1460 1451
1461 1452 def __contains__(self, value):
1462 1453 return value in self.addedvalues or value in self.lazyvalues
1463 1454
1464 1455 def __iter__(self):
1465 1456 added = self.addedvalues
1466 1457 for r in added:
1467 1458 yield r
1468 1459 for r in self.lazyvalues:
1469 1460 if not r in added:
1470 1461 yield r
1471 1462
1472 1463 def add(self, value):
1473 1464 self.addedvalues.add(value)
1474 1465
1475 1466 def update(self, values):
1476 1467 self.addedvalues.update(values)
1477 1468
1478 1469 has = lazyset(self.ancestors(common))
1479 1470 has.add(nullrev)
1480 1471 has.update(common)
1481 1472
1482 1473 # take all ancestors from heads that aren't in has
1483 1474 missing = set()
1484 1475 visit = collections.deque(r for r in heads if r not in has)
1485 1476 while visit:
1486 1477 r = visit.popleft()
1487 1478 if r in missing:
1488 1479 continue
1489 1480 else:
1490 1481 missing.add(r)
1491 1482 for p in self.parentrevs(r):
1492 1483 if p not in has:
1493 1484 visit.append(p)
1494 1485 missing = list(missing)
1495 1486 missing.sort()
1496 1487 return has, [self.node(miss) for miss in missing]
1497 1488
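# A toy illustration of the revset formula in the docstring above,
# (::heads) - (::common), using made-up revision sets:
_toy_ancestors_of_heads = {0, 1, 2, 3, 4}
_toy_ancestors_of_common = {0, 1, 2}
assert sorted(_toy_ancestors_of_heads - _toy_ancestors_of_common) == [3, 4]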
1498 1489 def incrementalmissingrevs(self, common=None):
1499 1490 """Return an object that can be used to incrementally compute the
1500 1491 revision numbers of the ancestors of arbitrary sets that are not
1501 1492 ancestors of common. This is an ancestor.incrementalmissingancestors
1502 1493 object.
1503 1494
1504 1495 'common' is a list of revision numbers. If common is not supplied, uses
1505 1496 nullrev.
1506 1497 """
1507 1498 if common is None:
1508 1499 common = [nullrev]
1509 1500
1510 1501 if rustancestor is not None and self.index.rust_ext_compat:
1511 1502 return rustancestor.MissingAncestors(self.index, common)
1512 1503 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1513 1504
1514 1505 def findmissingrevs(self, common=None, heads=None):
1515 1506 """Return the revision numbers of the ancestors of heads that
1516 1507 are not ancestors of common.
1517 1508
1518 1509 More specifically, return a list of revision numbers corresponding to
1519 1510 nodes N such that every N satisfies the following constraints:
1520 1511
1521 1512 1. N is an ancestor of some node in 'heads'
1522 1513 2. N is not an ancestor of any node in 'common'
1523 1514
1524 1515 The list is sorted by revision number, meaning it is
1525 1516 topologically sorted.
1526 1517
1527 1518 'heads' and 'common' are both lists of revision numbers. If heads is
1528 1519 not supplied, uses all of the revlog's heads. If common is not
1529 1520 supplied, uses nullid."""
1530 1521 if common is None:
1531 1522 common = [nullrev]
1532 1523 if heads is None:
1533 1524 heads = self.headrevs()
1534 1525
1535 1526 inc = self.incrementalmissingrevs(common=common)
1536 1527 return inc.missingancestors(heads)
1537 1528
1538 1529 def findmissing(self, common=None, heads=None):
1539 1530 """Return the ancestors of heads that are not ancestors of common.
1540 1531
1541 1532 More specifically, return a list of nodes N such that every N
1542 1533 satisfies the following constraints:
1543 1534
1544 1535 1. N is an ancestor of some node in 'heads'
1545 1536 2. N is not an ancestor of any node in 'common'
1546 1537
1547 1538 The list is sorted by revision number, meaning it is
1548 1539 topologically sorted.
1549 1540
1550 1541 'heads' and 'common' are both lists of node IDs. If heads is
1551 1542 not supplied, uses all of the revlog's heads. If common is not
1552 1543 supplied, uses nullid."""
1553 1544 if common is None:
1554 1545 common = [self.nullid]
1555 1546 if heads is None:
1556 1547 heads = self.heads()
1557 1548
1558 1549 common = [self.rev(n) for n in common]
1559 1550 heads = [self.rev(n) for n in heads]
1560 1551
1561 1552 inc = self.incrementalmissingrevs(common=common)
1562 1553 return [self.node(r) for r in inc.missingancestors(heads)]
1563 1554
1564 1555 def nodesbetween(self, roots=None, heads=None):
1565 1556 """Return a topological path from 'roots' to 'heads'.
1566 1557
1567 1558 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1568 1559 topologically sorted list of all nodes N that satisfy both of
1569 1560 these constraints:
1570 1561
1571 1562 1. N is a descendant of some node in 'roots'
1572 1563 2. N is an ancestor of some node in 'heads'
1573 1564
1574 1565 Every node is considered to be both a descendant and an ancestor
1575 1566 of itself, so every reachable node in 'roots' and 'heads' will be
1576 1567 included in 'nodes'.
1577 1568
1578 1569 'outroots' is the list of reachable nodes in 'roots', i.e., the
1579 1570 subset of 'roots' that is returned in 'nodes'. Likewise,
1580 1571 'outheads' is the subset of 'heads' that is also in 'nodes'.
1581 1572
1582 1573 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1583 1574 unspecified, uses nullid as the only root. If 'heads' is
1584 1575 unspecified, uses list of all of the revlog's heads."""
1585 1576 nonodes = ([], [], [])
1586 1577 if roots is not None:
1587 1578 roots = list(roots)
1588 1579 if not roots:
1589 1580 return nonodes
1590 1581 lowestrev = min([self.rev(n) for n in roots])
1591 1582 else:
1592 1583 roots = [self.nullid] # Everybody's a descendant of nullid
1593 1584 lowestrev = nullrev
1594 1585 if (lowestrev == nullrev) and (heads is None):
1595 1586 # We want _all_ the nodes!
1596 1587 return (
1597 1588 [self.node(r) for r in self],
1598 1589 [self.nullid],
1599 1590 list(self.heads()),
1600 1591 )
1601 1592 if heads is None:
1602 1593 # All nodes are ancestors, so the latest ancestor is the last
1603 1594 # node.
1604 1595 highestrev = len(self) - 1
1605 1596 # Set ancestors to None to signal that every node is an ancestor.
1606 1597 ancestors = None
1607 1598 # Set heads to an empty dictionary for later discovery of heads
1608 1599 heads = {}
1609 1600 else:
1610 1601 heads = list(heads)
1611 1602 if not heads:
1612 1603 return nonodes
1613 1604 ancestors = set()
1614 1605 # Turn heads into a dictionary so we can remove 'fake' heads.
1615 1606 # Also, later we will be using it to filter out the heads we can't
1616 1607 # find from roots.
1617 1608 heads = dict.fromkeys(heads, False)
1618 1609 # Start at the top and keep marking parents until we're done.
1619 1610 nodestotag = set(heads)
1620 1611 # Remember where the top was so we can use it as a limit later.
1621 1612 highestrev = max([self.rev(n) for n in nodestotag])
1622 1613 while nodestotag:
1623 1614 # grab a node to tag
1624 1615 n = nodestotag.pop()
1625 1616 # Never tag nullid
1626 1617 if n == self.nullid:
1627 1618 continue
1628 1619 # A node's revision number represents its place in a
1629 1620 # topologically sorted list of nodes.
1630 1621 r = self.rev(n)
1631 1622 if r >= lowestrev:
1632 1623 if n not in ancestors:
1633 1624 # If we are possibly a descendant of one of the roots
1634 1625 # and we haven't already been marked as an ancestor
1635 1626 ancestors.add(n) # Mark as ancestor
1636 1627 # Add non-nullid parents to list of nodes to tag.
1637 1628 nodestotag.update(
1638 1629 [p for p in self.parents(n) if p != self.nullid]
1639 1630 )
1640 1631 elif n in heads: # We've seen it before, is it a fake head?
1641 1632 # So it is, real heads should not be the ancestors of
1642 1633 # any other heads.
1643 1634 heads.pop(n)
1644 1635 if not ancestors:
1645 1636 return nonodes
1646 1637 # Now that we have our set of ancestors, we want to remove any
1647 1638 # roots that are not ancestors.
1648 1639
1649 1640 # If one of the roots was nullid, everything is included anyway.
1650 1641 if lowestrev > nullrev:
1651 1642 # But, since we weren't, let's recompute the lowest rev to not
1652 1643 # include roots that aren't ancestors.
1653 1644
1654 1645 # Filter out roots that aren't ancestors of heads
1655 1646 roots = [root for root in roots if root in ancestors]
1656 1647 # Recompute the lowest revision
1657 1648 if roots:
1658 1649 lowestrev = min([self.rev(root) for root in roots])
1659 1650 else:
1660 1651 # No more roots? Return empty list
1661 1652 return nonodes
1662 1653 else:
1663 1654 # We are descending from nullid, and don't need to care about
1664 1655 # any other roots.
1665 1656 lowestrev = nullrev
1666 1657 roots = [self.nullid]
1667 1658 # Transform our roots list into a set.
1668 1659 descendants = set(roots)
1669 1660 # Also, keep the original roots so we can filter out roots that aren't
1670 1661 # 'real' roots (i.e. are descended from other roots).
1671 1662 roots = descendants.copy()
1672 1663 # Our topologically sorted list of output nodes.
1673 1664 orderedout = []
1674 1665 # Don't start at nullid since we don't want nullid in our output list,
1675 1666 # and if nullid shows up in descendants, empty parents will look like
1676 1667 # they're descendants.
1677 1668 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1678 1669 n = self.node(r)
1679 1670 isdescendant = False
1680 1671 if lowestrev == nullrev: # Everybody is a descendant of nullid
1681 1672 isdescendant = True
1682 1673 elif n in descendants:
1683 1674 # n is already a descendant
1684 1675 isdescendant = True
1685 1676 # This check only needs to be done here because all the roots
1687 1678 # will start being marked as descendants before the loop.
1687 1678 if n in roots:
1688 1679 # If n was a root, check if it's a 'real' root.
1689 1680 p = tuple(self.parents(n))
1690 1681 # If any of its parents are descendants, it's not a root.
1691 1682 if (p[0] in descendants) or (p[1] in descendants):
1692 1683 roots.remove(n)
1693 1684 else:
1694 1685 p = tuple(self.parents(n))
1695 1686 # A node is a descendant if either of its parents are
1696 1687 # descendants. (We seeded the descendants set with the roots
1697 1688 # up there, remember?)
1698 1689 if (p[0] in descendants) or (p[1] in descendants):
1699 1690 descendants.add(n)
1700 1691 isdescendant = True
1701 1692 if isdescendant and ((ancestors is None) or (n in ancestors)):
1702 1693 # Only include nodes that are both descendants and ancestors.
1703 1694 orderedout.append(n)
1704 1695 if (ancestors is not None) and (n in heads):
1705 1696 # We're trying to figure out which heads are reachable
1706 1697 # from roots.
1707 1698 # Mark this head as having been reached
1708 1699 heads[n] = True
1709 1700 elif ancestors is None:
1710 1701 # Otherwise, we're trying to discover the heads.
1711 1702 # Assume this is a head because if it isn't, the next step
1712 1703 # will eventually remove it.
1713 1704 heads[n] = True
1714 1705 # But, obviously its parents aren't.
1715 1706 for p in self.parents(n):
1716 1707 heads.pop(p, None)
1717 1708 heads = [head for head, flag in heads.items() if flag]
1718 1709 roots = list(roots)
1719 1710 assert orderedout
1720 1711 assert roots
1721 1712 assert heads
1722 1713 return (orderedout, roots, heads)
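# Illustrative usage sketch (not part of the original file; ``rl`` is a
# hypothetical revlog instance):
#
#   nodes, outroots, outheads = rl.nodesbetween(
#       roots=[rl.node(1)], heads=[rl.node(4)]
#   )
#   # ``nodes`` is a topologically sorted path from node(1) to node(4)
#   # (both included when node(4) descends from node(1)); ``outroots``
#   # and ``outheads`` are the subsets of the arguments that actually
#   # appear in ``nodes``.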
1723 1714
1724 1715 def headrevs(self, revs=None):
1725 1716 if revs is None:
1726 1717 try:
1727 1718 return self.index.headrevs()
1728 1719 except AttributeError:
1729 1720 return self._headrevs()
1730 1721 if rustdagop is not None and self.index.rust_ext_compat:
1731 1722 return rustdagop.headrevs(self.index, revs)
1732 1723 return dagop.headrevs(revs, self._uncheckedparentrevs)
1733 1724
1734 1725 def computephases(self, roots):
1735 1726 return self.index.computephasesmapsets(roots)
1736 1727
1737 1728 def _headrevs(self):
1738 1729 count = len(self)
1739 1730 if not count:
1740 1731 return [nullrev]
1741 1732 # we won't iterate over filtered revs, so nobody is a head at the start
1742 1733 ishead = [0] * (count + 1)
1743 1734 index = self.index
1744 1735 for r in self:
1745 1736 ishead[r] = 1 # I may be a head
1746 1737 e = index[r]
1747 1738 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1748 1739 return [r for r, val in enumerate(ishead) if val]
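# Note on the pure-python fallback above (not part of the original
# file): ``ishead`` has ``count + 1`` slots so that marking a nullrev
# parent (-1) harmlessly writes to the extra trailing slot, which is
# never set to 1 and therefore never reported.  For a hypothetical
# 3-rev log where rev 1's parent is 0 and rev 2's parents are (0, 1),
# the net effect is:
#
#   ishead == [1, 1, 1, 0]   # every real rev starts as a candidate head
#   ishead == [0, 0, 1, 0]   # after clearing the slot of every parent
#   # -> headrevs: [2]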
1749 1740
1750 1741 def heads(self, start=None, stop=None):
1751 1742 """return the list of all nodes that have no children
1752 1743
1753 1744 if start is specified, only heads that are descendants of
1754 1745 start will be returned
1755 1746 if stop is specified, it will consider all the revs from stop
1756 1747 as if they had no children
1757 1748 """
1758 1749 if start is None and stop is None:
1759 1750 if not len(self):
1760 1751 return [self.nullid]
1761 1752 return [self.node(r) for r in self.headrevs()]
1762 1753
1763 1754 if start is None:
1764 1755 start = nullrev
1765 1756 else:
1766 1757 start = self.rev(start)
1767 1758
1768 1759 stoprevs = {self.rev(n) for n in stop or []}
1769 1760
1770 1761 revs = dagop.headrevssubset(
1771 1762 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1772 1763 )
1773 1764
1774 1765 return [self.node(rev) for rev in revs]
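# Illustrative usage sketch (not part of the original file; ``rl`` is a
# hypothetical revlog instance):
#
#   rl.heads()                     # every node without children
#   rl.heads(start=rl.node(2))     # only heads descending from node(2)
#   rl.heads(stop=[rl.node(5)])    # revs from node(5) act as childless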
1775 1766
1776 1767 def children(self, node):
1777 1768 """find the children of a given node"""
1778 1769 c = []
1779 1770 p = self.rev(node)
1780 1771 for r in self.revs(start=p + 1):
1781 1772 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1782 1773 if prevs:
1783 1774 for pr in prevs:
1784 1775 if pr == p:
1785 1776 c.append(self.node(r))
1786 1777 elif p == nullrev:
1787 1778 c.append(self.node(r))
1788 1779 return c
1789 1780
1790 1781 def commonancestorsheads(self, a, b):
1791 1782 """calculate all the heads of the common ancestors of nodes a and b"""
1792 1783 a, b = self.rev(a), self.rev(b)
1793 1784 ancs = self._commonancestorsheads(a, b)
1794 1785 return pycompat.maplist(self.node, ancs)
1795 1786
1796 1787 def _commonancestorsheads(self, *revs):
1797 1788 """calculate all the heads of the common ancestors of revs"""
1798 1789 try:
1799 1790 ancs = self.index.commonancestorsheads(*revs)
1800 1791 except (AttributeError, OverflowError): # C implementation failed
1801 1792 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1802 1793 return ancs
1803 1794
1804 1795 def isancestor(self, a, b):
1805 1796 """return True if node a is an ancestor of node b
1806 1797
1807 1798 A revision is considered an ancestor of itself."""
1808 1799 a, b = self.rev(a), self.rev(b)
1809 1800 return self.isancestorrev(a, b)
1810 1801
1811 1802 def isancestorrev(self, a, b):
1812 1803 """return True if revision a is an ancestor of revision b
1813 1804
1814 1805 A revision is considered an ancestor of itself.
1815 1806
1816 1807 The implementation of this is trivial but the use of
1817 1808 reachableroots is not."""
1818 1809 if a == nullrev:
1819 1810 return True
1820 1811 elif a == b:
1821 1812 return True
1822 1813 elif a > b:
1823 1814 return False
1824 1815 return bool(self.reachableroots(a, [b], [a], includepath=False))
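# Sketch of how the reachableroots() call above answers the ancestry
# question (not part of the original file): with roots=[a] and
# heads=[b] it reports which roots are reachable from b by following
# parent links, so the result is non-empty exactly when a is an
# ancestor of b.  With a hypothetical linear history 0 <- 1 <- 2:
#
#   rl.isancestorrev(0, 2)   # True: rev 0 is reachable from rev 2
#   rl.isancestorrev(2, 0)   # False: rejected early by the a > b check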
1825 1816
1826 1817 def reachableroots(self, minroot, heads, roots, includepath=False):
1827 1818 """return (heads(::(<roots> and <roots>::<heads>)))
1828 1819
1829 1820 If includepath is True, return (<roots>::<heads>)."""
1830 1821 try:
1831 1822 return self.index.reachableroots2(
1832 1823 minroot, heads, roots, includepath
1833 1824 )
1834 1825 except AttributeError:
1835 1826 return dagop._reachablerootspure(
1836 1827 self.parentrevs, minroot, roots, heads, includepath
1837 1828 )
1838 1829
1839 1830 def ancestor(self, a, b):
1840 1831 """calculate the "best" common ancestor of nodes a and b"""
1841 1832
1842 1833 a, b = self.rev(a), self.rev(b)
1843 1834 try:
1844 1835 ancs = self.index.ancestors(a, b)
1845 1836 except (AttributeError, OverflowError):
1846 1837 ancs = ancestor.ancestors(self.parentrevs, a, b)
1847 1838 if ancs:
1848 1839 # choose a consistent winner when there's a tie
1849 1840 return min(map(self.node, ancs))
1850 1841 return self.nullid
1851 1842
1852 1843 def _match(self, id):
1853 1844 if isinstance(id, int):
1854 1845 # rev
1855 1846 return self.node(id)
1856 1847 if len(id) == self.nodeconstants.nodelen:
1857 1848 # possibly a binary node
1858 1849 # odds of a binary node being all hex in ASCII are 1 in 10**25
1859 1850 try:
1860 1851 node = id
1861 1852 self.rev(node) # quick search the index
1862 1853 return node
1863 1854 except error.LookupError:
1864 1855 pass # may be partial hex id
1865 1856 try:
1866 1857 # str(rev)
1867 1858 rev = int(id)
1868 1859 if b"%d" % rev != id:
1869 1860 raise ValueError
1870 1861 if rev < 0:
1871 1862 rev = len(self) + rev
1872 1863 if rev < 0 or rev >= len(self):
1873 1864 raise ValueError
1874 1865 return self.node(rev)
1875 1866 except (ValueError, OverflowError):
1876 1867 pass
1877 1868 if len(id) == 2 * self.nodeconstants.nodelen:
1878 1869 try:
1879 1870 # a full hex nodeid?
1880 1871 node = bin(id)
1881 1872 self.rev(node)
1882 1873 return node
1883 1874 except (binascii.Error, error.LookupError):
1884 1875 pass
1885 1876
1886 1877 def _partialmatch(self, id):
1887 1878 # we don't care about wdirfilenodeids as they should always be full hashes
1888 1879 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1889 1880 ambiguous = False
1890 1881 try:
1891 1882 partial = self.index.partialmatch(id)
1892 1883 if partial and self.hasnode(partial):
1893 1884 if maybewdir:
1894 1885 # single 'ff...' match in radix tree, ambiguous with wdir
1895 1886 ambiguous = True
1896 1887 else:
1897 1888 return partial
1898 1889 elif maybewdir:
1899 1890 # no 'ff...' match in radix tree, wdir identified
1900 1891 raise error.WdirUnsupported
1901 1892 else:
1902 1893 return None
1903 1894 except error.RevlogError:
1904 1895 # parsers.c radix tree lookup gave multiple matches
1905 1896 # fast path: for unfiltered changelog, radix tree is accurate
1906 1897 if not getattr(self, 'filteredrevs', None):
1907 1898 ambiguous = True
1908 1899 # fall through to slow path that filters hidden revisions
1909 1900 except (AttributeError, ValueError):
1910 1901 # we are pure python, or key is not hex
1911 1902 pass
1912 1903 if ambiguous:
1913 1904 raise error.AmbiguousPrefixLookupError(
1914 1905 id, self.display_id, _(b'ambiguous identifier')
1915 1906 )
1916 1907
1917 1908 if id in self._pcache:
1918 1909 return self._pcache[id]
1919 1910
1920 1911 if len(id) <= 40:
1921 1912 # hex(node)[:...]
1922 1913 l = len(id) // 2 * 2 # grab an even number of digits
1923 1914 try:
1924 1915 # we're dropping the last digit, so let's check that it's hex,
1925 1916 # to avoid the expensive computation below if it's not
1926 1917 if len(id) % 2 > 0:
1927 1918 if not (id[-1] in hexdigits):
1928 1919 return None
1929 1920 prefix = bin(id[:l])
1930 1921 except binascii.Error:
1931 1922 pass
1932 1923 else:
1933 1924 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1934 1925 nl = [
1935 1926 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1936 1927 ]
1937 1928 if self.nodeconstants.nullhex.startswith(id):
1938 1929 nl.append(self.nullid)
1939 1930 if len(nl) > 0:
1940 1931 if len(nl) == 1 and not maybewdir:
1941 1932 self._pcache[id] = nl[0]
1942 1933 return nl[0]
1943 1934 raise error.AmbiguousPrefixLookupError(
1944 1935 id, self.display_id, _(b'ambiguous identifier')
1945 1936 )
1946 1937 if maybewdir:
1947 1938 raise error.WdirUnsupported
1948 1939 return None
1949 1940
1950 1941 def lookup(self, id):
1951 1942 """locate a node based on:
1952 1943 - revision number or str(revision number)
1953 1944 - nodeid or subset of hex nodeid
1954 1945 """
1955 1946 n = self._match(id)
1956 1947 if n is not None:
1957 1948 return n
1958 1949 n = self._partialmatch(id)
1959 1950 if n:
1960 1951 return n
1961 1952
1962 1953 raise error.LookupError(id, self.display_id, _(b'no match found'))
1963 1954
1964 1955 def shortest(self, node, minlength=1):
1965 1956 """Find the shortest unambiguous prefix that matches node."""
1966 1957
1967 1958 def isvalid(prefix):
1968 1959 try:
1969 1960 matchednode = self._partialmatch(prefix)
1970 1961 except error.AmbiguousPrefixLookupError:
1971 1962 return False
1972 1963 except error.WdirUnsupported:
1973 1964 # single 'ff...' match
1974 1965 return True
1975 1966 if matchednode is None:
1976 1967 raise error.LookupError(node, self.display_id, _(b'no node'))
1977 1968 return True
1978 1969
1979 1970 def maybewdir(prefix):
1980 1971 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1981 1972
1982 1973 hexnode = hex(node)
1983 1974
1984 1975 def disambiguate(hexnode, minlength):
1985 1976 """Disambiguate against wdirid."""
1986 1977 for length in range(minlength, len(hexnode) + 1):
1987 1978 prefix = hexnode[:length]
1988 1979 if not maybewdir(prefix):
1989 1980 return prefix
1990 1981
1991 1982 if not getattr(self, 'filteredrevs', None):
1992 1983 try:
1993 1984 length = max(self.index.shortest(node), minlength)
1994 1985 return disambiguate(hexnode, length)
1995 1986 except error.RevlogError:
1996 1987 if node != self.nodeconstants.wdirid:
1997 1988 raise error.LookupError(
1998 1989 node, self.display_id, _(b'no node')
1999 1990 )
2000 1991 except AttributeError:
2001 1992 # Fall through to pure code
2002 1993 pass
2003 1994
2004 1995 if node == self.nodeconstants.wdirid:
2005 1996 for length in range(minlength, len(hexnode) + 1):
2006 1997 prefix = hexnode[:length]
2007 1998 if isvalid(prefix):
2008 1999 return prefix
2009 2000
2010 2001 for length in range(minlength, len(hexnode) + 1):
2011 2002 prefix = hexnode[:length]
2012 2003 if isvalid(prefix):
2013 2004 return disambiguate(hexnode, length)
2014 2005
2015 2006 def cmp(self, node, text):
2016 2007 """compare text with a given file revision
2017 2008
2018 2009 returns True if text is different than what is stored.
2019 2010 """
2020 2011 p1, p2 = self.parents(node)
2021 2012 return storageutil.hashrevisionsha1(text, p1, p2) != node
2022 2013
2023 2014 def _getsegmentforrevs(self, startrev, endrev):
2024 2015 """Obtain a segment of raw data corresponding to a range of revisions.
2025 2016
2026 2017 Accepts the start and end revisions.
2029 2020
2030 2021 Requests for data may be satisfied by a cache.
2031 2022
2032 2023 Returns a 2-tuple of (offset, data) for the requested range of
2033 2024 revisions. Offset is the integer offset from the beginning of the
2034 2025 revlog and data is a str or buffer of the raw byte data.
2035 2026
2036 2027 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2037 2028 to determine where each revision's data begins and ends.
2038 2029 """
2039 2030 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2040 2031 # (functions are expensive).
2041 2032 index = self.index
2042 2033 istart = index[startrev]
2043 2034 start = int(istart[0] >> 16)
2044 2035 if startrev == endrev:
2045 2036 end = start + istart[1]
2046 2037 else:
2047 2038 iend = index[endrev]
2048 2039 end = int(iend[0] >> 16) + iend[1]
2049 2040
2050 2041 if self._inline:
2051 2042 start += (startrev + 1) * self.index.entry_size
2052 2043 end += (endrev + 1) * self.index.entry_size
2053 2044 length = end - start
2054 2045
2055 2046 return start, self._segmentfile.read_chunk(start, length)
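# Sketch of how callers slice individual revisions out of the segment
# returned above (this mirrors the loop in _chunks() below; not part
# of the original file):
#
#   offset, data = self._getsegmentforrevs(firstrev, lastrev)
#   for rev in range(firstrev, lastrev + 1):
#       chunkstart = self.start(rev)
#       if self._inline:
#           chunkstart += (rev + 1) * self.index.entry_size
#       chunk = data[chunkstart - offset:chunkstart - offset + self.length(rev)]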
2056 2047
2057 2048 def _chunk(self, rev):
2058 2049 """Obtain a single decompressed chunk for a revision.
2059 2050
2060 2051 Accepts an integer revision.
2063 2054
2064 2055 Returns a str holding uncompressed data for the requested revision.
2065 2056 """
2066 2057 compression_mode = self.index[rev][10]
2067 2058 data = self._getsegmentforrevs(rev, rev)[1]
2068 2059 if compression_mode == COMP_MODE_PLAIN:
2069 2060 return data
2070 2061 elif compression_mode == COMP_MODE_DEFAULT:
2071 2062 return self._decompressor(data)
2072 2063 elif compression_mode == COMP_MODE_INLINE:
2073 2064 return self.decompress(data)
2074 2065 else:
2075 2066 msg = b'unknown compression mode %d'
2076 2067 msg %= compression_mode
2077 2068 raise error.RevlogError(msg)
2078 2069
2079 2070 def _chunks(self, revs, targetsize=None):
2080 2071 """Obtain decompressed chunks for the specified revisions.
2081 2072
2082 2073 Accepts an iterable of numeric revisions that are assumed to be in
2083 2074 ascending order.
2086 2077
2087 2078 This function is similar to calling ``self._chunk()`` multiple times,
2088 2079 but is faster.
2089 2080
2090 2081 Returns a list with decompressed data for each requested revision.
2091 2082 """
2092 2083 if not revs:
2093 2084 return []
2094 2085 start = self.start
2095 2086 length = self.length
2096 2087 inline = self._inline
2097 2088 iosize = self.index.entry_size
2098 2089 buffer = util.buffer
2099 2090
2100 2091 l = []
2101 2092 ladd = l.append
2102 2093
2103 2094 if not self.data_config.with_sparse_read:
2104 2095 slicedchunks = (revs,)
2105 2096 else:
2106 2097 slicedchunks = deltautil.slicechunk(
2107 2098 self, revs, targetsize=targetsize
2108 2099 )
2109 2100
2110 2101 for revschunk in slicedchunks:
2111 2102 firstrev = revschunk[0]
2112 2103 # Skip trailing revisions with empty diff
2113 2104 for lastrev in revschunk[::-1]:
2114 2105 if length(lastrev) != 0:
2115 2106 break
2116 2107
2117 2108 try:
2118 2109 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2119 2110 except OverflowError:
2120 2111 # issue4215 - we can't cache a run of chunks greater than
2121 2112 # 2G on Windows
2122 2113 return [self._chunk(rev) for rev in revschunk]
2123 2114
2124 2115 decomp = self.decompress
2125 2116 # self._decompressor might be None, but will not be used in that case
2126 2117 def_decomp = self._decompressor
2127 2118 for rev in revschunk:
2128 2119 chunkstart = start(rev)
2129 2120 if inline:
2130 2121 chunkstart += (rev + 1) * iosize
2131 2122 chunklength = length(rev)
2132 2123 comp_mode = self.index[rev][10]
2133 2124 c = buffer(data, chunkstart - offset, chunklength)
2134 2125 if comp_mode == COMP_MODE_PLAIN:
2135 2126 ladd(c)
2136 2127 elif comp_mode == COMP_MODE_INLINE:
2137 2128 ladd(decomp(c))
2138 2129 elif comp_mode == COMP_MODE_DEFAULT:
2139 2130 ladd(def_decomp(c))
2140 2131 else:
2141 2132 msg = b'unknown compression mode %d'
2142 2133 msg %= comp_mode
2143 2134 raise error.RevlogError(msg)
2144 2135
2145 2136 return l
2146 2137
2147 2138 def deltaparent(self, rev):
2148 2139 """return deltaparent of the given revision"""
2149 2140 base = self.index[rev][3]
2150 2141 if base == rev:
2151 2142 return nullrev
2152 2143 elif self.delta_config.general_delta:
2153 2144 return base
2154 2145 else:
2155 2146 return rev - 1
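# Reading the three cases above (note, not part of the original file):
# ``base == rev`` means the revision is stored as a full snapshot and
# has no delta parent; with general delta the index records the delta
# base explicitly; legacy revlogs always delta against the previous
# revision, hence ``rev - 1``.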
2156 2147
2157 2148 def issnapshot(self, rev):
2158 2149 """tells whether rev is a snapshot"""
2159 2150 if not self.delta_config.sparse_revlog:
2160 2151 return self.deltaparent(rev) == nullrev
2161 2152 elif hasattr(self.index, 'issnapshot'):
2162 2153 # directly assign the method to cache the testing and access
2163 2154 self.issnapshot = self.index.issnapshot
2164 2155 return self.issnapshot(rev)
2165 2156 if rev == nullrev:
2166 2157 return True
2167 2158 entry = self.index[rev]
2168 2159 base = entry[3]
2169 2160 if base == rev:
2170 2161 return True
2171 2162 if base == nullrev:
2172 2163 return True
2173 2164 p1 = entry[5]
2174 2165 while self.length(p1) == 0:
2175 2166 b = self.deltaparent(p1)
2176 2167 if b == p1:
2177 2168 break
2178 2169 p1 = b
2179 2170 p2 = entry[6]
2180 2171 while self.length(p2) == 0:
2181 2172 b = self.deltaparent(p2)
2182 2173 if b == p2:
2183 2174 break
2184 2175 p2 = b
2185 2176 if base == p1 or base == p2:
2186 2177 return False
2187 2178 return self.issnapshot(base)
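# Note on the two while loops above (not part of the original file):
# they walk past empty revisions so that each parent is resolved to
# the revision actually holding its content.  A delta based on one of
# those effective parents is a plain delta, not a snapshot; a delta
# based on anything else is an intermediate snapshot only if its base
# is itself a snapshot, hence the final recursion.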
2188 2179
2189 2180 def snapshotdepth(self, rev):
2190 2181 """number of snapshot in the chain before this one"""
2191 2182 if not self.issnapshot(rev):
2192 2183 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2193 2184 return len(self._deltachain(rev)[0]) - 1
2194 2185
2195 2186 def revdiff(self, rev1, rev2):
2196 2187 """return or calculate a delta between two revisions
2197 2188
2198 2189 The delta calculated is in binary form and is intended to be written to
2199 2190 revlog data directly. So this function needs raw revision data.
2200 2191 """
2201 2192 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2202 2193 return bytes(self._chunk(rev2))
2203 2194
2204 2195 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2205 2196
2206 2197 def revision(self, nodeorrev):
2207 2198 """return an uncompressed revision of a given node or revision
2208 2199 number.
2209 2200 """
2210 2201 return self._revisiondata(nodeorrev)
2211 2202
2212 2203 def sidedata(self, nodeorrev):
2213 2204 """a map of extra data related to the changeset but not part of the hash
2214 2205
2215 2206 This function currently returns a dictionary. However, a more advanced
2216 2207 mapping object will likely be used in the future for more
2217 2208 efficient/lazy code.
2218 2209 """
2219 2210 # deal with <nodeorrev> argument type
2220 2211 if isinstance(nodeorrev, int):
2221 2212 rev = nodeorrev
2222 2213 else:
2223 2214 rev = self.rev(nodeorrev)
2224 2215 return self._sidedata(rev)
2225 2216
2226 2217 def _revisiondata(self, nodeorrev, raw=False):
2227 2218 # deal with <nodeorrev> argument type
2228 2219 if isinstance(nodeorrev, int):
2229 2220 rev = nodeorrev
2230 2221 node = self.node(rev)
2231 2222 else:
2232 2223 node = nodeorrev
2233 2224 rev = None
2234 2225
2235 2226 # fast path the special `nullid` rev
2236 2227 if node == self.nullid:
2237 2228 return b""
2238 2229
2239 2230 # ``rawtext`` is the text as stored inside the revlog. Might be the
2240 2231 # revision or might need to be processed to retrieve the revision.
2241 2232 rev, rawtext, validated = self._rawtext(node, rev)
2242 2233
2243 2234 if raw and validated:
2244 2235 # if we don't want to process the raw text and the raw
2245 2236 # text is cached, we can exit early.
2246 2237 return rawtext
2247 2238 if rev is None:
2248 2239 rev = self.rev(node)
2249 2240 # the revlog's flags for this revision
2250 2241 # (these usually alter its state or content)
2251 2242 flags = self.flags(rev)
2252 2243
2253 2244 if validated and flags == REVIDX_DEFAULT_FLAGS:
2254 2245 # no extra flags set, no flag processor runs, text = rawtext
2255 2246 return rawtext
2256 2247
2257 2248 if raw:
2258 2249 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2259 2250 text = rawtext
2260 2251 else:
2261 2252 r = flagutil.processflagsread(self, rawtext, flags)
2262 2253 text, validatehash = r
2263 2254 if validatehash:
2264 2255 self.checkhash(text, node, rev=rev)
2265 2256 if not validated:
2266 2257 self._revisioncache = (node, rev, rawtext)
2267 2258
2268 2259 return text
2269 2260
2270 2261 def _rawtext(self, node, rev):
2271 2262 """return the possibly unvalidated rawtext for a revision
2272 2263
2273 2264 returns (rev, rawtext, validated)
2274 2265 """
2275 2266
2276 2267 # revision in the cache (could be useful to apply delta)
2277 2268 cachedrev = None
2278 2269 # An intermediate text to apply deltas to
2279 2270 basetext = None
2280 2271
2281 2272 # Check if we have the entry in cache
2282 2273 # The cache entry looks like (node, rev, rawtext)
2283 2274 if self._revisioncache:
2284 2275 if self._revisioncache[0] == node:
2285 2276 return (rev, self._revisioncache[2], True)
2286 2277 cachedrev = self._revisioncache[1]
2287 2278
2288 2279 if rev is None:
2289 2280 rev = self.rev(node)
2290 2281
2291 2282 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2292 2283 if stopped:
2293 2284 basetext = self._revisioncache[2]
2294 2285
2295 2286 # drop the cache to save memory; the caller is expected to
2296 2287 # update self._revisioncache after validating the text
2297 2288 self._revisioncache = None
2298 2289
2299 2290 targetsize = None
2300 2291 rawsize = self.index[rev][2]
2301 2292 if 0 <= rawsize:
2302 2293 targetsize = 4 * rawsize
2303 2294
2304 2295 bins = self._chunks(chain, targetsize=targetsize)
2305 2296 if basetext is None:
2306 2297 basetext = bytes(bins[0])
2307 2298 bins = bins[1:]
2308 2299
2309 2300 rawtext = mdiff.patches(basetext, bins)
2310 2301 del basetext # let us have a chance to free memory early
2311 2302 return (rev, rawtext, False)
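# Sketch of the reconstruction performed above (not part of the
# original file): a revision is rebuilt by taking the first chunk of
# its delta chain as the base text and applying the remaining chunks
# as binary patches, e.g. for a chain of revs [5, 7, 9]:
#
#   bins = self._chunks([5, 7, 9])
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])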
2312 2303
2313 2304 def _sidedata(self, rev):
2314 2305 """Return the sidedata for a given revision number."""
2315 2306 index_entry = self.index[rev]
2316 2307 sidedata_offset = index_entry[8]
2317 2308 sidedata_size = index_entry[9]
2318 2309
2319 2310 if self._inline:
2320 2311 sidedata_offset += self.index.entry_size * (1 + rev)
2321 2312 if sidedata_size == 0:
2322 2313 return {}
2323 2314
2324 2315 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2325 2316 filename = self._sidedatafile
2326 2317 end = self._docket.sidedata_end
2327 2318 offset = sidedata_offset
2328 2319 length = sidedata_size
2329 2320 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2330 2321 raise error.RevlogError(m)
2331 2322
2332 2323 comp_segment = self._segmentfile_sidedata.read_chunk(
2333 2324 sidedata_offset, sidedata_size
2334 2325 )
2335 2326
2336 2327 comp = self.index[rev][11]
2337 2328 if comp == COMP_MODE_PLAIN:
2338 2329 segment = comp_segment
2339 2330 elif comp == COMP_MODE_DEFAULT:
2340 2331 segment = self._decompressor(comp_segment)
2341 2332 elif comp == COMP_MODE_INLINE:
2342 2333 segment = self.decompress(comp_segment)
2343 2334 else:
2344 2335 msg = b'unknown compression mode %d'
2345 2336 msg %= comp
2346 2337 raise error.RevlogError(msg)
2347 2338
2348 2339 sidedata = sidedatautil.deserialize_sidedata(segment)
2349 2340 return sidedata
2350 2341
2351 2342 def rawdata(self, nodeorrev):
2352 2343 """return an uncompressed raw data of a given node or revision number."""
2353 2344 return self._revisiondata(nodeorrev, raw=True)
2354 2345
2355 2346 def hash(self, text, p1, p2):
2356 2347 """Compute a node hash.
2357 2348
2358 2349 Available as a function so that subclasses can replace the hash
2359 2350 as needed.
2360 2351 """
2361 2352 return storageutil.hashrevisionsha1(text, p1, p2)
2362 2353
2363 2354 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2364 2355 """Check node hash integrity.
2365 2356
2366 2357 Available as a function so that subclasses can extend hash mismatch
2367 2358 behaviors as needed.
2368 2359 """
2369 2360 try:
2370 2361 if p1 is None and p2 is None:
2371 2362 p1, p2 = self.parents(node)
2372 2363 if node != self.hash(text, p1, p2):
2373 2364 # Clear the revision cache on hash failure. The revision cache
2374 2365 # only stores the raw revision and clearing the cache does have
2375 2366 # the side-effect that we won't have a cache hit when the raw
2376 2367 # revision data is accessed. But this case should be rare and
2377 2368 # it is extra work to teach the cache about the hash
2378 2369 # verification state.
2379 2370 if self._revisioncache and self._revisioncache[0] == node:
2380 2371 self._revisioncache = None
2381 2372
2382 2373 revornode = rev
2383 2374 if revornode is None:
2384 2375 revornode = templatefilters.short(hex(node))
2385 2376 raise error.RevlogError(
2386 2377 _(b"integrity check failed on %s:%s")
2387 2378 % (self.display_id, pycompat.bytestr(revornode))
2388 2379 )
2389 2380 except error.RevlogError:
2390 2381 if self.feature_config.censorable and storageutil.iscensoredtext(
2391 2382 text
2392 2383 ):
2393 2384 raise error.CensoredNodeError(self.display_id, node, text)
2394 2385 raise
2395 2386
2396 2387 @property
2397 2388 def _split_index_file(self):
2398 2389 """the path where to expect the index of an ongoing splitting operation
2399 2390
2400 2391 The file will only exist if a splitting operation is in progress, but
2401 2392 it is always expected at the same location."""
2402 2393 parts = self.radix.split(b'/')
2403 2394 if len(parts) > 1:
2404 2395 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2405 2396 head = parts[0] + b'-s'
2406 2397 mids = parts[1:-1]
2407 2398 tail = parts[-1] + b'.i'
2408 2399 pieces = [head] + mids + [tail]
2409 2400 return b'/'.join(pieces)
2410 2401 else:
2411 2402 # the revlog is stored at the root of the store (changelog or
2412 2403 # manifest), no risk of collision.
2413 2404 return self.radix + b'.i.s'
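# Example of the path mangling above (not part of the original file),
# for a hypothetical filelog radix and for a changelog-style radix:
#
#   b'data/some/dir/file.txt'  ->  b'data-s/some/dir/file.txt.i'
#   # stored under the 'data-s/' tree, so it can never collide with
#   # the real b'data/some/dir/file.txt.i'
#   b'00changelog'             ->  b'00changelog.i.s'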
2414 2405
2415 2406 def _enforceinlinesize(self, tr, side_write=True):
2416 2407 """Check if the revlog is too big for inline and convert if so.
2417 2408
2418 2409 This should be called after revisions are added to the revlog. If the
2419 2410 revlog has grown too large to be an inline revlog, it will convert it
2420 2411 to use multiple index and data files.
2421 2412 """
2422 2413 tiprev = len(self) - 1
2423 2414 total_size = self.start(tiprev) + self.length(tiprev)
2424 2415 if not self._inline or total_size < _maxinline:
2425 2416 return
2426 2417
2427 2418 troffset = tr.findoffset(self._indexfile)
2428 2419 if troffset is None:
2429 2420 raise error.RevlogError(
2430 2421 _(b"%s not found in the transaction") % self._indexfile
2431 2422 )
2432 2423 if troffset:
2433 2424 tr.addbackup(self._indexfile, for_offset=True)
2434 2425 tr.add(self._datafile, 0)
2435 2426
2436 2427 existing_handles = False
2437 2428 if self._writinghandles is not None:
2438 2429 existing_handles = True
2439 2430 fp = self._writinghandles[0]
2440 2431 fp.flush()
2441 2432 fp.close()
2442 2433 # We can't use the cached file handle after close(). So prevent
2443 2434 # its usage.
2444 2435 self._writinghandles = None
2445 2436 self._segmentfile.writing_handle = None
2446 2437 # No need to deal with sidedata writing handle as it is only
2447 2438 # relevant with revlog-v2 which is never inline, not reaching
2448 2439 # this code
2449 2440 if side_write:
2450 2441 old_index_file_path = self._indexfile
2451 2442 new_index_file_path = self._split_index_file
2452 2443 opener = self.opener
2453 2444 weak_self = weakref.ref(self)
2454 2445
2455 2446 # the "split" index replace the real index when the transaction is finalized
2456 2447 def finalize_callback(tr):
2457 2448 opener.rename(
2458 2449 new_index_file_path,
2459 2450 old_index_file_path,
2460 2451 checkambig=True,
2461 2452 )
2462 2453 maybe_self = weak_self()
2463 2454 if maybe_self is not None:
2464 2455 maybe_self._indexfile = old_index_file_path
2465 2456
2466 2457 def abort_callback(tr):
2467 2458 maybe_self = weak_self()
2468 2459 if maybe_self is not None:
2469 2460 maybe_self._indexfile = old_index_file_path
2470 2461
2471 2462 tr.registertmp(new_index_file_path)
2472 2463 if self.target[1] is not None:
2473 2464 callback_id = b'000-revlog-split-%d-%s' % self.target
2474 2465 else:
2475 2466 callback_id = b'000-revlog-split-%d' % self.target[0]
2476 2467 tr.addfinalize(callback_id, finalize_callback)
2477 2468 tr.addabort(callback_id, abort_callback)
2478 2469
2479 2470 new_dfh = self._datafp(b'w+')
2480 2471 new_dfh.truncate(0) # drop any potentially existing data
2481 2472 try:
2482 2473 with self.reading():
2483 2474 for r in self:
2484 2475 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2485 2476 new_dfh.flush()
2486 2477
2487 2478 if side_write:
2488 2479 self._indexfile = new_index_file_path
2489 2480 with self.__index_new_fp() as fp:
2490 2481 self._format_flags &= ~FLAG_INLINE_DATA
2491 2482 self._inline = False
2492 2483 for i in self:
2493 2484 e = self.index.entry_binary(i)
2494 2485 if i == 0 and self._docket is None:
2495 2486 header = self._format_flags | self._format_version
2496 2487 header = self.index.pack_header(header)
2497 2488 e = header + e
2498 2489 fp.write(e)
2499 2490 if self._docket is not None:
2500 2491 self._docket.index_end = fp.tell()
2501 2492
2502 2493 # If we don't use side-write, the temp file replaces the real
2503 2494 # index when we exit the context manager
2504 2495
2505 2496 nodemaputil.setup_persistent_nodemap(tr, self)
2506 2497 self._segmentfile = randomaccessfile.randomaccessfile(
2507 2498 self.opener,
2508 2499 self._datafile,
2509 2500 self.data_config.chunk_cache_size,
2510 2501 )
2511 2502
2512 2503 if existing_handles:
2513 2504 # switched from inline to conventional reopen the index
2514 2505 ifh = self.__index_write_fp()
2515 2506 self._writinghandles = (ifh, new_dfh, None)
2516 2507 self._segmentfile.writing_handle = new_dfh
2517 2508 new_dfh = None
2518 2509 # No need to deal with sidedata writing handle as it is only
2519 2510 # relevant with revlog-v2 which is never inline, not reaching
2520 2511 # this code
2521 2512 finally:
2522 2513 if new_dfh is not None:
2523 2514 new_dfh.close()
2524 2515
2525 2516 def _nodeduplicatecallback(self, transaction, node):
2526 2517 """called when trying to add a node already stored."""
2527 2518
2528 2519 @contextlib.contextmanager
2529 2520 def reading(self):
2530 2521 """Context manager that keeps data and sidedata files open for reading"""
2531 2522 if len(self.index) == 0:
2532 2523 yield # nothing to be read
2533 2524 else:
2534 2525 with self._segmentfile.reading():
2535 2526 with self._segmentfile_sidedata.reading():
2536 2527 yield
2537 2528
2538 2529 @contextlib.contextmanager
2539 2530 def _writing(self, transaction):
2540 2531 if self._trypending:
2541 2532 msg = b'try to write in a `trypending` revlog: %s'
2542 2533 msg %= self.display_id
2543 2534 raise error.ProgrammingError(msg)
2544 2535 if self._writinghandles is not None:
2545 2536 yield
2546 2537 else:
2547 2538 ifh = dfh = sdfh = None
2548 2539 try:
2549 2540 r = len(self)
2550 2541 # opening the data file.
2551 2542 dsize = 0
2552 2543 if r:
2553 2544 dsize = self.end(r - 1)
2554 2545 dfh = None
2555 2546 if not self._inline:
2556 2547 try:
2557 2548 dfh = self._datafp(b"r+")
2558 2549 if self._docket is None:
2559 2550 dfh.seek(0, os.SEEK_END)
2560 2551 else:
2561 2552 dfh.seek(self._docket.data_end, os.SEEK_SET)
2562 2553 except FileNotFoundError:
2563 2554 dfh = self._datafp(b"w+")
2564 2555 transaction.add(self._datafile, dsize)
2565 2556 if self._sidedatafile is not None:
2566 2557 # revlog-v2 does not inline, help Pytype
2567 2558 assert dfh is not None
2568 2559 try:
2569 2560 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2570 2561 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2571 2562 except FileNotFoundError:
2572 2563 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2573 2564 transaction.add(
2574 2565 self._sidedatafile, self._docket.sidedata_end
2575 2566 )
2576 2567
2577 2568 # opening the index file.
2578 2569 isize = r * self.index.entry_size
2579 2570 ifh = self.__index_write_fp()
2580 2571 if self._inline:
2581 2572 transaction.add(self._indexfile, dsize + isize)
2582 2573 else:
2583 2574 transaction.add(self._indexfile, isize)
2584 2575 # exposing all file handle for writing.
2585 2576 self._writinghandles = (ifh, dfh, sdfh)
2586 2577 self._segmentfile.writing_handle = ifh if self._inline else dfh
2587 2578 self._segmentfile_sidedata.writing_handle = sdfh
2588 2579 yield
2589 2580 if self._docket is not None:
2590 2581 self._write_docket(transaction)
2591 2582 finally:
2592 2583 self._writinghandles = None
2593 2584 self._segmentfile.writing_handle = None
2594 2585 self._segmentfile_sidedata.writing_handle = None
2595 2586 if dfh is not None:
2596 2587 dfh.close()
2597 2588 if sdfh is not None:
2598 2589 sdfh.close()
2599 2590 # closing the index file last to avoid exposing references to
2600 2591 # potentially unflushed data content.
2601 2592 if ifh is not None:
2602 2593 ifh.close()
2603 2594
2604 2595 def _write_docket(self, transaction):
2605 2596 """write the current docket on disk
2606 2597
2607 2598 Exists as a method to help the changelog implement transaction logic
2608 2599
2609 2600 We could also imagine using the same transaction logic for all revlogs
2610 2601 since dockets are cheap.
2611 2602 self._docket.write(transaction)
2612 2603
2613 2604 def addrevision(
2614 2605 self,
2615 2606 text,
2616 2607 transaction,
2617 2608 link,
2618 2609 p1,
2619 2610 p2,
2620 2611 cachedelta=None,
2621 2612 node=None,
2622 2613 flags=REVIDX_DEFAULT_FLAGS,
2623 2614 deltacomputer=None,
2624 2615 sidedata=None,
2625 2616 ):
2626 2617 """add a revision to the log
2627 2618
2628 2619 text - the revision data to add
2629 2620 transaction - the transaction object used for rollback
2630 2621 link - the linkrev data to add
2631 2622 p1, p2 - the parent nodeids of the revision
2632 2623 cachedelta - an optional precomputed delta
2633 2624 node - nodeid of revision; typically node is not specified, and it is
2634 2625 computed by default as hash(text, p1, p2); however, subclasses might
2635 2626 use a different hashing method (and override checkhash() in that case)
2636 2627 flags - the known flags to set on the revision
2637 2628 deltacomputer - an optional deltacomputer instance shared between
2638 2629 multiple calls
2639 2630 """
2640 2631 if link == nullrev:
2641 2632 raise error.RevlogError(
2642 2633 _(b"attempted to add linkrev -1 to %s") % self.display_id
2643 2634 )
2644 2635
2645 2636 if sidedata is None:
2646 2637 sidedata = {}
2647 2638 elif sidedata and not self.feature_config.has_side_data:
2648 2639 raise error.ProgrammingError(
2649 2640 _(b"trying to add sidedata to a revlog who don't support them")
2650 2641 )
2651 2642
2652 2643 if flags:
2653 2644 node = node or self.hash(text, p1, p2)
2654 2645
2655 2646 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2656 2647
2657 2648 # If the flag processor modifies the revision data, ignore any provided
2658 2649 # cachedelta.
2659 2650 if rawtext != text:
2660 2651 cachedelta = None
2661 2652
2662 2653 if len(rawtext) > _maxentrysize:
2663 2654 raise error.RevlogError(
2664 2655 _(
2665 2656 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2666 2657 )
2667 2658 % (self.display_id, len(rawtext))
2668 2659 )
2669 2660
2670 2661 node = node or self.hash(rawtext, p1, p2)
2671 2662 rev = self.index.get_rev(node)
2672 2663 if rev is not None:
2673 2664 return rev
2674 2665
2675 2666 if validatehash:
2676 2667 self.checkhash(rawtext, node, p1=p1, p2=p2)
2677 2668
2678 2669 return self.addrawrevision(
2679 2670 rawtext,
2680 2671 transaction,
2681 2672 link,
2682 2673 p1,
2683 2674 p2,
2684 2675 node,
2685 2676 flags,
2686 2677 cachedelta=cachedelta,
2687 2678 deltacomputer=deltacomputer,
2688 2679 sidedata=sidedata,
2689 2680 )
2690 2681
2691 2682 def addrawrevision(
2692 2683 self,
2693 2684 rawtext,
2694 2685 transaction,
2695 2686 link,
2696 2687 p1,
2697 2688 p2,
2698 2689 node,
2699 2690 flags,
2700 2691 cachedelta=None,
2701 2692 deltacomputer=None,
2702 2693 sidedata=None,
2703 2694 ):
2704 2695 """add a raw revision with known flags, node and parents
2705 2696 useful when reusing a revision not stored in this revlog (ex: received
2706 2697 over the wire, or read from an external bundle).
2707 2698 """
2708 2699 with self._writing(transaction):
2709 2700 return self._addrevision(
2710 2701 node,
2711 2702 rawtext,
2712 2703 transaction,
2713 2704 link,
2714 2705 p1,
2715 2706 p2,
2716 2707 flags,
2717 2708 cachedelta,
2718 2709 deltacomputer=deltacomputer,
2719 2710 sidedata=sidedata,
2720 2711 )
2721 2712
2722 2713 def compress(self, data):
2723 2714 """Generate a possibly-compressed representation of data."""
2724 2715 if not data:
2725 2716 return b'', data
2726 2717
2727 2718 compressed = self._compressor.compress(data)
2728 2719
2729 2720 if compressed:
2730 2721 # The revlog compressor added the header in the returned data.
2731 2722 return b'', compressed
2732 2723
2733 2724 if data[0:1] == b'\0':
2734 2725 return b'', data
2735 2726 return b'u', data
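# What the (header, data) pairs returned above mean (sketch, not part
# of the original file; ``large_text`` is a hypothetical compressible
# payload and the small literals are assumed to be left uncompressed
# by the engine):
#
#   self.compress(b'')          # (b'', b'')           nothing to store
#   self.compress(large_text)   # (b'', <compressed>)  engine header is
#                               #                      inside the data
#   self.compress(b'abc')       # (b'u', b'abc')   'u' marks raw text
#   self.compress(b'\0abc')     # (b'', b'\0abc')  a leading NUL is
#                               #                  already unambiguous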
2736 2727
2737 2728 def decompress(self, data):
2738 2729 """Decompress a revlog chunk.
2739 2730
2740 2731 The chunk is expected to begin with a header identifying the
2741 2732 format type so it can be routed to an appropriate decompressor.
2742 2733 """
2743 2734 if not data:
2744 2735 return data
2745 2736
2746 2737 # Revlogs are read much more frequently than they are written and many
2747 2738 # chunks only take microseconds to decompress, so performance is
2748 2739 # important here.
2749 2740 #
2750 2741 # We can make a few assumptions about revlogs:
2751 2742 #
2752 2743 # 1) the majority of chunks will be compressed (as opposed to inline
2753 2744 # raw data).
2754 2745 # 2) decompressing *any* data will likely be at least 10x slower than
2755 2746 # returning raw inline data.
2756 2747 # 3) we want to prioritize common and officially supported compression
2757 2748 # engines
2758 2749 #
2759 2750 # It follows that we want to optimize for "decompress compressed data
2760 2751 # when encoded with common and officially supported compression engines"
2761 2752 # case over "raw data" and "data encoded by less common or non-official
2762 2753 # compression engines." That is why we have the inline lookup first
2763 2754 # followed by the compengines lookup.
2764 2755 #
2765 2756 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2766 2757 # compressed chunks. And this matters for changelog and manifest reads.
2767 2758 t = data[0:1]
2768 2759
2769 2760 if t == b'x':
2770 2761 try:
2771 2762 return _zlibdecompress(data)
2772 2763 except zlib.error as e:
2773 2764 raise error.RevlogError(
2774 2765 _(b'revlog decompress error: %s')
2775 2766 % stringutil.forcebytestr(e)
2776 2767 )
2777 2768 # '\0' is more common than 'u' so it goes first.
2778 2769 elif t == b'\0':
2779 2770 return data
2780 2771 elif t == b'u':
2781 2772 return util.buffer(data, 1)
2782 2773
2783 2774 compressor = self._get_decompressor(t)
2784 2775
2785 2776 return compressor.decompress(data)
2786 2777
2787 2778 def _addrevision(
2788 2779 self,
2789 2780 node,
2790 2781 rawtext,
2791 2782 transaction,
2792 2783 link,
2793 2784 p1,
2794 2785 p2,
2795 2786 flags,
2796 2787 cachedelta,
2797 2788 alwayscache=False,
2798 2789 deltacomputer=None,
2799 2790 sidedata=None,
2800 2791 ):
2801 2792 """internal function to add revisions to the log
2802 2793
2803 2794 see addrevision for argument descriptions.
2804 2795
2805 2796 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2806 2797
2807 2798 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2808 2799 be used.
2809 2800
2810 2801 invariants:
2811 2802 - rawtext is optional (can be None); if not set, cachedelta must be set.
2812 2803 if both are set, they must correspond to each other.
2813 2804 """
2814 2805 if node == self.nullid:
2815 2806 raise error.RevlogError(
2816 2807 _(b"%s: attempt to add null revision") % self.display_id
2817 2808 )
2818 2809 if (
2819 2810 node == self.nodeconstants.wdirid
2820 2811 or node in self.nodeconstants.wdirfilenodeids
2821 2812 ):
2822 2813 raise error.RevlogError(
2823 2814 _(b"%s: attempt to add wdir revision") % self.display_id
2824 2815 )
2825 2816 if self._writinghandles is None:
2826 2817 msg = b'adding revision outside `revlog._writing` context'
2827 2818 raise error.ProgrammingError(msg)
2828 2819
2829 2820 btext = [rawtext]
2830 2821
2831 2822 curr = len(self)
2832 2823 prev = curr - 1
2833 2824
2834 2825 offset = self._get_data_offset(prev)
2835 2826
2836 2827 if self._concurrencychecker:
2837 2828 ifh, dfh, sdfh = self._writinghandles
2838 2829 # XXX no checking for the sidedata file
2839 2830 if self._inline:
2840 2831 # offset is "as if" it were in the .d file, so we need to add on
2841 2832 # the size of the entry metadata.
2842 2833 self._concurrencychecker(
2843 2834 ifh, self._indexfile, offset + curr * self.index.entry_size
2844 2835 )
2845 2836 else:
2846 2837 # Entries in the .i are a consistent size.
2847 2838 self._concurrencychecker(
2848 2839 ifh, self._indexfile, curr * self.index.entry_size
2849 2840 )
2850 2841 self._concurrencychecker(dfh, self._datafile, offset)
2851 2842
2852 2843 p1r, p2r = self.rev(p1), self.rev(p2)
2853 2844
2854 2845 # full versions are inserted when the needed deltas
2855 2846 # become comparable to the uncompressed text
2856 2847 if rawtext is None:
2857 2848 # need the rawtext size before it is changed by flag processors; this
2858 2849 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2859 2850 # logic that might remove metadata size.
2860 2851 textlen = mdiff.patchedsize(
2861 2852 revlog.size(self, cachedelta[0]), cachedelta[1]
2862 2853 )
2863 2854 else:
2864 2855 textlen = len(rawtext)
2865 2856
2866 2857 if deltacomputer is None:
2867 2858 write_debug = None
2868 2859 if self.delta_config.debug_delta:
2869 2860 write_debug = transaction._report
2870 2861 deltacomputer = deltautil.deltacomputer(
2871 2862 self, write_debug=write_debug
2872 2863 )
2873 2864
2874 2865 if cachedelta is not None and len(cachedelta) == 2:
2875 2866 # If the cached delta has no information about how it should be
2876 2867 # reused, add the default reuse instruction according to the
2877 2868 # revlog's configuration.
2878 2869 if (
2879 2870 self.delta_config.general_delta
2880 2871 and self.delta_config.lazy_delta_base
2881 2872 ):
2882 2873 delta_base_reuse = DELTA_BASE_REUSE_TRY
2883 2874 else:
2884 2875 delta_base_reuse = DELTA_BASE_REUSE_NO
2885 2876 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2886 2877
2887 2878 revinfo = revlogutils.revisioninfo(
2888 2879 node,
2889 2880 p1,
2890 2881 p2,
2891 2882 btext,
2892 2883 textlen,
2893 2884 cachedelta,
2894 2885 flags,
2895 2886 )
2896 2887
2897 2888 deltainfo = deltacomputer.finddeltainfo(revinfo)
2898 2889
2899 2890 compression_mode = COMP_MODE_INLINE
2900 2891 if self._docket is not None:
2901 2892 default_comp = self._docket.default_compression_header
2902 2893 r = deltautil.delta_compression(default_comp, deltainfo)
2903 2894 compression_mode, deltainfo = r
2904 2895
2905 2896 sidedata_compression_mode = COMP_MODE_INLINE
2906 2897 if sidedata and self.feature_config.has_side_data:
2907 2898 sidedata_compression_mode = COMP_MODE_PLAIN
2908 2899 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2909 2900 sidedata_offset = self._docket.sidedata_end
2910 2901 h, comp_sidedata = self.compress(serialized_sidedata)
2911 2902 if (
2912 2903 h != b'u'
2913 2904 and comp_sidedata[0:1] != b'\0'
2914 2905 and len(comp_sidedata) < len(serialized_sidedata)
2915 2906 ):
2916 2907 assert not h
2917 2908 if (
2918 2909 comp_sidedata[0:1]
2919 2910 == self._docket.default_compression_header
2920 2911 ):
2921 2912 sidedata_compression_mode = COMP_MODE_DEFAULT
2922 2913 serialized_sidedata = comp_sidedata
2923 2914 else:
2924 2915 sidedata_compression_mode = COMP_MODE_INLINE
2925 2916 serialized_sidedata = comp_sidedata
2926 2917 else:
2927 2918 serialized_sidedata = b""
2928 2919 # Don't store the offset if the sidedata is empty; that way
2929 2920 # we can easily detect empty sidedata, and it will be no different
2930 2921 # from sidedata we add manually.
2931 2922 sidedata_offset = 0
2932 2923
2933 2924 rank = RANK_UNKNOWN
2934 2925 if self.feature_config.compute_rank:
2935 2926 if (p1r, p2r) == (nullrev, nullrev):
2936 2927 rank = 1
2937 2928 elif p1r != nullrev and p2r == nullrev:
2938 2929 rank = 1 + self.fast_rank(p1r)
2939 2930 elif p1r == nullrev and p2r != nullrev:
2940 2931 rank = 1 + self.fast_rank(p2r)
2941 2932 else: # merge node
2942 2933 if rustdagop is not None and self.index.rust_ext_compat:
2943 2934 rank = rustdagop.rank(self.index, p1r, p2r)
2944 2935 else:
2945 2936 pmin, pmax = sorted((p1r, p2r))
2946 2937 rank = 1 + self.fast_rank(pmax)
2947 2938 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2948 2939
2949 2940 e = revlogutils.entry(
2950 2941 flags=flags,
2951 2942 data_offset=offset,
2952 2943 data_compressed_length=deltainfo.deltalen,
2953 2944 data_uncompressed_length=textlen,
2954 2945 data_compression_mode=compression_mode,
2955 2946 data_delta_base=deltainfo.base,
2956 2947 link_rev=link,
2957 2948 parent_rev_1=p1r,
2958 2949 parent_rev_2=p2r,
2959 2950 node_id=node,
2960 2951 sidedata_offset=sidedata_offset,
2961 2952 sidedata_compressed_length=len(serialized_sidedata),
2962 2953 sidedata_compression_mode=sidedata_compression_mode,
2963 2954 rank=rank,
2964 2955 )
2965 2956
2966 2957 self.index.append(e)
2967 2958 entry = self.index.entry_binary(curr)
2968 2959 if curr == 0 and self._docket is None:
2969 2960 header = self._format_flags | self._format_version
2970 2961 header = self.index.pack_header(header)
2971 2962 entry = header + entry
2972 2963 self._writeentry(
2973 2964 transaction,
2974 2965 entry,
2975 2966 deltainfo.data,
2976 2967 link,
2977 2968 offset,
2978 2969 serialized_sidedata,
2979 2970 sidedata_offset,
2980 2971 )
2981 2972
2982 2973 rawtext = btext[0]
2983 2974
2984 2975 if alwayscache and rawtext is None:
2985 2976 rawtext = deltacomputer.buildtext(revinfo)
2986 2977
2987 2978 if type(rawtext) == bytes: # only accept immutable objects
2988 2979 self._revisioncache = (node, curr, rawtext)
2989 2980 self._chainbasecache[curr] = deltainfo.chainbase
2990 2981 return curr
2991 2982
2992 2983 def _get_data_offset(self, prev):
2993 2984 """Returns the current offset in the (in-transaction) data file.
2994 2985 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2995 2986 file to store that information: since sidedata can be rewritten to the
2996 2987 end of the data file within a transaction, you can have cases where, for
2997 2988 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2998 2989 to `n - 1`'s sidedata being written after `n`'s data.
2999 2990
3000 2991 TODO cache this in a docket file before getting out of experimental."""
3001 2992 if self._docket is None:
3002 2993 return self.end(prev)
3003 2994 else:
3004 2995 return self._docket.data_end
3005 2996
3006 2997 def _writeentry(
3007 2998 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3008 2999 ):
3009 3000 # Files opened in a+ mode have inconsistent behavior on various
3010 3001 # platforms. Windows requires that a file positioning call be made
3011 3002 # when the file handle transitions between reads and writes. See
3012 3003 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3013 3004 # platforms, Python or the platform itself can be buggy. Some versions
3014 3005 # of Solaris have been observed to not append at the end of the file
3015 3006 # if the file was seeked to before the end. See issue4943 for more.
3016 3007 #
3017 3008 # We work around this issue by inserting a seek() before writing.
3018 3009 # Note: This is likely not necessary on Python 3. However, because
3019 3010 # the file handle is reused for reads and may be seeked there, we need
3020 3011 # to be careful before changing this.
3021 3012 if self._writinghandles is None:
3022 3013 msg = b'adding revision outside `revlog._writing` context'
3023 3014 raise error.ProgrammingError(msg)
3024 3015 ifh, dfh, sdfh = self._writinghandles
3025 3016 if self._docket is None:
3026 3017 ifh.seek(0, os.SEEK_END)
3027 3018 else:
3028 3019 ifh.seek(self._docket.index_end, os.SEEK_SET)
3029 3020 if dfh:
3030 3021 if self._docket is None:
3031 3022 dfh.seek(0, os.SEEK_END)
3032 3023 else:
3033 3024 dfh.seek(self._docket.data_end, os.SEEK_SET)
3034 3025 if sdfh:
3035 3026 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3036 3027
3037 3028 curr = len(self) - 1
3038 3029 if not self._inline:
3039 3030 transaction.add(self._datafile, offset)
3040 3031 if self._sidedatafile:
3041 3032 transaction.add(self._sidedatafile, sidedata_offset)
3042 3033 transaction.add(self._indexfile, curr * len(entry))
3043 3034 if data[0]:
3044 3035 dfh.write(data[0])
3045 3036 dfh.write(data[1])
3046 3037 if sidedata:
3047 3038 sdfh.write(sidedata)
3048 3039 ifh.write(entry)
3049 3040 else:
3050 3041 offset += curr * self.index.entry_size
3051 3042 transaction.add(self._indexfile, offset)
3052 3043 ifh.write(entry)
3053 3044 ifh.write(data[0])
3054 3045 ifh.write(data[1])
3055 3046 assert not sidedata
3056 3047 self._enforceinlinesize(transaction)
3057 3048 if self._docket is not None:
3058 3049 # revlog-v2 always has 3 writing handles, help Pytype
3059 3050 wh1 = self._writinghandles[0]
3060 3051 wh2 = self._writinghandles[1]
3061 3052 wh3 = self._writinghandles[2]
3062 3053 assert wh1 is not None
3063 3054 assert wh2 is not None
3064 3055 assert wh3 is not None
3065 3056 self._docket.index_end = wh1.tell()
3066 3057 self._docket.data_end = wh2.tell()
3067 3058 self._docket.sidedata_end = wh3.tell()
3068 3059
3069 3060 nodemaputil.setup_persistent_nodemap(transaction, self)
3070 3061
3071 3062 def addgroup(
3072 3063 self,
3073 3064 deltas,
3074 3065 linkmapper,
3075 3066 transaction,
3076 3067 alwayscache=False,
3077 3068 addrevisioncb=None,
3078 3069 duplicaterevisioncb=None,
3079 3070 debug_info=None,
3080 3071 delta_base_reuse_policy=None,
3081 3072 ):
3082 3073 """
3083 3074 add a delta group
3084 3075
3085 3076 given a set of deltas, add them to the revision log. the
3086 3077 first delta is against its parent, which should be in our
3087 3078 log; the rest are against the previous delta.
3088 3079
3089 3080 If ``addrevisioncb`` is defined, it will be called with arguments of
3090 3081 this revlog and the revision number that was added.
3091 3082 """
3092 3083
3093 3084 if self._adding_group:
3094 3085 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3095 3086
3096 3087 # read the default delta-base reuse policy from revlog config if the
3097 3088 # group did not specify one.
3098 3089 if delta_base_reuse_policy is None:
3099 3090 if (
3100 3091 self.delta_config.general_delta
3101 3092 and self.delta_config.lazy_delta_base
3102 3093 ):
3103 3094 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3104 3095 else:
3105 3096 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3106 3097
3107 3098 self._adding_group = True
3108 3099 empty = True
3109 3100 try:
3110 3101 with self._writing(transaction):
3111 3102 write_debug = None
3112 3103 if self.delta_config.debug_delta:
3113 3104 write_debug = transaction._report
3114 3105 deltacomputer = deltautil.deltacomputer(
3115 3106 self,
3116 3107 write_debug=write_debug,
3117 3108 debug_info=debug_info,
3118 3109 )
3119 3110 # loop through our set of deltas
3120 3111 for data in deltas:
3121 3112 (
3122 3113 node,
3123 3114 p1,
3124 3115 p2,
3125 3116 linknode,
3126 3117 deltabase,
3127 3118 delta,
3128 3119 flags,
3129 3120 sidedata,
3130 3121 ) = data
3131 3122 link = linkmapper(linknode)
3132 3123 flags = flags or REVIDX_DEFAULT_FLAGS
3133 3124
3134 3125 rev = self.index.get_rev(node)
3135 3126 if rev is not None:
3136 3127 # this can happen if two branches make the same change
3137 3128 self._nodeduplicatecallback(transaction, rev)
3138 3129 if duplicaterevisioncb:
3139 3130 duplicaterevisioncb(self, rev)
3140 3131 empty = False
3141 3132 continue
3142 3133
3143 3134 for p in (p1, p2):
3144 3135 if not self.index.has_node(p):
3145 3136 raise error.LookupError(
3146 3137 p, self.radix, _(b'unknown parent')
3147 3138 )
3148 3139
3149 3140 if not self.index.has_node(deltabase):
3150 3141 raise error.LookupError(
3151 3142 deltabase, self.display_id, _(b'unknown delta base')
3152 3143 )
3153 3144
3154 3145 baserev = self.rev(deltabase)
3155 3146
3156 3147 if baserev != nullrev and self.iscensored(baserev):
3157 3148 # if base is censored, delta must be full replacement in a
3158 3149 # single patch operation
3159 3150 hlen = struct.calcsize(b">lll")
3160 3151 oldlen = self.rawsize(baserev)
3161 3152 newlen = len(delta) - hlen
3162 3153 if delta[:hlen] != mdiff.replacediffheader(
3163 3154 oldlen, newlen
3164 3155 ):
3165 3156 raise error.CensoredBaseError(
3166 3157 self.display_id, self.node(baserev)
3167 3158 )
3168 3159
3169 3160 if not flags and self._peek_iscensored(baserev, delta):
3170 3161 flags |= REVIDX_ISCENSORED
3171 3162
3172 3163 # We assume consumers of addrevisioncb will want to retrieve
3173 3164 # the added revision, which will require a call to
3174 3165 # revision(). revision() will fast path if there is a cache
3175 3166 # hit. So, we tell _addrevision() to always cache in this case.
3176 3167 # We're only using addgroup() in the context of changegroup
3177 3168 # generation so the revision data can always be handled as raw
3178 3169 # by the flagprocessor.
3179 3170 rev = self._addrevision(
3180 3171 node,
3181 3172 None,
3182 3173 transaction,
3183 3174 link,
3184 3175 p1,
3185 3176 p2,
3186 3177 flags,
3187 3178 (baserev, delta, delta_base_reuse_policy),
3188 3179 alwayscache=alwayscache,
3189 3180 deltacomputer=deltacomputer,
3190 3181 sidedata=sidedata,
3191 3182 )
3192 3183
3193 3184 if addrevisioncb:
3194 3185 addrevisioncb(self, rev)
3195 3186 empty = False
3196 3187 finally:
3197 3188 self._adding_group = False
3198 3189 return not empty
3199 3190
3200 3191 def iscensored(self, rev):
3201 3192 """Check if a file revision is censored."""
3202 3193 if not self.feature_config.censorable:
3203 3194 return False
3204 3195
3205 3196 return self.flags(rev) & REVIDX_ISCENSORED
3206 3197
3207 3198 def _peek_iscensored(self, baserev, delta):
3208 3199 """Quickly check if a delta produces a censored revision."""
3209 3200 if not self.feature_config.censorable:
3210 3201 return False
3211 3202
3212 3203 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3213 3204
3214 3205 def getstrippoint(self, minlink):
3215 3206 """find the minimum rev that must be stripped to strip the linkrev
3216 3207
3217 3208 Returns a tuple containing the minimum rev and a set of all revs that
3218 3209 have linkrevs that will be broken by this strip.
3219 3210 """
3220 3211 return storageutil.resolvestripinfo(
3221 3212 minlink,
3222 3213 len(self) - 1,
3223 3214 self.headrevs(),
3224 3215 self.linkrev,
3225 3216 self.parentrevs,
3226 3217 )
3227 3218
3228 3219 def strip(self, minlink, transaction):
3229 3220 """truncate the revlog on the first revision with a linkrev >= minlink
3230 3221
3231 3222 This function is called when we're stripping revision minlink and
3232 3223 its descendants from the repository.
3233 3224
3234 3225 We have to remove all revisions with linkrev >= minlink, because
3235 3226 the equivalent changelog revisions will be renumbered after the
3236 3227 strip.
3237 3228
3238 3229 So we truncate the revlog on the first of these revisions, and
3239 3230 trust that the caller has saved the revisions that shouldn't be
3240 3231 removed and that it'll re-add them after this truncation.
3241 3232 """
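# In the inline case handled below, index entries and revision data share a
# single file, so the index truncation offset also has to include
# ``data_end``; in the non-inline case the data and index files are
# truncated separately.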
3242 3233 if len(self) == 0:
3243 3234 return
3244 3235
3245 3236 rev, _ = self.getstrippoint(minlink)
3246 3237 if rev == len(self):
3247 3238 return
3248 3239
3249 3240 # first truncate the files on disk
3250 3241 data_end = self.start(rev)
3251 3242 if not self._inline:
3252 3243 transaction.add(self._datafile, data_end)
3253 3244 end = rev * self.index.entry_size
3254 3245 else:
3255 3246 end = data_end + (rev * self.index.entry_size)
3256 3247
3257 3248 if self._sidedatafile:
3258 3249 sidedata_end = self.sidedata_cut_off(rev)
3259 3250 transaction.add(self._sidedatafile, sidedata_end)
3260 3251
3261 3252 transaction.add(self._indexfile, end)
3262 3253 if self._docket is not None:
3263 3254 # XXX we could leverage the docket while stripping. However, it is
3264 3255 # not powerful enough at the time of this comment
3265 3256 self._docket.index_end = end
3266 3257 self._docket.data_end = data_end
3267 3258 self._docket.sidedata_end = sidedata_end
3268 3259 self._docket.write(transaction, stripping=True)
3269 3260
3270 3261 # then reset internal state in memory to forget those revisions
3271 3262 self._revisioncache = None
3272 3263 self._chaininfocache = util.lrucachedict(500)
3273 3264 self._segmentfile.clear_cache()
3274 3265 self._segmentfile_sidedata.clear_cache()
3275 3266
3276 3267 del self.index[rev:-1]
3277 3268
3278 3269 def checksize(self):
3279 3270 """Check size of index and data files
3280 3271
3281 3272 return a (dd, di) tuple.
3282 3273 - dd: extra bytes for the "data" file
3283 3274 - di: extra bytes for the "index" file
3284 3275
3285 3276 A healthy revlog will return (0, 0).
3286 3277 """
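# Example interpretation: ``(0, 0)`` means both files match what the index
# describes; a non-zero ``dd`` means the data file is larger or smaller than
# expected, and a non-zero ``di`` means the index file size does not line up
# with the entry size.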
3287 3278 expected = 0
3288 3279 if len(self):
3289 3280 expected = max(0, self.end(len(self) - 1))
3290 3281
3291 3282 try:
3292 3283 with self._datafp() as f:
3293 3284 f.seek(0, io.SEEK_END)
3294 3285 actual = f.tell()
3295 3286 dd = actual - expected
3296 3287 except FileNotFoundError:
3297 3288 dd = 0
3298 3289
3299 3290 try:
3300 3291 f = self.opener(self._indexfile)
3301 3292 f.seek(0, io.SEEK_END)
3302 3293 actual = f.tell()
3303 3294 f.close()
3304 3295 s = self.index.entry_size
3305 3296 i = max(0, actual // s)
3306 3297 di = actual - (i * s)
3307 3298 if self._inline:
3308 3299 databytes = 0
3309 3300 for r in self:
3310 3301 databytes += max(0, self.length(r))
3311 3302 dd = 0
3312 3303 di = actual - len(self) * s - databytes
3313 3304 except FileNotFoundError:
3314 3305 di = 0
3315 3306
3316 3307 return (dd, di)
3317 3308
3318 3309 def files(self):
3319 3310 """return list of files that compose this revlog"""
3320 3311 res = [self._indexfile]
3321 3312 if self._docket_file is None:
3322 3313 if not self._inline:
3323 3314 res.append(self._datafile)
3324 3315 else:
3325 3316 res.append(self._docket_file)
3326 3317 res.extend(self._docket.old_index_filepaths(include_empty=False))
3327 3318 if self._docket.data_end:
3328 3319 res.append(self._datafile)
3329 3320 res.extend(self._docket.old_data_filepaths(include_empty=False))
3330 3321 if self._docket.sidedata_end:
3331 3322 res.append(self._sidedatafile)
3332 3323 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3333 3324 return res
3334 3325
3335 3326 def emitrevisions(
3336 3327 self,
3337 3328 nodes,
3338 3329 nodesorder=None,
3339 3330 revisiondata=False,
3340 3331 assumehaveparentrevisions=False,
3341 3332 deltamode=repository.CG_DELTAMODE_STD,
3342 3333 sidedata_helpers=None,
3343 3334 debug_info=None,
3344 3335 ):
3345 3336 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3346 3337 raise error.ProgrammingError(
3347 3338 b'unhandled value for nodesorder: %s' % nodesorder
3348 3339 )
3349 3340
3350 3341 if nodesorder is None and not self.delta_config.general_delta:
3351 3342 nodesorder = b'storage'
3352 3343
3353 3344 if (
3354 3345 not self._storedeltachains
3355 3346 and deltamode != repository.CG_DELTAMODE_PREV
3356 3347 ):
3357 3348 deltamode = repository.CG_DELTAMODE_FULL
3358 3349
3359 3350 return storageutil.emitrevisions(
3360 3351 self,
3361 3352 nodes,
3362 3353 nodesorder,
3363 3354 revlogrevisiondelta,
3364 3355 deltaparentfn=self.deltaparent,
3365 3356 candeltafn=self._candelta,
3366 3357 rawsizefn=self.rawsize,
3367 3358 revdifffn=self.revdiff,
3368 3359 flagsfn=self.flags,
3369 3360 deltamode=deltamode,
3370 3361 revisiondata=revisiondata,
3371 3362 assumehaveparentrevisions=assumehaveparentrevisions,
3372 3363 sidedata_helpers=sidedata_helpers,
3373 3364 debug_info=debug_info,
3374 3365 )
3375 3366
3376 3367 DELTAREUSEALWAYS = b'always'
3377 3368 DELTAREUSESAMEREVS = b'samerevs'
3378 3369 DELTAREUSENEVER = b'never'
3379 3370
3380 3371 DELTAREUSEFULLADD = b'fulladd'
3381 3372
3382 3373 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3383 3374
3384 3375 def clone(
3385 3376 self,
3386 3377 tr,
3387 3378 destrevlog,
3388 3379 addrevisioncb=None,
3389 3380 deltareuse=DELTAREUSESAMEREVS,
3390 3381 forcedeltabothparents=None,
3391 3382 sidedata_helpers=None,
3392 3383 ):
3393 3384 """Copy this revlog to another, possibly with format changes.
3394 3385
3395 3386 The destination revlog will contain the same revisions and nodes.
3396 3387 However, it may not be bit-for-bit identical due to e.g. delta encoding
3397 3388 differences.
3398 3389
3399 3390 The ``deltareuse`` argument controls how deltas from the existing revlog
3400 3391 are preserved in the destination revlog. The argument can have the
3401 3392 following values:
3402 3393
3403 3394 DELTAREUSEALWAYS
3404 3395 Deltas will always be reused (if possible), even if the destination
3405 3396 revlog would not select the same revisions for the delta. This is the
3406 3397 fastest mode of operation.
3407 3398 DELTAREUSESAMEREVS
3408 3399 Deltas will be reused if the destination revlog would pick the same
3409 3400 revisions for the delta. This mode strikes a balance between speed
3410 3401 and optimization.
3411 3402 DELTAREUSENEVER
3412 3403 Deltas will never be reused. This is the slowest mode of execution.
3413 3404 This mode can be used to recompute deltas (e.g. if the diff/delta
3414 3405 algorithm changes).
3415 3406 DELTAREUSEFULLADD
3416 3407 Revisions will be re-added as if they were new content. This is
3417 3408 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3418 3409 e.g. large file detection and handling.
3419 3410
3420 3411 Delta computation can be slow, so the choice of delta reuse policy can
3421 3412 significantly affect run time.
3422 3413
3423 3414 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3424 3415 two extremes. Deltas will be reused when they remain appropriate, but
3425 3416 recomputed when a better base revision is available. This means if you
3426 3417 are converting a non-generaldelta revlog to a generaldelta revlog,
3427 3418 deltas will be recomputed if the delta's parent isn't a parent of the
3428 3419 revision.
3429 3420
3430 3421 In addition to the delta policy, the ``forcedeltabothparents``
3431 3422 argument controls whether to force computing deltas against both parents
3432 3423 for merges. If it is not set, the destination revlog's existing setting is used.
3433 3424
3434 3425 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3435 3426 `sidedata_helpers`.
3436 3427 """
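# A minimal usage sketch (the names ``src_revlog``, ``dst_revlog`` and ``tr``
# are hypothetical stand-ins for a source revlog, an empty destination revlog
# and an open transaction):
#
#     src_revlog.clone(
#         tr,
#         dst_revlog,
#         deltareuse=src_revlog.DELTAREUSESAMEREVS,
#     )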
3437 3428 if deltareuse not in self.DELTAREUSEALL:
3438 3429 raise ValueError(
3439 3430 _(b'value for deltareuse invalid: %s') % deltareuse
3440 3431 )
3441 3432
3442 3433 if len(destrevlog):
3443 3434 raise ValueError(_(b'destination revlog is not empty'))
3444 3435
3445 3436 if getattr(self, 'filteredrevs', None):
3446 3437 raise ValueError(_(b'source revlog has filtered revisions'))
3447 3438 if getattr(destrevlog, 'filteredrevs', None):
3448 3439 raise ValueError(_(b'destination revlog has filtered revisions'))
3449 3440
3450 3441 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3451 3442 # if possible.
3452 3443 old_delta_config = destrevlog.delta_config
3453 3444 destrevlog.delta_config = destrevlog.delta_config.copy()
3454 3445
3455 3446 try:
3456 3447 if deltareuse == self.DELTAREUSEALWAYS:
3457 3448 destrevlog.delta_config.lazy_delta_base = True
3458 3449 destrevlog.delta_config.lazy_delta = True
3459 3450 elif deltareuse == self.DELTAREUSESAMEREVS:
3460 3451 destrevlog.delta_config.lazy_delta_base = False
3461 3452 destrevlog.delta_config.lazy_delta = True
3462 3453 elif deltareuse == self.DELTAREUSENEVER:
3463 3454 destrevlog.delta_config.lazy_delta_base = False
3464 3455 destrevlog.delta_config.lazy_delta = False
3465 3456
3466 3457 delta_both_parents = (
3467 3458 forcedeltabothparents or old_delta_config.delta_both_parents
3468 3459 )
3469 3460 destrevlog.delta_config.delta_both_parents = delta_both_parents
3470 3461
3471 3462 with self.reading(), destrevlog._writing(tr):
3472 3463 self._clone(
3473 3464 tr,
3474 3465 destrevlog,
3475 3466 addrevisioncb,
3476 3467 deltareuse,
3477 3468 forcedeltabothparents,
3478 3469 sidedata_helpers,
3479 3470 )
3480 3471
3481 3472 finally:
3482 3473 destrevlog.delta_config = old_delta_config
3483 3474
3484 3475 def _clone(
3485 3476 self,
3486 3477 tr,
3487 3478 destrevlog,
3488 3479 addrevisioncb,
3489 3480 deltareuse,
3490 3481 forcedeltabothparents,
3491 3482 sidedata_helpers,
3492 3483 ):
3493 3484 """perform the core duty of `revlog.clone` after parameter processing"""
3494 3485 write_debug = None
3495 3486 if self.delta_config.debug_delta:
3496 3487 write_debug = tr._report
3497 3488 deltacomputer = deltautil.deltacomputer(
3498 3489 destrevlog,
3499 3490 write_debug=write_debug,
3500 3491 )
3501 3492 index = self.index
3502 3493 for rev in self:
3503 3494 entry = index[rev]
3504 3495
3505 3496 # Some classes override linkrev to take filtered revs into
3506 3497 # account. Use raw entry from index.
3507 3498 flags = entry[0] & 0xFFFF
3508 3499 linkrev = entry[4]
3509 3500 p1 = index[entry[5]][7]
3510 3501 p2 = index[entry[6]][7]
3511 3502 node = entry[7]
3512 3503
3513 3504 # (Possibly) reuse the delta from the revlog if allowed and
3514 3505 # the revlog chunk is a delta.
3515 3506 cachedelta = None
3516 3507 rawtext = None
3517 3508 if deltareuse == self.DELTAREUSEFULLADD:
3518 3509 text = self._revisiondata(rev)
3519 3510 sidedata = self.sidedata(rev)
3520 3511
3521 3512 if sidedata_helpers is not None:
3522 3513 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3523 3514 self, sidedata_helpers, sidedata, rev
3524 3515 )
3525 3516 flags = flags | new_flags[0] & ~new_flags[1]
3526 3517
3527 3518 destrevlog.addrevision(
3528 3519 text,
3529 3520 tr,
3530 3521 linkrev,
3531 3522 p1,
3532 3523 p2,
3533 3524 cachedelta=cachedelta,
3534 3525 node=node,
3535 3526 flags=flags,
3536 3527 deltacomputer=deltacomputer,
3537 3528 sidedata=sidedata,
3538 3529 )
3539 3530 else:
3540 3531 if destrevlog.delta_config.lazy_delta:
3541 3532 dp = self.deltaparent(rev)
3542 3533 if dp != nullrev:
3543 3534 cachedelta = (dp, bytes(self._chunk(rev)))
3544 3535
3545 3536 sidedata = None
3546 3537 if not cachedelta:
3547 3538 try:
3548 3539 rawtext = self._revisiondata(rev)
3549 3540 except error.CensoredNodeError as censored:
3550 3541 assert flags & REVIDX_ISCENSORED
3551 3542 rawtext = censored.tombstone
3552 3543 sidedata = self.sidedata(rev)
3553 3544 if sidedata is None:
3554 3545 sidedata = self.sidedata(rev)
3555 3546
3556 3547 if sidedata_helpers is not None:
3557 3548 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3558 3549 self, sidedata_helpers, sidedata, rev
3559 3550 )
3560 3551 flags = flags | new_flags[0] & ~new_flags[1]
3561 3552
3562 3553 destrevlog._addrevision(
3563 3554 node,
3564 3555 rawtext,
3565 3556 tr,
3566 3557 linkrev,
3567 3558 p1,
3568 3559 p2,
3569 3560 flags,
3570 3561 cachedelta,
3571 3562 deltacomputer=deltacomputer,
3572 3563 sidedata=sidedata,
3573 3564 )
3574 3565
3575 3566 if addrevisioncb:
3576 3567 addrevisioncb(self, rev, node)
3577 3568
3578 3569 def censorrevision(self, tr, censornode, tombstone=b''):
3579 3570 if self._format_version == REVLOGV0:
3580 3571 raise error.RevlogError(
3581 3572 _(b'cannot censor with version %d revlogs')
3582 3573 % self._format_version
3583 3574 )
3584 3575 elif self._format_version == REVLOGV1:
3585 3576 rewrite.v1_censor(self, tr, censornode, tombstone)
3586 3577 else:
3587 3578 rewrite.v2_censor(self, tr, censornode, tombstone)
3588 3579
3589 3580 def verifyintegrity(self, state):
3590 3581 """Verifies the integrity of the revlog.
3591 3582
3592 3583 Yields ``revlogproblem`` instances describing problems that are
3593 3584 found.
3594 3585 """
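# A minimal consumption sketch (``rl``, ``state`` and ``ui`` are hypothetical
# stand-ins for a revlog, a verifier state dict and a ui object):
#
#     for problem in rl.verifyintegrity(state):
#         if problem.error:
#             ui.warn(problem.error + b'\n')
#         elif problem.warning:
#             ui.warn(problem.warning + b'\n')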
3595 3586 dd, di = self.checksize()
3596 3587 if dd:
3597 3588 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3598 3589 if di:
3599 3590 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3600 3591
3601 3592 version = self._format_version
3602 3593
3603 3594 # The verifier tells us what version revlog we should be.
3604 3595 if version != state[b'expectedversion']:
3605 3596 yield revlogproblem(
3606 3597 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3607 3598 % (self.display_id, version, state[b'expectedversion'])
3608 3599 )
3609 3600
3610 3601 state[b'skipread'] = set()
3611 3602 state[b'safe_renamed'] = set()
3612 3603
3613 3604 for rev in self:
3614 3605 node = self.node(rev)
3615 3606
3616 3607 # Verify contents. 4 cases to care about:
3617 3608 #
3618 3609 # common: the most common case
3619 3610 # rename: with a rename
3620 3611 # meta: file content starts with b'\1\n', the metadata
3621 3612 # header defined in filelog.py, but without a rename
3622 3613 # ext: content stored externally
3623 3614 #
3624 3615 # More formally, their differences are shown below:
3625 3616 #
3626 3617 # | common | rename | meta | ext
3627 3618 # -------------------------------------------------------
3628 3619 # flags() | 0 | 0 | 0 | not 0
3629 3620 # renamed() | False | True | False | ?
3630 3621 # rawtext[0:2]=='\1\n'| False | True | True | ?
3631 3622 #
3632 3623 # "rawtext" means the raw text stored in revlog data, which
3633 3624 # could be retrieved by "rawdata(rev)". "text"
3634 3625 # mentioned below is "revision(rev)".
3635 3626 #
3636 3627 # There are 3 different lengths stored physically:
3637 3628 # 1. L1: rawsize, stored in revlog index
3638 3629 # 2. L2: len(rawtext), stored in revlog data
3639 3630 # 3. L3: len(text), stored in revlog data if flags==0, or
3640 3631 # possibly somewhere else if flags!=0
3641 3632 #
3642 3633 # L1 should be equal to L2. L3 could be different from them.
3643 3634 # "text" may or may not affect commit hash depending on flag
3644 3635 # processors (see flagutil.addflagprocessor).
3645 3636 #
3646 3637 # | common | rename | meta | ext
3647 3638 # -------------------------------------------------
3648 3639 # rawsize() | L1 | L1 | L1 | L1
3649 3640 # size() | L1 | L2-LM | L1(*) | L1 (?)
3650 3641 # len(rawtext) | L2 | L2 | L2 | L2
3651 3642 # len(text) | L2 | L2 | L2 | L3
3652 3643 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3653 3644 #
3654 3645 # LM: length of metadata, depending on rawtext
3655 3646 # (*): not ideal, see comment in filelog.size
3656 3647 # (?): could be "- len(meta)" if the resolved content has
3657 3648 # rename metadata
3658 3649 #
3659 3650 # Checks needed to be done:
3660 3651 # 1. length check: L1 == L2, in all cases.
3661 3652 # 2. hash check: depending on flag processor, we may need to
3662 3653 # use either "text" (external), or "rawtext" (in revlog).
3663 3654
3664 3655 try:
3665 3656 skipflags = state.get(b'skipflags', 0)
3666 3657 if skipflags:
3667 3658 skipflags &= self.flags(rev)
3668 3659
3669 3660 _verify_revision(self, skipflags, state, node)
3670 3661
3671 3662 l1 = self.rawsize(rev)
3672 3663 l2 = len(self.rawdata(node))
3673 3664
3674 3665 if l1 != l2:
3675 3666 yield revlogproblem(
3676 3667 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3677 3668 node=node,
3678 3669 )
3679 3670
3680 3671 except error.CensoredNodeError:
3681 3672 if state[b'erroroncensored']:
3682 3673 yield revlogproblem(
3683 3674 error=_(b'censored file data'), node=node
3684 3675 )
3685 3676 state[b'skipread'].add(node)
3686 3677 except Exception as e:
3687 3678 yield revlogproblem(
3688 3679 error=_(b'unpacking %s: %s')
3689 3680 % (short(node), stringutil.forcebytestr(e)),
3690 3681 node=node,
3691 3682 )
3692 3683 state[b'skipread'].add(node)
3693 3684
3694 3685 def storageinfo(
3695 3686 self,
3696 3687 exclusivefiles=False,
3697 3688 sharedfiles=False,
3698 3689 revisionscount=False,
3699 3690 trackedsize=False,
3700 3691 storedsize=False,
3701 3692 ):
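# Only the requested pieces of information end up in the returned dict; for
# instance ``storageinfo(revisionscount=True)`` returns a dict whose only key
# is ``b'revisionscount'``.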
3702 3693 d = {}
3703 3694
3704 3695 if exclusivefiles:
3705 3696 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3706 3697 if not self._inline:
3707 3698 d[b'exclusivefiles'].append((self.opener, self._datafile))
3708 3699
3709 3700 if sharedfiles:
3710 3701 d[b'sharedfiles'] = []
3711 3702
3712 3703 if revisionscount:
3713 3704 d[b'revisionscount'] = len(self)
3714 3705
3715 3706 if trackedsize:
3716 3707 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3717 3708
3718 3709 if storedsize:
3719 3710 d[b'storedsize'] = sum(
3720 3711 self.opener.stat(path).st_size for path in self.files()
3721 3712 )
3722 3713
3723 3714 return d
3724 3715
3725 3716 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
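# Summary of the steps below: recompute sidedata for every revision in
# ``[startrev, endrev]`` with the provided helpers, append the serialized
# (and possibly compressed) blobs to the sidedata file, then rewrite the
# corresponding index entries so they point at the new offsets and carry the
# updated flags.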
3726 3717 if not self.feature_config.has_side_data:
3727 3718 return
3728 3719 # revlog formats with sidedata support do not support inline data
3729 3720 assert not self._inline
3730 3721 if not helpers[1] and not helpers[2]:
3731 3722 # Nothing to generate or remove
3732 3723 return
3733 3724
3734 3725 new_entries = []
3735 3726 # append the new sidedata
3736 3727 with self._writing(transaction):
3737 3728 ifh, dfh, sdfh = self._writinghandles
3738 3729 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3739 3730
3740 3731 current_offset = sdfh.tell()
3741 3732 for rev in range(startrev, endrev + 1):
3742 3733 entry = self.index[rev]
3743 3734 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3744 3735 store=self,
3745 3736 sidedata_helpers=helpers,
3746 3737 sidedata={},
3747 3738 rev=rev,
3748 3739 )
3749 3740
3750 3741 serialized_sidedata = sidedatautil.serialize_sidedata(
3751 3742 new_sidedata
3752 3743 )
3753 3744
3754 3745 sidedata_compression_mode = COMP_MODE_INLINE
3755 3746 if serialized_sidedata and self.feature_config.has_side_data:
3756 3747 sidedata_compression_mode = COMP_MODE_PLAIN
3757 3748 h, comp_sidedata = self.compress(serialized_sidedata)
3758 3749 if (
3759 3750 h != b'u'
3760 3751 and comp_sidedata[0] != b'\0'
3761 3752 and len(comp_sidedata) < len(serialized_sidedata)
3762 3753 ):
3763 3754 assert not h
3764 3755 if (
3765 3756 comp_sidedata[0]
3766 3757 == self._docket.default_compression_header
3767 3758 ):
3768 3759 sidedata_compression_mode = COMP_MODE_DEFAULT
3769 3760 serialized_sidedata = comp_sidedata
3770 3761 else:
3771 3762 sidedata_compression_mode = COMP_MODE_INLINE
3772 3763 serialized_sidedata = comp_sidedata
3773 3764 if entry[8] != 0 or entry[9] != 0:
3774 3765 # rewriting entries that already have sidedata is not
3775 3766 # supported yet, because it introduces garbage data in the
3776 3767 # revlog.
3777 3768 msg = b"rewriting existing sidedata is not supported yet"
3778 3769 raise error.Abort(msg)
3779 3770
3780 3771 # Apply (potential) flags to add and to remove after running
3781 3772 # the sidedata helpers
3782 3773 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3783 3774 entry_update = (
3784 3775 current_offset,
3785 3776 len(serialized_sidedata),
3786 3777 new_offset_flags,
3787 3778 sidedata_compression_mode,
3788 3779 )
3789 3780
3790 3781 # the sidedata computation might have moved the file cursors around
3791 3782 sdfh.seek(current_offset, os.SEEK_SET)
3792 3783 sdfh.write(serialized_sidedata)
3793 3784 new_entries.append(entry_update)
3794 3785 current_offset += len(serialized_sidedata)
3795 3786 self._docket.sidedata_end = sdfh.tell()
3796 3787
3797 3788 # rewrite the new index entries
3798 3789 ifh.seek(startrev * self.index.entry_size)
3799 3790 for i, e in enumerate(new_entries):
3800 3791 rev = startrev + i
3801 3792 self.index.replace_sidedata_info(rev, *e)
3802 3793 packed = self.index.entry_binary(rev)
3803 3794 if rev == 0 and self._docket is None:
3804 3795 header = self._format_flags | self._format_version
3805 3796 header = self.index.pack_header(header)
3806 3797 packed = header + packed
3807 3798 ifh.write(packed)