revlog: consolidate cache invalidation within the inner object...
marmoute
r51994:045b5f74 default
@@ -1,4041 +1,4042 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanket usage of all the names to prevent pyflakes warnings
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # index size above which the index is considered large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups by chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
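# Illustrative example of the copy() helpers above (the values are made up):
# each config object can be duplicated independently, and FeatureConfig also
# clones its options dict so the copy does not share mutable state:
#
#     base = FeatureConfig(compression_engine=b'zstd', censorable=True)
#     other = base.copy()
#     other.compression_engine_options[b'level'] = 3   # leaves `base` untouched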
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 393 # 3-tuple of (node, rev, text) for a raw revision.
394 394 self._revisioncache = None
395 395
396 396 @property
397 397 def index_file(self):
398 398 return self.__index_file
399 399
400 400 @index_file.setter
401 401 def index_file(self, new_index_file):
402 402 self.__index_file = new_index_file
403 403 if self.inline:
404 404 self._segmentfile.filename = new_index_file
405 405
406 406 def __len__(self):
407 407 return len(self.index)
408 408
409 def clear_cache(self):
410 self._revisioncache = None
411 self._segmentfile.clear_cache()
412 self._segmentfile_sidedata.clear_cache()
413
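# A minimal sketch (hypothetical caller) of how this consolidated invalidation
# is meant to be used: the outer revlog object drops the caches it owns itself
# and delegates everything owned by the inner object to this single entry
# point, e.g.:
#
#     def clearcaches(self):              # method on the outer revlog object
#         self._chainbasecache.clear()    # cache owned by the outer object
#         self._inner.clear_cache()       # revision cache + segment file caches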
409 414 # Derived from index values.
410 415
411 416 def start(self, rev):
412 417 """the offset of the data chunk for this revision"""
413 418 return int(self.index[rev][0] >> 16)
414 419
415 420 def length(self, rev):
416 421 """the length of the data chunk for this revision"""
417 422 return self.index[rev][1]
418 423
419 424 def end(self, rev):
420 425 """the end of the data chunk for this revision"""
421 426 return self.start(rev) + self.length(rev)
422 427
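# For reference, the index entry fields accessed throughout this module are
# positional: entry[0] >> 16 is the data offset, entry[1] the stored length,
# entry[2] the raw size, entry[3] the delta base, entry[4] the linkrev,
# entry[5]/entry[6] the parent revisions, entry[7] the node, entry[8]/entry[9]
# the sidedata offset and size, and entry[10]/entry[11] the data and sidedata
# compression modes.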
423 428 def deltaparent(self, rev):
424 429 """return deltaparent of the given revision"""
425 430 base = self.index[rev][3]
426 431 if base == rev:
427 432 return nullrev
428 433 elif self.delta_config.general_delta:
429 434 return base
430 435 else:
431 436 return rev - 1
432 437
433 438 def issnapshot(self, rev):
434 439 """tells whether rev is a snapshot"""
435 440 if not self.delta_config.sparse_revlog:
436 441 return self.deltaparent(rev) == nullrev
437 442 elif hasattr(self.index, 'issnapshot'):
438 443 # directly assign the method to cache the testing and access
439 444 self.issnapshot = self.index.issnapshot
440 445 return self.issnapshot(rev)
441 446 if rev == nullrev:
442 447 return True
443 448 entry = self.index[rev]
444 449 base = entry[3]
445 450 if base == rev:
446 451 return True
447 452 if base == nullrev:
448 453 return True
449 454 p1 = entry[5]
450 455 while self.length(p1) == 0:
451 456 b = self.deltaparent(p1)
452 457 if b == p1:
453 458 break
454 459 p1 = b
455 460 p2 = entry[6]
456 461 while self.length(p2) == 0:
457 462 b = self.deltaparent(p2)
458 463 if b == p2:
459 464 break
460 465 p2 = b
461 466 if base == p1 or base == p2:
462 467 return False
463 468 return self.issnapshot(base)
464 469
465 470 def _deltachain(self, rev, stoprev=None):
466 471 """Obtain the delta chain for a revision.
467 472
468 473 ``stoprev`` specifies a revision to stop at. If not specified, we
469 474 stop at the base of the chain.
470 475
471 476 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
472 477 revs in ascending order and ``stopped`` is a bool indicating whether
473 478 ``stoprev`` was hit.
474 479 """
475 480 generaldelta = self.delta_config.general_delta
476 481 # Try C implementation.
477 482 try:
478 483 return self.index.deltachain(rev, stoprev, generaldelta)
479 484 except AttributeError:
480 485 pass
481 486
482 487 chain = []
483 488
484 489 # Alias to prevent attribute lookup in tight loop.
485 490 index = self.index
486 491
487 492 iterrev = rev
488 493 e = index[iterrev]
489 494 while iterrev != e[3] and iterrev != stoprev:
490 495 chain.append(iterrev)
491 496 if generaldelta:
492 497 iterrev = e[3]
493 498 else:
494 499 iterrev -= 1
495 500 e = index[iterrev]
496 501
497 502 if iterrev == stoprev:
498 503 stopped = True
499 504 else:
500 505 chain.append(iterrev)
501 506 stopped = False
502 507
503 508 chain.reverse()
504 509 return chain, stopped
505 510
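# Illustrative sketch of how a delta chain is consumed (mirroring raw_text
# below): the first revision of the chain provides a full text and every later
# entry is a delta applied on top of it, e.g.:
#
#     chain, stopped = self._deltachain(rev)
#     bins = self._chunks(chain)
#     text = mdiff.patches(bytes(bins[0]), bins[1:])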
506 511 @util.propertycache
507 512 def _compressor(self):
508 513 engine = util.compengines[self.feature_config.compression_engine]
509 514 return engine.revlogcompressor(
510 515 self.feature_config.compression_engine_options
511 516 )
512 517
513 518 @util.propertycache
514 519 def _decompressor(self):
515 520 """the default decompressor"""
516 521 if self._default_compression_header is None:
517 522 return None
518 523 t = self._default_compression_header
519 524 c = self._get_decompressor(t)
520 525 return c.decompress
521 526
522 527 def _get_decompressor(self, t):
523 528 try:
524 529 compressor = self._decompressors[t]
525 530 except KeyError:
526 531 try:
527 532 engine = util.compengines.forrevlogheader(t)
528 533 compressor = engine.revlogcompressor(
529 534 self.feature_config.compression_engine_options
530 535 )
531 536 self._decompressors[t] = compressor
532 537 except KeyError:
533 538 raise error.RevlogError(
534 539 _(b'unknown compression type %s') % binascii.hexlify(t)
535 540 )
536 541 return compressor
537 542
538 543 def compress(self, data):
539 544 """Generate a possibly-compressed representation of data."""
540 545 if not data:
541 546 return b'', data
542 547
543 548 compressed = self._compressor.compress(data)
544 549
545 550 if compressed:
546 551 # The revlog compressor added the header in the returned data.
547 552 return b'', compressed
548 553
549 554 if data[0:1] == b'\0':
550 555 return b'', data
551 556 return b'u', data
552 557
553 558 def decompress(self, data):
554 559 """Decompress a revlog chunk.
555 560
556 561 The chunk is expected to begin with a header identifying the
557 562 format type so it can be routed to an appropriate decompressor.
558 563 """
559 564 if not data:
560 565 return data
561 566
562 567 # Revlogs are read much more frequently than they are written and many
563 568 # chunks only take microseconds to decompress, so performance is
564 569 # important here.
565 570 #
566 571 # We can make a few assumptions about revlogs:
567 572 #
568 573 # 1) the majority of chunks will be compressed (as opposed to inline
569 574 # raw data).
570 575 # 2) decompressing *any* data will likely be at least 10x slower than
571 576 # returning raw inline data.
572 577 # 3) we want to prioritize common and officially supported compression
573 578 # engines
574 579 #
575 580 # It follows that we want to optimize for "decompress compressed data
576 581 # when encoded with common and officially supported compression engines"
577 582 # case over "raw data" and "data encoded by less common or non-official
578 583 # compression engines." That is why we have the inline lookup first
579 584 # followed by the compengines lookup.
580 585 #
581 586 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
582 587 # compressed chunks. And this matters for changelog and manifest reads.
583 588 t = data[0:1]
584 589
585 590 if t == b'x':
586 591 try:
587 592 return _zlibdecompress(data)
588 593 except zlib.error as e:
589 594 raise error.RevlogError(
590 595 _(b'revlog decompress error: %s')
591 596 % stringutil.forcebytestr(e)
592 597 )
593 598 # '\0' is more common than 'u' so it goes first.
594 599 elif t == b'\0':
595 600 return data
596 601 elif t == b'u':
597 602 return util.buffer(data, 1)
598 603
599 604 compressor = self._get_decompressor(t)
600 605
601 606 return compressor.decompress(data)
602 607
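# Illustrative examples of the chunk headers handled above (the b'x' value is
# the standard first byte of a zlib stream; the others are revlog conventions):
#
#     zlib.compress(b'some text')[0:1] == b'x'   # routed to _zlibdecompress
#     b'\0' + payload                            # stored as-is, returned unchanged
#     b'u' + payload                             # explicit "uncompressed" marker,
#                                                # returned without the leading byte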
603 608 @contextlib.contextmanager
604 609 def reading(self):
605 610 """Context manager that keeps data and sidedata files open for reading"""
606 611 if len(self.index) == 0:
607 612 yield # nothing to be read
608 613 else:
609 614 with self._segmentfile.reading():
610 615 with self._segmentfile_sidedata.reading():
611 616 yield
612 617
613 618 @property
614 619 def is_writing(self):
615 620 """True is a writing context is open"""
616 621 return self._writinghandles is not None
617 622
618 623 @contextlib.contextmanager
619 624 def writing(self, transaction, data_end=None, sidedata_end=None):
620 625 """Open the revlog files for writing
621 626
622 627 Adding content to a revlog should be done within such a context.
623 628 """
624 629 if self.is_writing:
625 630 yield
626 631 else:
627 632 ifh = dfh = sdfh = None
628 633 try:
629 634 r = len(self.index)
630 635 # opening the data file.
631 636 dsize = 0
632 637 if r:
633 638 dsize = self.end(r - 1)
634 639 dfh = None
635 640 if not self.inline:
636 641 try:
637 642 dfh = self.opener(self.data_file, mode=b"r+")
638 643 if data_end is None:
639 644 dfh.seek(0, os.SEEK_END)
640 645 else:
641 646 dfh.seek(data_end, os.SEEK_SET)
642 647 except FileNotFoundError:
643 648 dfh = self.opener(self.data_file, mode=b"w+")
644 649 transaction.add(self.data_file, dsize)
645 650 if self.sidedata_file is not None:
646 651 assert sidedata_end is not None
647 652 # revlog-v2 does not inline, help Pytype
648 653 assert dfh is not None
649 654 try:
650 655 sdfh = self.opener(self.sidedata_file, mode=b"r+")
651 656 dfh.seek(sidedata_end, os.SEEK_SET)
652 657 except FileNotFoundError:
653 658 sdfh = self.opener(self.sidedata_file, mode=b"w+")
654 659 transaction.add(self.sidedata_file, sidedata_end)
655 660
656 661 # opening the index file.
657 662 isize = r * self.index.entry_size
658 663 ifh = self.__index_write_fp()
659 664 if self.inline:
660 665 transaction.add(self.index_file, dsize + isize)
661 666 else:
662 667 transaction.add(self.index_file, isize)
663 668 # exposing all file handle for writing.
664 669 self._writinghandles = (ifh, dfh, sdfh)
665 670 self._segmentfile.writing_handle = ifh if self.inline else dfh
666 671 self._segmentfile_sidedata.writing_handle = sdfh
667 672 yield
668 673 finally:
669 674 self._writinghandles = None
670 675 self._segmentfile.writing_handle = None
671 676 self._segmentfile_sidedata.writing_handle = None
672 677 if dfh is not None:
673 678 dfh.close()
674 679 if sdfh is not None:
675 680 sdfh.close()
676 681 # closing the index file last to avoid exposing references to
677 682 # potentially unflushed data content.
678 683 if ifh is not None:
679 684 ifh.close()
680 685
681 686 def __index_write_fp(self, index_end=None):
682 687 """internal method to open the index file for writing
683 688
684 689 You should not use this directly; use `_writing` instead.
685 690 """
686 691 try:
687 692 f = self.opener(
688 693 self.index_file,
689 694 mode=b"r+",
690 695 checkambig=self.data_config.check_ambig,
691 696 )
692 697 if index_end is None:
693 698 f.seek(0, os.SEEK_END)
694 699 else:
695 700 f.seek(index_end, os.SEEK_SET)
696 701 return f
697 702 except FileNotFoundError:
698 703 return self.opener(
699 704 self.index_file,
700 705 mode=b"w+",
701 706 checkambig=self.data_config.check_ambig,
702 707 )
703 708
704 709 def __index_new_fp(self):
705 710 """internal method to create a new index file for writing
706 711
707 712 You should not use this unless you are upgrading from inline revlog
708 713 """
709 714 return self.opener(
710 715 self.index_file,
711 716 mode=b"w",
712 717 checkambig=self.data_config.check_ambig,
713 718 atomictemp=True,
714 719 )
715 720
716 721 def split_inline(self, tr, header, new_index_file_path=None):
717 722 """split the data of an inline revlog into an index and a data file"""
718 723 existing_handles = False
719 724 if self._writinghandles is not None:
720 725 existing_handles = True
721 726 fp = self._writinghandles[0]
722 727 fp.flush()
723 728 fp.close()
724 729 # We can't use the cached file handle after close(). So prevent
725 730 # its usage.
726 731 self._writinghandles = None
727 732 self._segmentfile.writing_handle = None
728 733 # No need to deal with sidedata writing handle as it is only
729 734 # relevant with revlog-v2 which is never inline, not reaching
730 735 # this code
731 736
732 737 new_dfh = self.opener(self.data_file, mode=b"w+")
733 738 new_dfh.truncate(0) # drop any potentially existing data
734 739 try:
735 740 with self.reading():
736 741 for r in range(len(self.index)):
737 742 new_dfh.write(self.get_segment_for_revs(r, r)[1])
738 743 new_dfh.flush()
739 744
740 745 if new_index_file_path is not None:
741 746 self.index_file = new_index_file_path
742 747 with self.__index_new_fp() as fp:
743 748 self.inline = False
744 749 for i in range(len(self.index)):
745 750 e = self.index.entry_binary(i)
746 751 if i == 0:
747 752 packed_header = self.index.pack_header(header)
748 753 e = packed_header + e
749 754 fp.write(e)
750 755
751 756 # If we don't use side-write, the temp file replaces the real
752 757 # index when we exit the context manager
753 758
754 759 self._segmentfile = randomaccessfile.randomaccessfile(
755 760 self.opener,
756 761 self.data_file,
757 762 self.data_config.chunk_cache_size,
758 763 )
759 764
760 765 if existing_handles:
761 766 # switched from inline to conventional; reopen the index
762 767 ifh = self.__index_write_fp()
763 768 self._writinghandles = (ifh, new_dfh, None)
764 769 self._segmentfile.writing_handle = new_dfh
765 770 new_dfh = None
766 771 # No need to deal with sidedata writing handle as it is only
767 772 # relevant with revlog-v2 which is never inline, not reaching
768 773 # this code
769 774 finally:
770 775 if new_dfh is not None:
771 776 new_dfh.close()
772 777 return self.index_file
773 778
774 779 def get_segment_for_revs(self, startrev, endrev):
775 780 """Obtain a segment of raw data corresponding to a range of revisions.
776 781
777 782 Accepts the start and end revisions.
780 785
781 786 Requests for data may be satisfied by a cache.
782 787
783 788 Returns a 2-tuple of (offset, data) for the requested range of
784 789 revisions. Offset is the integer offset from the beginning of the
785 790 revlog and data is a str or buffer of the raw byte data.
786 791
787 792 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
788 793 to determine where each revision's data begins and ends.
789 794
790 795 API: we should consider making this a private part of the InnerRevlog
791 796 at some point.
792 797 """
793 798 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
794 799 # (functions are expensive).
795 800 index = self.index
796 801 istart = index[startrev]
797 802 start = int(istart[0] >> 16)
798 803 if startrev == endrev:
799 804 end = start + istart[1]
800 805 else:
801 806 iend = index[endrev]
802 807 end = int(iend[0] >> 16) + iend[1]
803 808
804 809 if self.inline:
805 810 start += (startrev + 1) * self.index.entry_size
806 811 end += (endrev + 1) * self.index.entry_size
807 812 length = end - start
808 813
809 814 return start, self._segmentfile.read_chunk(start, length)
810 815
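# Illustrative sketch of how callers carve individual revisions out of the
# returned segment (this mirrors the loop in _chunks below):
#
#     offset, data = self.get_segment_for_revs(firstrev, lastrev)
#     for rev in range(firstrev, lastrev + 1):
#         chunkstart = self.start(rev)
#         if self.inline:
#             chunkstart += (rev + 1) * self.index.entry_size
#         chunk = util.buffer(data, chunkstart - offset, self.length(rev))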
811 816 def _chunk(self, rev):
812 817 """Obtain a single decompressed chunk for a revision.
813 818
814 819 Accepts an integer revision.
817 822
818 823 Returns a str holding uncompressed data for the requested revision.
819 824 """
820 825 compression_mode = self.index[rev][10]
821 826 data = self.get_segment_for_revs(rev, rev)[1]
822 827 if compression_mode == COMP_MODE_PLAIN:
823 828 return data
824 829 elif compression_mode == COMP_MODE_DEFAULT:
825 830 return self._decompressor(data)
826 831 elif compression_mode == COMP_MODE_INLINE:
827 832 return self.decompress(data)
828 833 else:
829 834 msg = b'unknown compression mode %d'
830 835 msg %= compression_mode
831 836 raise error.RevlogError(msg)
832 837
833 838 def _chunks(self, revs, targetsize=None):
834 839 """Obtain decompressed chunks for the specified revisions.
835 840
836 841 Accepts an iterable of numeric revisions that are assumed to be in
837 842 ascending order.
840 845
841 846 This function is similar to calling ``self._chunk()`` multiple times,
842 847 but is faster.
843 848
844 849 Returns a list with decompressed data for each requested revision.
845 850 """
846 851 if not revs:
847 852 return []
848 853 start = self.start
849 854 length = self.length
850 855 inline = self.inline
851 856 iosize = self.index.entry_size
852 857 buffer = util.buffer
853 858
854 859 l = []
855 860 ladd = l.append
856 861
857 862 if not self.data_config.with_sparse_read:
858 863 slicedchunks = (revs,)
859 864 else:
860 865 slicedchunks = deltautil.slicechunk(
861 866 self,
862 867 revs,
863 868 targetsize=targetsize,
864 869 )
865 870
866 871 for revschunk in slicedchunks:
867 872 firstrev = revschunk[0]
868 873 # Skip trailing revisions with empty diff
869 874 for lastrev in revschunk[::-1]:
870 875 if length(lastrev) != 0:
871 876 break
872 877
873 878 try:
874 879 offset, data = self.get_segment_for_revs(firstrev, lastrev)
875 880 except OverflowError:
876 881 # issue4215 - we can't cache a run of chunks greater than
877 882 # 2G on Windows
878 883 return [self._chunk(rev) for rev in revschunk]
879 884
880 885 decomp = self.decompress
881 886 # self._decompressor might be None, but will not be used in that case
882 887 def_decomp = self._decompressor
883 888 for rev in revschunk:
884 889 chunkstart = start(rev)
885 890 if inline:
886 891 chunkstart += (rev + 1) * iosize
887 892 chunklength = length(rev)
888 893 comp_mode = self.index[rev][10]
889 894 c = buffer(data, chunkstart - offset, chunklength)
890 895 if comp_mode == COMP_MODE_PLAIN:
891 896 ladd(c)
892 897 elif comp_mode == COMP_MODE_INLINE:
893 898 ladd(decomp(c))
894 899 elif comp_mode == COMP_MODE_DEFAULT:
895 900 ladd(def_decomp(c))
896 901 else:
897 902 msg = b'unknown compression mode %d'
898 903 msg %= comp_mode
899 904 raise error.RevlogError(msg)
900 905
901 906 return l
902 907
903 908 def raw_text(self, node, rev):
904 909 """return the possibly unvalidated rawtext for a revision
905 910
906 911 returns (rev, rawtext, validated)
907 912 """
908 913
909 914 # revision in the cache (could be useful to apply delta)
910 915 cachedrev = None
911 916 # An intermediate text to apply deltas to
912 917 basetext = None
913 918
914 919 # Check if we have the entry in cache
915 920 # The cache entry looks like (node, rev, rawtext)
916 921 if self._revisioncache:
917 922 cachedrev = self._revisioncache[1]
918 923
919 924 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
920 925 if stopped:
921 926 basetext = self._revisioncache[2]
922 927
923 928 # drop cache to save memory, the caller is expected to
924 929 # update self._inner._revisioncache after validating the text
925 930 self._revisioncache = None
926 931
927 932 targetsize = None
928 933 rawsize = self.index[rev][2]
929 934 if 0 <= rawsize:
930 935 targetsize = 4 * rawsize
931 936
932 937 bins = self._chunks(chain, targetsize=targetsize)
933 938 if basetext is None:
934 939 basetext = bytes(bins[0])
935 940 bins = bins[1:]
936 941
937 942 rawtext = mdiff.patches(basetext, bins)
938 943 del basetext # let us have a chance to free memory early
939 944 return (rev, rawtext, False)
940 945
941 946 def sidedata(self, rev, sidedata_end):
942 947 """Return the sidedata for a given revision number."""
943 948 index_entry = self.index[rev]
944 949 sidedata_offset = index_entry[8]
945 950 sidedata_size = index_entry[9]
946 951
947 952 if self.inline:
948 953 sidedata_offset += self.index.entry_size * (1 + rev)
949 954 if sidedata_size == 0:
950 955 return {}
951 956
952 957 if sidedata_end < sidedata_offset + sidedata_size:
953 958 filename = self.sidedata_file
954 959 end = sidedata_end
955 960 offset = sidedata_offset
956 961 length = sidedata_size
957 962 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
958 963 raise error.RevlogError(m)
959 964
960 965 comp_segment = self._segmentfile_sidedata.read_chunk(
961 966 sidedata_offset, sidedata_size
962 967 )
963 968
964 969 comp = self.index[rev][11]
965 970 if comp == COMP_MODE_PLAIN:
966 971 segment = comp_segment
967 972 elif comp == COMP_MODE_DEFAULT:
968 973 segment = self._decompressor(comp_segment)
969 974 elif comp == COMP_MODE_INLINE:
970 975 segment = self.decompress(comp_segment)
971 976 else:
972 977 msg = b'unknown compression mode %d'
973 978 msg %= comp
974 979 raise error.RevlogError(msg)
975 980
976 981 sidedata = sidedatautil.deserialize_sidedata(segment)
977 982 return sidedata
978 983
979 984 def write_entry(
980 985 self,
981 986 transaction,
982 987 entry,
983 988 data,
984 989 link,
985 990 offset,
986 991 sidedata,
987 992 sidedata_offset,
988 993 index_end,
989 994 data_end,
990 995 sidedata_end,
991 996 ):
992 997 # Files opened in a+ mode have inconsistent behavior on various
993 998 # platforms. Windows requires that a file positioning call be made
994 999 # when the file handle transitions between reads and writes. See
995 1000 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
996 1001 # platforms, Python or the platform itself can be buggy. Some versions
997 1002 # of Solaris have been observed to not append at the end of the file
998 1003 # if the file was seeked to before the end. See issue4943 for more.
999 1004 #
1000 1005 # We work around this issue by inserting a seek() before writing.
1001 1006 # Note: This is likely not necessary on Python 3. However, because
1002 1007 # the file handle is reused for reads and may be seeked there, we need
1003 1008 # to be careful before changing this.
1004 1009 if self._writinghandles is None:
1005 1010 msg = b'adding revision outside `revlog._writing` context'
1006 1011 raise error.ProgrammingError(msg)
1007 1012 ifh, dfh, sdfh = self._writinghandles
1008 1013 if index_end is None:
1009 1014 ifh.seek(0, os.SEEK_END)
1010 1015 else:
1011 1016 ifh.seek(index_end, os.SEEK_SET)
1012 1017 if dfh:
1013 1018 if data_end is None:
1014 1019 dfh.seek(0, os.SEEK_END)
1015 1020 else:
1016 1021 dfh.seek(data_end, os.SEEK_SET)
1017 1022 if sdfh:
1018 1023 sdfh.seek(sidedata_end, os.SEEK_SET)
1019 1024
1020 1025 curr = len(self.index) - 1
1021 1026 if not self.inline:
1022 1027 transaction.add(self.data_file, offset)
1023 1028 if self.sidedata_file:
1024 1029 transaction.add(self.sidedata_file, sidedata_offset)
1025 1030 transaction.add(self.index_file, curr * len(entry))
1026 1031 if data[0]:
1027 1032 dfh.write(data[0])
1028 1033 dfh.write(data[1])
1029 1034 if sidedata:
1030 1035 sdfh.write(sidedata)
1031 1036 ifh.write(entry)
1032 1037 else:
1033 1038 offset += curr * self.index.entry_size
1034 1039 transaction.add(self.index_file, offset)
1035 1040 ifh.write(entry)
1036 1041 ifh.write(data[0])
1037 1042 ifh.write(data[1])
1038 1043 assert not sidedata
1039 1044 return (
1040 1045 ifh.tell(),
1041 1046 dfh.tell() if dfh else None,
1042 1047 sdfh.tell() if sdfh else None,
1043 1048 )
1044 1049
1045 1050
1046 1051 class revlog:
1047 1052 """
1048 1053 the underlying revision storage object
1049 1054
1050 1055 A revlog consists of two parts, an index and the revision data.
1051 1056
1052 1057 The index is a file with a fixed record size containing
1053 1058 information on each revision, including its nodeid (hash), the
1054 1059 nodeids of its parents, the position and offset of its data within
1055 1060 the data file, and the revision it's based on. Finally, each entry
1056 1061 contains a linkrev entry that can serve as a pointer to external
1057 1062 data.
1058 1063
1059 1064 The revision data itself is a linear collection of data chunks.
1060 1065 Each chunk represents a revision and is usually represented as a
1061 1066 delta against the previous chunk. To bound lookup time, runs of
1062 1067 deltas are limited to about 2 times the length of the original
1063 1068 version data. This makes retrieval of a version proportional to
1064 1069 its size, or O(1) relative to the number of revisions.
1065 1070
1066 1071 Both pieces of the revlog are written to in an append-only
1067 1072 fashion, which means we never need to rewrite a file to insert or
1068 1073 remove data, and can use some simple techniques to avoid the need
1069 1074 for locking while reading.
1070 1075
1071 1076 If checkambig, indexfile is opened with checkambig=True at
1072 1077 writing, to avoid file stat ambiguity.
1073 1078
1074 1079 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1075 1080 index will be mmapped rather than read if it is larger than the
1076 1081 configured threshold.
1077 1082
1078 1083 If censorable is True, the revlog can have censored revisions.
1079 1084
1080 1085 If `upperboundcomp` is not None, this is the expected maximal gain from
1081 1086 compression for the data content.
1082 1087
1083 1088 `concurrencychecker` is an optional function that receives 3 arguments: a
1084 1089 file handle, a filename, and an expected position. It should check whether
1085 1090 the current position in the file handle is valid, and log/warn/fail (by
1086 1091 raising).
1087 1092
1088 1093 See mercurial/revlogutils/constants.py for details about the content of an
1089 1094 index entry.
1090 1095 """
1091 1096
1092 1097 _flagserrorclass = error.RevlogError
1093 1098
1094 1099 @staticmethod
1095 1100 def is_inline_index(header_bytes):
1096 1101 """Determine if a revlog is inline from the initial bytes of the index"""
1097 1102 header = INDEX_HEADER.unpack(header_bytes)[0]
1098 1103
1099 1104 _format_flags = header & ~0xFFFF
1100 1105 _format_version = header & 0xFFFF
1101 1106
1102 1107 features = FEATURES_BY_VERSION[_format_version]
1103 1108 return features[b'inline'](_format_flags)
1104 1109
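# Worked example (flag values assumed from revlogutils/constants.py, where
# FLAG_INLINE_DATA is 1 << 16): a classic inline revlogv1 header unpacks to
# REVLOGV1 | FLAG_INLINE_DATA == 0x00010001, so _format_flags is 0x00010000,
# _format_version is 1, and FEATURES_BY_VERSION[1][b'inline'](0x00010000)
# reports the revlog as inline.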
1105 1110 def __init__(
1106 1111 self,
1107 1112 opener,
1108 1113 target,
1109 1114 radix,
1110 1115 postfix=None, # only exists for `tmpcensored` now
1111 1116 checkambig=False,
1112 1117 mmaplargeindex=False,
1113 1118 censorable=False,
1114 1119 upperboundcomp=None,
1115 1120 persistentnodemap=False,
1116 1121 concurrencychecker=None,
1117 1122 trypending=False,
1118 1123 try_split=False,
1119 1124 canonical_parent_order=True,
1120 1125 ):
1121 1126 """
1122 1127 create a revlog object
1123 1128
1124 1129 opener is a function that abstracts the file opening operation
1125 1130 and can be used to implement COW semantics or the like.
1126 1131
1127 1132 `target`: a (KIND, ID) tuple that identifies the content stored in
1128 1133 this revlog. It helps the rest of the code to understand what the revlog
1129 1134 is about without having to resort to heuristics and index filename
1130 1135 analysis. Note that this must be reliably set by normal code, but
1131 1136 test, debug, or performance measurement code might not set this to an
1132 1137 accurate value.
1133 1138 """
1134 1139
1135 1140 self.radix = radix
1136 1141
1137 1142 self._docket_file = None
1138 1143 self._indexfile = None
1139 1144 self._datafile = None
1140 1145 self._sidedatafile = None
1141 1146 self._nodemap_file = None
1142 1147 self.postfix = postfix
1143 1148 self._trypending = trypending
1144 1149 self._try_split = try_split
1145 1150 self.opener = opener
1146 1151 if persistentnodemap:
1147 1152 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1148 1153
1149 1154 assert target[0] in ALL_KINDS
1150 1155 assert len(target) == 2
1151 1156 self.target = target
1152 1157 if b'feature-config' in self.opener.options:
1153 1158 self.feature_config = self.opener.options[b'feature-config'].copy()
1154 1159 else:
1155 1160 self.feature_config = FeatureConfig()
1156 1161 self.feature_config.censorable = censorable
1157 1162 self.feature_config.canonical_parent_order = canonical_parent_order
1158 1163 if b'data-config' in self.opener.options:
1159 1164 self.data_config = self.opener.options[b'data-config'].copy()
1160 1165 else:
1161 1166 self.data_config = DataConfig()
1162 1167 self.data_config.check_ambig = checkambig
1163 1168 self.data_config.mmap_large_index = mmaplargeindex
1164 1169 if b'delta-config' in self.opener.options:
1165 1170 self.delta_config = self.opener.options[b'delta-config'].copy()
1166 1171 else:
1167 1172 self.delta_config = DeltaConfig()
1168 1173 self.delta_config.upper_bound_comp = upperboundcomp
1169 1174
1170 1175 # Maps rev to chain base rev.
1171 1176 self._chainbasecache = util.lrucachedict(100)
1172 1177
1173 1178 self.index = None
1174 1179 self._docket = None
1175 1180 self._nodemap_docket = None
1176 1181 # Mapping of partial identifiers to full nodes.
1177 1182 self._pcache = {}
1178 1183
1179 1184 # other optional features
1180 1185
1181 1186 # Make copy of flag processors so each revlog instance can support
1182 1187 # custom flags.
1183 1188 self._flagprocessors = dict(flagutil.flagprocessors)
1184 1189 # prevent nesting of addgroup
1185 1190 self._adding_group = None
1186 1191
1187 1192 chunk_cache = self._loadindex()
1188 1193 self._load_inner(chunk_cache)
1189 1194 self._concurrencychecker = concurrencychecker
1190 1195
1191 1196 @property
1192 1197 def _generaldelta(self):
1193 1198 """temporary compatibility proxy"""
1194 1199 util.nouideprecwarn(
1195 1200 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1196 1201 )
1197 1202 return self.delta_config.general_delta
1198 1203
1199 1204 @property
1200 1205 def _checkambig(self):
1201 1206 """temporary compatibility proxy"""
1202 1207 util.nouideprecwarn(
1203 1208 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1204 1209 )
1205 1210 return self.data_config.check_ambig
1206 1211
1207 1212 @property
1208 1213 def _mmaplargeindex(self):
1209 1214 """temporary compatibility proxy"""
1210 1215 util.nouideprecwarn(
1211 1216 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1212 1217 )
1213 1218 return self.data_config.mmap_large_index
1214 1219
1215 1220 @property
1216 1221 def _censorable(self):
1217 1222 """temporary compatibility proxy"""
1218 1223 util.nouideprecwarn(
1219 1224 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1220 1225 )
1221 1226 return self.feature_config.censorable
1222 1227
1223 1228 @property
1224 1229 def _chunkcachesize(self):
1225 1230 """temporary compatibility proxy"""
1226 1231 util.nouideprecwarn(
1227 1232 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1228 1233 )
1229 1234 return self.data_config.chunk_cache_size
1230 1235
1231 1236 @property
1232 1237 def _maxchainlen(self):
1233 1238 """temporary compatibility proxy"""
1234 1239 util.nouideprecwarn(
1235 1240 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1236 1241 )
1237 1242 return self.delta_config.max_chain_len
1238 1243
1239 1244 @property
1240 1245 def _deltabothparents(self):
1241 1246 """temporary compatibility proxy"""
1242 1247 util.nouideprecwarn(
1243 1248 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1244 1249 )
1245 1250 return self.delta_config.delta_both_parents
1246 1251
1247 1252 @property
1248 1253 def _candidate_group_chunk_size(self):
1249 1254 """temporary compatibility proxy"""
1250 1255 util.nouideprecwarn(
1251 1256 b"use revlog.delta_config.candidate_group_chunk_size",
1252 1257 b"6.6",
1253 1258 stacklevel=2,
1254 1259 )
1255 1260 return self.delta_config.candidate_group_chunk_size
1256 1261
1257 1262 @property
1258 1263 def _debug_delta(self):
1259 1264 """temporary compatibility proxy"""
1260 1265 util.nouideprecwarn(
1261 1266 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1262 1267 )
1263 1268 return self.delta_config.debug_delta
1264 1269
1265 1270 @property
1266 1271 def _compengine(self):
1267 1272 """temporary compatibility proxy"""
1268 1273 util.nouideprecwarn(
1269 1274 b"use revlog.feature_config.compression_engine",
1270 1275 b"6.6",
1271 1276 stacklevel=2,
1272 1277 )
1273 1278 return self.feature_config.compression_engine
1274 1279
1275 1280 @property
1276 1281 def upperboundcomp(self):
1277 1282 """temporary compatibility proxy"""
1278 1283 util.nouideprecwarn(
1279 1284 b"use revlog.delta_config.upper_bound_comp",
1280 1285 b"6.6",
1281 1286 stacklevel=2,
1282 1287 )
1283 1288 return self.delta_config.upper_bound_comp
1284 1289
1285 1290 @property
1286 1291 def _compengineopts(self):
1287 1292 """temporary compatibility proxy"""
1288 1293 util.nouideprecwarn(
1289 1294 b"use revlog.feature_config.compression_engine_options",
1290 1295 b"6.6",
1291 1296 stacklevel=2,
1292 1297 )
1293 1298 return self.feature_config.compression_engine_options
1294 1299
1295 1300 @property
1296 1301 def _maxdeltachainspan(self):
1297 1302 """temporary compatibility proxy"""
1298 1303 util.nouideprecwarn(
1299 1304 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1300 1305 )
1301 1306 return self.delta_config.max_deltachain_span
1302 1307
1303 1308 @property
1304 1309 def _withsparseread(self):
1305 1310 """temporary compatibility proxy"""
1306 1311 util.nouideprecwarn(
1307 1312 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1308 1313 )
1309 1314 return self.data_config.with_sparse_read
1310 1315
1311 1316 @property
1312 1317 def _sparserevlog(self):
1313 1318 """temporary compatibility proxy"""
1314 1319 util.nouideprecwarn(
1315 1320 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1316 1321 )
1317 1322 return self.delta_config.sparse_revlog
1318 1323
1319 1324 @property
1320 1325 def hassidedata(self):
1321 1326 """temporary compatibility proxy"""
1322 1327 util.nouideprecwarn(
1323 1328 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1324 1329 )
1325 1330 return self.feature_config.has_side_data
1326 1331
1327 1332 @property
1328 1333 def _srdensitythreshold(self):
1329 1334 """temporary compatibility proxy"""
1330 1335 util.nouideprecwarn(
1331 1336 b"use revlog.data_config.sr_density_threshold",
1332 1337 b"6.6",
1333 1338 stacklevel=2,
1334 1339 )
1335 1340 return self.data_config.sr_density_threshold
1336 1341
1337 1342 @property
1338 1343 def _srmingapsize(self):
1339 1344 """temporary compatibility proxy"""
1340 1345 util.nouideprecwarn(
1341 1346 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1342 1347 )
1343 1348 return self.data_config.sr_min_gap_size
1344 1349
1345 1350 @property
1346 1351 def _compute_rank(self):
1347 1352 """temporary compatibility proxy"""
1348 1353 util.nouideprecwarn(
1349 1354 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1350 1355 )
1351 1356 return self.feature_config.compute_rank
1352 1357
1353 1358 @property
1354 1359 def canonical_parent_order(self):
1355 1360 """temporary compatibility proxy"""
1356 1361 util.nouideprecwarn(
1357 1362 b"use revlog.feature_config.canonical_parent_order",
1358 1363 b"6.6",
1359 1364 stacklevel=2,
1360 1365 )
1361 1366 return self.feature_config.canonical_parent_order
1362 1367
1363 1368 @property
1364 1369 def _lazydelta(self):
1365 1370 """temporary compatibility proxy"""
1366 1371 util.nouideprecwarn(
1367 1372 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1368 1373 )
1369 1374 return self.delta_config.lazy_delta
1370 1375
1371 1376 @property
1372 1377 def _lazydeltabase(self):
1373 1378 """temporary compatibility proxy"""
1374 1379 util.nouideprecwarn(
1375 1380 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1376 1381 )
1377 1382 return self.delta_config.lazy_delta_base
1378 1383
1379 1384 def _init_opts(self):
1380 1385 """process options (from above/config) to setup associated default revlog mode
1381 1386
1382 1387 These values might be affected when actually reading on disk information.
1383 1388
1384 1389 The relevant values are returned for use in _loadindex().
1385 1390
1386 1391 * newversionflags:
1387 1392 version header to use if we need to create a new revlog
1388 1393
1389 1394 * mmapindexthreshold:
1390 1395 minimal index size at which to start using mmap
1391 1396
1392 1397 * force_nodemap:
1393 1398 force the usage of a "development" version of the nodemap code
1394 1399 """
1395 1400 opts = self.opener.options
1396 1401
1397 1402 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1398 1403 new_header = CHANGELOGV2
1399 1404 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1400 1405 self.feature_config.compute_rank = compute_rank
1401 1406 elif b'revlogv2' in opts:
1402 1407 new_header = REVLOGV2
1403 1408 elif b'revlogv1' in opts:
1404 1409 new_header = REVLOGV1 | FLAG_INLINE_DATA
1405 1410 if b'generaldelta' in opts:
1406 1411 new_header |= FLAG_GENERALDELTA
1407 1412 elif b'revlogv0' in self.opener.options:
1408 1413 new_header = REVLOGV0
1409 1414 else:
1410 1415 new_header = REVLOG_DEFAULT_VERSION
1411 1416
1412 1417 mmapindexthreshold = None
1413 1418 if self.data_config.mmap_large_index:
1414 1419 mmapindexthreshold = self.data_config.mmap_index_threshold
1415 1420 if self.feature_config.enable_ellipsis:
1416 1421 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1417 1422
1418 1423 # revlog v0 doesn't have flag processors
1419 1424 for flag, processor in opts.get(b'flagprocessors', {}).items():
1420 1425 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1421 1426
1422 1427 chunk_cache_size = self.data_config.chunk_cache_size
1423 1428 if chunk_cache_size <= 0:
1424 1429 raise error.RevlogError(
1425 1430 _(b'revlog chunk cache size %r is not greater than 0')
1426 1431 % chunk_cache_size
1427 1432 )
1428 1433 elif chunk_cache_size & (chunk_cache_size - 1):
1429 1434 raise error.RevlogError(
1430 1435 _(b'revlog chunk cache size %r is not a power of 2')
1431 1436 % chunk_cache_size
1432 1437 )
1433 1438 force_nodemap = opts.get(b'devel-force-nodemap', False)
1434 1439 return new_header, mmapindexthreshold, force_nodemap
1435 1440
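# The power-of-two check above uses the usual bit trick: for x > 0,
# x & (x - 1) == 0 exactly when x has a single bit set. For example:
#
#     65536 & 65535 == 0        # 0x10000 is a power of 2, accepted
#     65537 & 65536 == 65536    # not a power of 2, raises RevlogError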
1436 1441 def _get_data(self, filepath, mmap_threshold, size=None):
1437 1442 """return a file content with or without mmap
1438 1443
1439 1444 If the file is missing return the empty string"""
1440 1445 try:
1441 1446 with self.opener(filepath) as fp:
1442 1447 if mmap_threshold is not None:
1443 1448 file_size = self.opener.fstat(fp).st_size
1444 1449 if file_size >= mmap_threshold:
1445 1450 if size is not None:
1446 1451 # avoid potential mmap crash
1447 1452 size = min(file_size, size)
1448 1453 # TODO: should .close() to release resources without
1449 1454 # relying on Python GC
1450 1455 if size is None:
1451 1456 return util.buffer(util.mmapread(fp))
1452 1457 else:
1453 1458 return util.buffer(util.mmapread(fp, size))
1454 1459 if size is None:
1455 1460 return fp.read()
1456 1461 else:
1457 1462 return fp.read(size)
1458 1463 except FileNotFoundError:
1459 1464 return b''
1460 1465
1461 1466 def get_streams(self, max_linkrev, force_inline=False):
1462 1467 """return a list of streams that represent this revlog
1463 1468
1464 1469 This is used by stream-clone to do byte-for-byte copies of a repository.
1465 1470
1466 1471 This streams data for all revisions that refer to a changelog revision up
1467 1472 to `max_linkrev`.
1468 1473
1469 1474 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1470 1475
1471 1476 It returns a list of three-tuples:
1472 1477
1473 1478 [
1474 1479 (filename, bytes_stream, stream_size),
1475 1480 …
1476 1481 ]
1477 1482 """
1478 1483 n = len(self)
1479 1484 index = self.index
1480 1485 while n > 0:
1481 1486 linkrev = index[n - 1][4]
1482 1487 if linkrev < max_linkrev:
1483 1488 break
1484 1489 # note: this loop will rarely go through multiple iterations, since
1485 1490 # it only traverses commits created during the current streaming
1486 1491 # pull operation.
1487 1492 #
1488 1493 # If this becomes a problem, using a binary search should cap the
1489 1494 # runtime of this.
1490 1495 n = n - 1
1491 1496 if n == 0:
1492 1497 # no data to send
1493 1498 return []
1494 1499 index_size = n * index.entry_size
1495 1500 data_size = self.end(n - 1)
1496 1501
1497 1502 # XXX we might have been split (or stripped) since the object
1498 1503 # initialization. We need to close this race too, perhaps by having a way
1499 1504 # to pre-open the files we feed to the revlog and never closing them before
1500 1505 # we are done streaming.
1501 1506
1502 1507 if self._inline:
1503 1508
1504 1509 def get_stream():
1505 1510 with self.opener(self._indexfile, mode=b"r") as fp:
1506 1511 yield None
1507 1512 size = index_size + data_size
1508 1513 if size <= 65536:
1509 1514 yield fp.read(size)
1510 1515 else:
1511 1516 yield from util.filechunkiter(fp, limit=size)
1512 1517
1513 1518 inline_stream = get_stream()
1514 1519 next(inline_stream)
1515 1520 return [
1516 1521 (self._indexfile, inline_stream, index_size + data_size),
1517 1522 ]
1518 1523 elif force_inline:
1519 1524
1520 1525 def get_stream():
1521 1526 with self.reading():
1522 1527 yield None
1523 1528
1524 1529 for rev in range(n):
1525 1530 idx = self.index.entry_binary(rev)
1526 1531 if rev == 0 and self._docket is None:
1527 1532 # re-inject the inline flag
1528 1533 header = self._format_flags
1529 1534 header |= self._format_version
1530 1535 header |= FLAG_INLINE_DATA
1531 1536 header = self.index.pack_header(header)
1532 1537 idx = header + idx
1533 1538 yield idx
1534 1539 yield self._inner.get_segment_for_revs(rev, rev)[1]
1535 1540
1536 1541 inline_stream = get_stream()
1537 1542 next(inline_stream)
1538 1543 return [
1539 1544 (self._indexfile, inline_stream, index_size + data_size),
1540 1545 ]
1541 1546 else:
1542 1547
1543 1548 def get_index_stream():
1544 1549 with self.opener(self._indexfile, mode=b"r") as fp:
1545 1550 yield None
1546 1551 if index_size <= 65536:
1547 1552 yield fp.read(index_size)
1548 1553 else:
1549 1554 yield from util.filechunkiter(fp, limit=index_size)
1550 1555
1551 1556 def get_data_stream():
1552 1557 with self._datafp() as fp:
1553 1558 yield None
1554 1559 if data_size <= 65536:
1555 1560 yield fp.read(data_size)
1556 1561 else:
1557 1562 yield from util.filechunkiter(fp, limit=data_size)
1558 1563
1559 1564 index_stream = get_index_stream()
1560 1565 next(index_stream)
1561 1566 data_stream = get_data_stream()
1562 1567 next(data_stream)
1563 1568 return [
1564 1569 (self._datafile, data_stream, data_size),
1565 1570 (self._indexfile, index_stream, index_size),
1566 1571 ]
1567 1572
1568 1573 def _loadindex(self, docket=None):
1569 1574
1570 1575 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1571 1576
1572 1577 if self.postfix is not None:
1573 1578 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1574 1579 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1575 1580 entry_point = b'%s.i.a' % self.radix
1576 1581 elif self._try_split and self.opener.exists(self._split_index_file):
1577 1582 entry_point = self._split_index_file
1578 1583 else:
1579 1584 entry_point = b'%s.i' % self.radix
1580 1585
1581 1586 if docket is not None:
1582 1587 self._docket = docket
1583 1588 self._docket_file = entry_point
1584 1589 else:
1585 1590 self._initempty = True
1586 1591 entry_data = self._get_data(entry_point, mmapindexthreshold)
1587 1592 if len(entry_data) > 0:
1588 1593 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1589 1594 self._initempty = False
1590 1595 else:
1591 1596 header = new_header
1592 1597
1593 1598 self._format_flags = header & ~0xFFFF
1594 1599 self._format_version = header & 0xFFFF
1595 1600
1596 1601 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1597 1602 if supported_flags is None:
1598 1603 msg = _(b'unknown version (%d) in revlog %s')
1599 1604 msg %= (self._format_version, self.display_id)
1600 1605 raise error.RevlogError(msg)
1601 1606 elif self._format_flags & ~supported_flags:
1602 1607 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1603 1608 display_flag = self._format_flags >> 16
1604 1609 msg %= (display_flag, self._format_version, self.display_id)
1605 1610 raise error.RevlogError(msg)
1606 1611
1607 1612 features = FEATURES_BY_VERSION[self._format_version]
1608 1613 self._inline = features[b'inline'](self._format_flags)
1609 1614 self.delta_config.general_delta = features[b'generaldelta'](
1610 1615 self._format_flags
1611 1616 )
1612 1617 self.feature_config.has_side_data = features[b'sidedata']
1613 1618
1614 1619 if not features[b'docket']:
1615 1620 self._indexfile = entry_point
1616 1621 index_data = entry_data
1617 1622 else:
1618 1623 self._docket_file = entry_point
1619 1624 if self._initempty:
1620 1625 self._docket = docketutil.default_docket(self, header)
1621 1626 else:
1622 1627 self._docket = docketutil.parse_docket(
1623 1628 self, entry_data, use_pending=self._trypending
1624 1629 )
1625 1630
1626 1631 if self._docket is not None:
1627 1632 self._indexfile = self._docket.index_filepath()
1628 1633 index_data = b''
1629 1634 index_size = self._docket.index_end
1630 1635 if index_size > 0:
1631 1636 index_data = self._get_data(
1632 1637 self._indexfile, mmapindexthreshold, size=index_size
1633 1638 )
1634 1639 if len(index_data) < index_size:
1635 1640 msg = _(b'too few index data for %s: got %d, expected %d')
1636 1641 msg %= (self.display_id, len(index_data), index_size)
1637 1642 raise error.RevlogError(msg)
1638 1643
1639 1644 self._inline = False
1640 1645 # generaldelta implied by version 2 revlogs.
1641 1646 self.delta_config.general_delta = True
1642 1647 # the logic for persistent nodemap will be dealt with within the
1643 1648 # main docket, so disable it for now.
1644 1649 self._nodemap_file = None
1645 1650
1646 1651 if self._docket is not None:
1647 1652 self._datafile = self._docket.data_filepath()
1648 1653 self._sidedatafile = self._docket.sidedata_filepath()
1649 1654 elif self.postfix is None:
1650 1655 self._datafile = b'%s.d' % self.radix
1651 1656 else:
1652 1657 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1653 1658
1654 1659 self.nodeconstants = sha1nodeconstants
1655 1660 self.nullid = self.nodeconstants.nullid
1656 1661
1657 1662 # sparse-revlog can't be on without general-delta (issue6056)
1658 1663 if not self.delta_config.general_delta:
1659 1664 self.delta_config.sparse_revlog = False
1660 1665
1661 1666 self._storedeltachains = True
1662 1667
1663 1668 devel_nodemap = (
1664 1669 self._nodemap_file
1665 1670 and force_nodemap
1666 1671 and parse_index_v1_nodemap is not None
1667 1672 )
1668 1673
1669 1674 use_rust_index = False
1670 1675 if rustrevlog is not None:
1671 1676 if self._nodemap_file is not None:
1672 1677 use_rust_index = True
1673 1678 else:
1674 1679 use_rust_index = self.opener.options.get(b'rust.index')
1675 1680
1676 1681 self._parse_index = parse_index_v1
1677 1682 if self._format_version == REVLOGV0:
1678 1683 self._parse_index = revlogv0.parse_index_v0
1679 1684 elif self._format_version == REVLOGV2:
1680 1685 self._parse_index = parse_index_v2
1681 1686 elif self._format_version == CHANGELOGV2:
1682 1687 self._parse_index = parse_index_cl_v2
1683 1688 elif devel_nodemap:
1684 1689 self._parse_index = parse_index_v1_nodemap
1685 1690 elif use_rust_index:
1686 1691 self._parse_index = parse_index_v1_mixed
1687 1692 try:
1688 1693 d = self._parse_index(index_data, self._inline)
1689 1694 index, chunkcache = d
1690 1695 use_nodemap = (
1691 1696 not self._inline
1692 1697 and self._nodemap_file is not None
1693 1698 and hasattr(index, 'update_nodemap_data')
1694 1699 )
1695 1700 if use_nodemap:
1696 1701 nodemap_data = nodemaputil.persisted_data(self)
1697 1702 if nodemap_data is not None:
1698 1703 docket = nodemap_data[0]
1699 1704 if (
1700 1705 len(d[0]) > docket.tip_rev
1701 1706 and d[0][docket.tip_rev][7] == docket.tip_node
1702 1707 ):
1703 1708 # no changelog tampering
1704 1709 self._nodemap_docket = docket
1705 1710 index.update_nodemap_data(*nodemap_data)
1706 1711 except (ValueError, IndexError):
1707 1712 raise error.RevlogError(
1708 1713 _(b"index %s is corrupted") % self.display_id
1709 1714 )
1710 1715 self.index = index
1711 1716 # revnum -> (chain-length, sum-delta-length)
1712 1717 self._chaininfocache = util.lrucachedict(500)
1713 1718
1714 1719 return chunkcache
1715 1720
1716 1721 def _load_inner(self, chunk_cache):
1717 1722 if self._docket is None:
1718 1723 default_compression_header = None
1719 1724 else:
1720 1725 default_compression_header = self._docket.default_compression_header
1721 1726
1722 1727 self._inner = _InnerRevlog(
1723 1728 opener=self.opener,
1724 1729 index=self.index,
1725 1730 index_file=self._indexfile,
1726 1731 data_file=self._datafile,
1727 1732 sidedata_file=self._sidedatafile,
1728 1733 inline=self._inline,
1729 1734 data_config=self.data_config,
1730 1735 delta_config=self.delta_config,
1731 1736 feature_config=self.feature_config,
1732 1737 chunk_cache=chunk_cache,
1733 1738 default_compression_header=default_compression_header,
1734 1739 )
1735 1740
1736 1741 def get_revlog(self):
1737 1742 """simple function to mirror API of other not-really-revlog API"""
1738 1743 return self
1739 1744
1740 1745 @util.propertycache
1741 1746 def revlog_kind(self):
1742 1747 return self.target[0]
1743 1748
1744 1749 @util.propertycache
1745 1750 def display_id(self):
1746 1751 """The public facing "ID" of the revlog that we use in message"""
1747 1752 if self.revlog_kind == KIND_FILELOG:
1748 1753 # Reference the file without the "data/" prefix, so it is familiar
1749 1754 # to the user.
1750 1755 return self.target[1]
1751 1756 else:
1752 1757 return self.radix
1753 1758
1754 1759 def _datafp(self, mode=b'r'):
1755 1760 """file object for the revlog's data file"""
1756 1761 return self.opener(self._datafile, mode=mode)
1757 1762
1758 1763 def tiprev(self):
1759 1764 return len(self.index) - 1
1760 1765
1761 1766 def tip(self):
1762 1767 return self.node(self.tiprev())
1763 1768
1764 1769 def __contains__(self, rev):
1765 1770 return 0 <= rev < len(self)
1766 1771
1767 1772 def __len__(self):
1768 1773 return len(self.index)
1769 1774
1770 1775 def __iter__(self):
1771 1776 return iter(range(len(self)))
1772 1777
1773 1778 def revs(self, start=0, stop=None):
1774 1779 """iterate over all rev in this revlog (from start to stop)"""
1775 1780 return storageutil.iterrevs(len(self), start=start, stop=stop)
1776 1781
1777 1782 def hasnode(self, node):
1778 1783 try:
1779 1784 self.rev(node)
1780 1785 return True
1781 1786 except KeyError:
1782 1787 return False
1783 1788
1784 1789 def _candelta(self, baserev, rev):
1785 1790 """whether two revisions (baserev, rev) can be delta-ed or not"""
1786 1791 # Disable delta if either rev requires a content-changing flag
1787 1792 # processor (ex. LFS). This is because such flag processor can alter
1788 1793 # the rawtext content that the delta will be based on, and two clients
1789 1794 # could have a same revlog node with different flags (i.e. different
1790 1795 # rawtext contents) and the delta could be incompatible.
1791 1796 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1792 1797 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1793 1798 ):
1794 1799 return False
1795 1800 return True
1796 1801
1797 1802 def update_caches(self, transaction):
1798 1803 """update on disk cache
1799 1804
1800 1805 If a transaction is passed, the update may be delayed to transaction
1801 1806 commit."""
1802 1807 if self._nodemap_file is not None:
1803 1808 if transaction is None:
1804 1809 nodemaputil.update_persistent_nodemap(self)
1805 1810 else:
1806 1811 nodemaputil.setup_persistent_nodemap(transaction, self)
1807 1812
1808 1813 def clearcaches(self):
1809 1814 """Clear in-memory caches"""
1810 self._inner._revisioncache = None
1811 1815 self._chainbasecache.clear()
1812 self._inner._segmentfile.clear_cache()
1813 self._inner._segmentfile_sidedata.clear_cache()
1816 self._inner.clear_cache()
1814 1817 self._pcache = {}
1815 1818 self._nodemap_docket = None
1816 1819 self.index.clearcaches()
1817 1820 # The python code is the one responsible for validating the docket, so we
1818 1821 # end up having to refresh it here.
1819 1822 use_nodemap = (
1820 1823 not self._inline
1821 1824 and self._nodemap_file is not None
1822 1825 and hasattr(self.index, 'update_nodemap_data')
1823 1826 )
1824 1827 if use_nodemap:
1825 1828 nodemap_data = nodemaputil.persisted_data(self)
1826 1829 if nodemap_data is not None:
1827 1830 self._nodemap_docket = nodemap_data[0]
1828 1831 self.index.update_nodemap_data(*nodemap_data)
1829 1832
1830 1833 def rev(self, node):
1831 1834 """return the revision number associated with a <nodeid>"""
1832 1835 try:
1833 1836 return self.index.rev(node)
1834 1837 except TypeError:
1835 1838 raise
1836 1839 except error.RevlogError:
1837 1840 # parsers.c radix tree lookup failed
1838 1841 if (
1839 1842 node == self.nodeconstants.wdirid
1840 1843 or node in self.nodeconstants.wdirfilenodeids
1841 1844 ):
1842 1845 raise error.WdirUnsupported
1843 1846 raise error.LookupError(node, self.display_id, _(b'no node'))
1844 1847
1845 1848 # Accessors for index entries.
1846 1849
1847 1850 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1848 1851 # are flags.
1849 1852 def start(self, rev):
1850 1853 return int(self.index[rev][0] >> 16)
1851 1854
1852 1855 def sidedata_cut_off(self, rev):
1853 1856 sd_cut_off = self.index[rev][8]
1854 1857 if sd_cut_off != 0:
1855 1858 return sd_cut_off
1856 1859 # This is some annoying dance, because entries without sidedata
1857 1860 # currently use 0 as their offset. (instead of previous-offset +
1858 1861 # previous-size)
1859 1862 #
1860 1863 # We should reconsider this sidedata -> 0 sidedata_offset policy.
1861 1864 # In the meantime, we need this.
1862 1865 while 0 <= rev:
1863 1866 e = self.index[rev]
1864 1867 if e[9] != 0:
1865 1868 return e[8] + e[9]
1866 1869 rev -= 1
1867 1870 return 0
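# For illustration, assuming a hypothetical index where rev 2 stored 10
# bytes of sidedata at offset 100 and rev 3 stored none (offset 0,
# length 0): the fallback walk above returns 110 for rev 3, the end of
# the most recent revision that actually carries sidedata.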
1868 1871
1869 1872 def flags(self, rev):
1870 1873 return self.index[rev][0] & 0xFFFF
1871 1874
1872 1875 def length(self, rev):
1873 1876 return self.index[rev][1]
1874 1877
1875 1878 def sidedata_length(self, rev):
1876 1879 if not self.feature_config.has_side_data:
1877 1880 return 0
1878 1881 return self.index[rev][9]
1879 1882
1880 1883 def rawsize(self, rev):
1881 1884 """return the length of the uncompressed text for a given revision"""
1882 1885 l = self.index[rev][2]
1883 1886 if l >= 0:
1884 1887 return l
1885 1888
1886 1889 t = self.rawdata(rev)
1887 1890 return len(t)
1888 1891
1889 1892 def size(self, rev):
1890 1893 """length of non-raw text (processed by a "read" flag processor)"""
1891 1894 # fast path: if no "read" flag processor could change the content,
1892 1895 # size is rawsize. note: ELLIPSIS is known to not change the content.
1893 1896 flags = self.flags(rev)
1894 1897 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1895 1898 return self.rawsize(rev)
1896 1899
1897 1900 return len(self.revision(rev))
1898 1901
1899 1902 def fast_rank(self, rev):
1900 1903 """Return the rank of a revision if already known, or None otherwise.
1901 1904
1902 1905 The rank of a revision is the size of the sub-graph it defines as a
1903 1906 head. Equivalently, the rank of a revision `r` is the size of the set
1904 1907 `ancestors(r)`, `r` included.
1905 1908
1906 1909 This method returns the rank retrieved from the revlog in constant
1907 1910 time. It makes no attempt at computing unknown values for versions of
1908 1911 the revlog which do not persist the rank.
1909 1912 """
1910 1913 rank = self.index[rev][ENTRY_RANK]
1911 1914 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1912 1915 return None
1913 1916 if rev == nullrev:
1914 1917 return 0 # convention
1915 1918 return rank
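# For illustration: in a hypothetical linear history 0 <- 1 <- 2 stored in
# a CHANGELOGV2 revlog that persisted ranks, fast_rank(2) is 3 (revs 0, 1
# and 2 are all in ancestors(2), rev 2 included); on any other revlog
# version the method simply returns None.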
1916 1919
1917 1920 def chainbase(self, rev):
1918 1921 base = self._chainbasecache.get(rev)
1919 1922 if base is not None:
1920 1923 return base
1921 1924
1922 1925 index = self.index
1923 1926 iterrev = rev
1924 1927 base = index[iterrev][3]
1925 1928 while base != iterrev:
1926 1929 iterrev = base
1927 1930 base = index[iterrev][3]
1928 1931
1929 1932 self._chainbasecache[rev] = base
1930 1933 return base
1931 1934
1932 1935 def linkrev(self, rev):
1933 1936 return self.index[rev][4]
1934 1937
1935 1938 def parentrevs(self, rev):
1936 1939 try:
1937 1940 entry = self.index[rev]
1938 1941 except IndexError:
1939 1942 if rev == wdirrev:
1940 1943 raise error.WdirUnsupported
1941 1944 raise
1942 1945
1943 1946 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1944 1947 return entry[6], entry[5]
1945 1948 else:
1946 1949 return entry[5], entry[6]
1947 1950
1948 1951 # fast parentrevs(rev) where rev isn't filtered
1949 1952 _uncheckedparentrevs = parentrevs
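# Note on the ordering above: with canonical_parent_order enabled, an entry
# stored as (p1=nullrev, p2=X) is reported as (X, nullrev), so e.g. a
# hypothetical stored parent pair of (-1, 4) comes back as (4, -1) and the
# null parent always ends up in the second slot.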
1950 1953
1951 1954 def node(self, rev):
1952 1955 try:
1953 1956 return self.index[rev][7]
1954 1957 except IndexError:
1955 1958 if rev == wdirrev:
1956 1959 raise error.WdirUnsupported
1957 1960 raise
1958 1961
1959 1962 # Derived from index values.
1960 1963
1961 1964 def end(self, rev):
1962 1965 return self.start(rev) + self.length(rev)
1963 1966
1964 1967 def parents(self, node):
1965 1968 i = self.index
1966 1969 d = i[self.rev(node)]
1967 1970 # inline node() to avoid function call overhead
1968 1971 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1969 1972 return i[d[6]][7], i[d[5]][7]
1970 1973 else:
1971 1974 return i[d[5]][7], i[d[6]][7]
1972 1975
1973 1976 def chainlen(self, rev):
1974 1977 return self._chaininfo(rev)[0]
1975 1978
1976 1979 def _chaininfo(self, rev):
1977 1980 chaininfocache = self._chaininfocache
1978 1981 if rev in chaininfocache:
1979 1982 return chaininfocache[rev]
1980 1983 index = self.index
1981 1984 generaldelta = self.delta_config.general_delta
1982 1985 iterrev = rev
1983 1986 e = index[iterrev]
1984 1987 clen = 0
1985 1988 compresseddeltalen = 0
1986 1989 while iterrev != e[3]:
1987 1990 clen += 1
1988 1991 compresseddeltalen += e[1]
1989 1992 if generaldelta:
1990 1993 iterrev = e[3]
1991 1994 else:
1992 1995 iterrev -= 1
1993 1996 if iterrev in chaininfocache:
1994 1997 t = chaininfocache[iterrev]
1995 1998 clen += t[0]
1996 1999 compresseddeltalen += t[1]
1997 2000 break
1998 2001 e = index[iterrev]
1999 2002 else:
2000 2003 # Add text length of base since decompressing that also takes
2001 2004 # work. For cache hits the length is already included.
2002 2005 compresseddeltalen += e[1]
2003 2006 r = (clen, compresseddeltalen)
2004 2007 chaininfocache[rev] = r
2005 2008 return r
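# For illustration, consider a hypothetical delta chain base <- d1 <- d2
# where rev is d2: the loop above yields clen == 2 and compresseddeltalen ==
# length(d2) + length(d1) + length(base), i.e. the number of deltas to apply
# plus the total on-disk size that must be decompressed to rebuild the text.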
2006 2009
2007 2010 def _deltachain(self, rev, stoprev=None):
2008 2011 return self._inner._deltachain(rev, stoprev=stoprev)
2009 2012
2010 2013 def ancestors(self, revs, stoprev=0, inclusive=False):
2011 2014 """Generate the ancestors of 'revs' in reverse revision order.
2012 2015 Does not generate revs lower than stoprev.
2013 2016
2014 2017 See the documentation for ancestor.lazyancestors for more details."""
2015 2018
2016 2019 # first, make sure start revisions aren't filtered
2017 2020 revs = list(revs)
2018 2021 checkrev = self.node
2019 2022 for r in revs:
2020 2023 checkrev(r)
2021 2024 # and we're sure ancestors aren't filtered as well
2022 2025
2023 2026 if rustancestor is not None and self.index.rust_ext_compat:
2024 2027 lazyancestors = rustancestor.LazyAncestors
2025 2028 arg = self.index
2026 2029 else:
2027 2030 lazyancestors = ancestor.lazyancestors
2028 2031 arg = self._uncheckedparentrevs
2029 2032 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2030 2033
2031 2034 def descendants(self, revs):
2032 2035 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2033 2036
2034 2037 def findcommonmissing(self, common=None, heads=None):
2035 2038 """Return a tuple of the ancestors of common and the ancestors of heads
2036 2039 that are not ancestors of common. In revset terminology, we return the
2037 2040 tuple:
2038 2041
2039 2042 ::common, (::heads) - (::common)
2040 2043
2041 2044 The list is sorted by revision number, meaning it is
2042 2045 topologically sorted.
2043 2046
2044 2047 'heads' and 'common' are both lists of node IDs. If heads is
2045 2048 not supplied, uses all of the revlog's heads. If common is not
2046 2049 supplied, uses nullid."""
2047 2050 if common is None:
2048 2051 common = [self.nullid]
2049 2052 if heads is None:
2050 2053 heads = self.heads()
2051 2054
2052 2055 common = [self.rev(n) for n in common]
2053 2056 heads = [self.rev(n) for n in heads]
2054 2057
2055 2058 # we want the ancestors, but inclusive
2056 2059 class lazyset:
2057 2060 def __init__(self, lazyvalues):
2058 2061 self.addedvalues = set()
2059 2062 self.lazyvalues = lazyvalues
2060 2063
2061 2064 def __contains__(self, value):
2062 2065 return value in self.addedvalues or value in self.lazyvalues
2063 2066
2064 2067 def __iter__(self):
2065 2068 added = self.addedvalues
2066 2069 for r in added:
2067 2070 yield r
2068 2071 for r in self.lazyvalues:
2069 2072 if r not in added:
2070 2073 yield r
2071 2074
2072 2075 def add(self, value):
2073 2076 self.addedvalues.add(value)
2074 2077
2075 2078 def update(self, values):
2076 2079 self.addedvalues.update(values)
2077 2080
2078 2081 has = lazyset(self.ancestors(common))
2079 2082 has.add(nullrev)
2080 2083 has.update(common)
2081 2084
2082 2085 # take all ancestors from heads that aren't in has
2083 2086 missing = set()
2084 2087 visit = collections.deque(r for r in heads if r not in has)
2085 2088 while visit:
2086 2089 r = visit.popleft()
2087 2090 if r in missing:
2088 2091 continue
2089 2092 else:
2090 2093 missing.add(r)
2091 2094 for p in self.parentrevs(r):
2092 2095 if p not in has:
2093 2096 visit.append(p)
2094 2097 missing = list(missing)
2095 2098 missing.sort()
2096 2099 return has, [self.node(miss) for miss in missing]
2097 2100
2098 2101 def incrementalmissingrevs(self, common=None):
2099 2102 """Return an object that can be used to incrementally compute the
2100 2103 revision numbers of the ancestors of arbitrary sets that are not
2101 2104 ancestors of common. This is an ancestor.incrementalmissingancestors
2102 2105 object.
2103 2106
2104 2107 'common' is a list of revision numbers. If common is not supplied, uses
2105 2108 nullrev.
2106 2109 """
2107 2110 if common is None:
2108 2111 common = [nullrev]
2109 2112
2110 2113 if rustancestor is not None and self.index.rust_ext_compat:
2111 2114 return rustancestor.MissingAncestors(self.index, common)
2112 2115 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2113 2116
2114 2117 def findmissingrevs(self, common=None, heads=None):
2115 2118 """Return the revision numbers of the ancestors of heads that
2116 2119 are not ancestors of common.
2117 2120
2118 2121 More specifically, return a list of revision numbers corresponding to
2119 2122 nodes N such that every N satisfies the following constraints:
2120 2123
2121 2124 1. N is an ancestor of some node in 'heads'
2122 2125 2. N is not an ancestor of any node in 'common'
2123 2126
2124 2127 The list is sorted by revision number, meaning it is
2125 2128 topologically sorted.
2126 2129
2127 2130 'heads' and 'common' are both lists of revision numbers. If heads is
2128 2131 not supplied, uses all of the revlog's heads. If common is not
2129 2132 supplied, uses nullid."""
2130 2133 if common is None:
2131 2134 common = [nullrev]
2132 2135 if heads is None:
2133 2136 heads = self.headrevs()
2134 2137
2135 2138 inc = self.incrementalmissingrevs(common=common)
2136 2139 return inc.missingancestors(heads)
2137 2140
2138 2141 def findmissing(self, common=None, heads=None):
2139 2142 """Return the ancestors of heads that are not ancestors of common.
2140 2143
2141 2144 More specifically, return a list of nodes N such that every N
2142 2145 satisfies the following constraints:
2143 2146
2144 2147 1. N is an ancestor of some node in 'heads'
2145 2148 2. N is not an ancestor of any node in 'common'
2146 2149
2147 2150 The list is sorted by revision number, meaning it is
2148 2151 topologically sorted.
2149 2152
2150 2153 'heads' and 'common' are both lists of node IDs. If heads is
2151 2154 not supplied, uses all of the revlog's heads. If common is not
2152 2155 supplied, uses nullid."""
2153 2156 if common is None:
2154 2157 common = [self.nullid]
2155 2158 if heads is None:
2156 2159 heads = self.heads()
2157 2160
2158 2161 common = [self.rev(n) for n in common]
2159 2162 heads = [self.rev(n) for n in heads]
2160 2163
2161 2164 inc = self.incrementalmissingrevs(common=common)
2162 2165 return [self.node(r) for r in inc.missingancestors(heads)]
2163 2166
2164 2167 def nodesbetween(self, roots=None, heads=None):
2165 2168 """Return a topological path from 'roots' to 'heads'.
2166 2169
2167 2170 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2168 2171 topologically sorted list of all nodes N that satisfy both of
2169 2172 these constraints:
2170 2173
2171 2174 1. N is a descendant of some node in 'roots'
2172 2175 2. N is an ancestor of some node in 'heads'
2173 2176
2174 2177 Every node is considered to be both a descendant and an ancestor
2175 2178 of itself, so every reachable node in 'roots' and 'heads' will be
2176 2179 included in 'nodes'.
2177 2180
2178 2181 'outroots' is the list of reachable nodes in 'roots', i.e., the
2179 2182 subset of 'roots' that is returned in 'nodes'. Likewise,
2180 2183 'outheads' is the subset of 'heads' that is also in 'nodes'.
2181 2184
2182 2185 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2183 2186 unspecified, uses nullid as the only root. If 'heads' is
2184 2187 unspecified, uses list of all of the revlog's heads."""
2185 2188 nonodes = ([], [], [])
2186 2189 if roots is not None:
2187 2190 roots = list(roots)
2188 2191 if not roots:
2189 2192 return nonodes
2190 2193 lowestrev = min([self.rev(n) for n in roots])
2191 2194 else:
2192 2195 roots = [self.nullid] # Everybody's a descendant of nullid
2193 2196 lowestrev = nullrev
2194 2197 if (lowestrev == nullrev) and (heads is None):
2195 2198 # We want _all_ the nodes!
2196 2199 return (
2197 2200 [self.node(r) for r in self],
2198 2201 [self.nullid],
2199 2202 list(self.heads()),
2200 2203 )
2201 2204 if heads is None:
2202 2205 # All nodes are ancestors, so the latest ancestor is the last
2203 2206 # node.
2204 2207 highestrev = len(self) - 1
2205 2208 # Set ancestors to None to signal that every node is an ancestor.
2206 2209 ancestors = None
2207 2210 # Set heads to an empty dictionary for later discovery of heads
2208 2211 heads = {}
2209 2212 else:
2210 2213 heads = list(heads)
2211 2214 if not heads:
2212 2215 return nonodes
2213 2216 ancestors = set()
2214 2217 # Turn heads into a dictionary so we can remove 'fake' heads.
2215 2218 # Also, later we will be using it to filter out the heads we can't
2216 2219 # find from roots.
2217 2220 heads = dict.fromkeys(heads, False)
2218 2221 # Start at the top and keep marking parents until we're done.
2219 2222 nodestotag = set(heads)
2220 2223 # Remember where the top was so we can use it as a limit later.
2221 2224 highestrev = max([self.rev(n) for n in nodestotag])
2222 2225 while nodestotag:
2223 2226 # grab a node to tag
2224 2227 n = nodestotag.pop()
2225 2228 # Never tag nullid
2226 2229 if n == self.nullid:
2227 2230 continue
2228 2231 # A node's revision number represents its place in a
2229 2232 # topologically sorted list of nodes.
2230 2233 r = self.rev(n)
2231 2234 if r >= lowestrev:
2232 2235 if n not in ancestors:
2233 2236 # If we are possibly a descendant of one of the roots
2234 2237 # and we haven't already been marked as an ancestor
2235 2238 ancestors.add(n) # Mark as ancestor
2236 2239 # Add non-nullid parents to list of nodes to tag.
2237 2240 nodestotag.update(
2238 2241 [p for p in self.parents(n) if p != self.nullid]
2239 2242 )
2240 2243 elif n in heads: # We've seen it before, is it a fake head?
2241 2244 # So it is, real heads should not be the ancestors of
2242 2245 # any other heads.
2243 2246 heads.pop(n)
2244 2247 if not ancestors:
2245 2248 return nonodes
2246 2249 # Now that we have our set of ancestors, we want to remove any
2247 2250 # roots that are not ancestors.
2248 2251
2249 2252 # If one of the roots was nullid, everything is included anyway.
2250 2253 if lowestrev > nullrev:
2251 2254 # But, since we weren't, let's recompute the lowest rev to not
2252 2255 # include roots that aren't ancestors.
2253 2256
2254 2257 # Filter out roots that aren't ancestors of heads
2255 2258 roots = [root for root in roots if root in ancestors]
2256 2259 # Recompute the lowest revision
2257 2260 if roots:
2258 2261 lowestrev = min([self.rev(root) for root in roots])
2259 2262 else:
2260 2263 # No more roots? Return empty list
2261 2264 return nonodes
2262 2265 else:
2263 2266 # We are descending from nullid, and don't need to care about
2264 2267 # any other roots.
2265 2268 lowestrev = nullrev
2266 2269 roots = [self.nullid]
2267 2270 # Transform our roots list into a set.
2268 2271 descendants = set(roots)
2269 2272 # Also, keep the original roots so we can filter out roots that aren't
2270 2273 # 'real' roots (i.e. are descended from other roots).
2271 2274 roots = descendants.copy()
2272 2275 # Our topologically sorted list of output nodes.
2273 2276 orderedout = []
2274 2277 # Don't start at nullid since we don't want nullid in our output list,
2275 2278 # and if nullid shows up in descendants, empty parents will look like
2276 2279 # they're descendants.
2277 2280 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2278 2281 n = self.node(r)
2279 2282 isdescendant = False
2280 2283 if lowestrev == nullrev: # Everybody is a descendant of nullid
2281 2284 isdescendant = True
2282 2285 elif n in descendants:
2283 2286 # n is already a descendant
2284 2287 isdescendant = True
2285 2288 # This check only needs to be done here because all the roots
2286 2289 # will start being marked as descendants before the loop.
2287 2290 if n in roots:
2288 2291 # If n was a root, check if it's a 'real' root.
2289 2292 p = tuple(self.parents(n))
2290 2293 # If any of its parents are descendants, it's not a root.
2291 2294 if (p[0] in descendants) or (p[1] in descendants):
2292 2295 roots.remove(n)
2293 2296 else:
2294 2297 p = tuple(self.parents(n))
2295 2298 # A node is a descendant if either of its parents are
2296 2299 # descendants. (We seeded the descendants set with the roots
2297 2300 # up there, remember?)
2298 2301 if (p[0] in descendants) or (p[1] in descendants):
2299 2302 descendants.add(n)
2300 2303 isdescendant = True
2301 2304 if isdescendant and ((ancestors is None) or (n in ancestors)):
2302 2305 # Only include nodes that are both descendants and ancestors.
2303 2306 orderedout.append(n)
2304 2307 if (ancestors is not None) and (n in heads):
2305 2308 # We're trying to figure out which heads are reachable
2306 2309 # from roots.
2307 2310 # Mark this head as having been reached
2308 2311 heads[n] = True
2309 2312 elif ancestors is None:
2310 2313 # Otherwise, we're trying to discover the heads.
2311 2314 # Assume this is a head because if it isn't, the next step
2312 2315 # will eventually remove it.
2313 2316 heads[n] = True
2314 2317 # But, obviously its parents aren't.
2315 2318 for p in self.parents(n):
2316 2319 heads.pop(p, None)
2317 2320 heads = [head for head, flag in heads.items() if flag]
2318 2321 roots = list(roots)
2319 2322 assert orderedout
2320 2323 assert roots
2321 2324 assert heads
2322 2325 return (orderedout, roots, heads)
2323 2326
2324 2327 def headrevs(self, revs=None):
2325 2328 if revs is None:
2326 2329 try:
2327 2330 return self.index.headrevs()
2328 2331 except AttributeError:
2329 2332 return self._headrevs()
2330 2333 if rustdagop is not None and self.index.rust_ext_compat:
2331 2334 return rustdagop.headrevs(self.index, revs)
2332 2335 return dagop.headrevs(revs, self._uncheckedparentrevs)
2333 2336
2334 2337 def computephases(self, roots):
2335 2338 return self.index.computephasesmapsets(roots)
2336 2339
2337 2340 def _headrevs(self):
2338 2341 count = len(self)
2339 2342 if not count:
2340 2343 return [nullrev]
2341 2344 # we won't iter over filtered rev so nobody is a head at start
2342 2345 ishead = [0] * (count + 1)
2343 2346 index = self.index
2344 2347 for r in self:
2345 2348 ishead[r] = 1 # I may be a head
2346 2349 e = index[r]
2347 2350 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2348 2351 return [r for r, val in enumerate(ishead) if val]
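# For illustration: the pure-python fallback above first flags every
# unfiltered rev as a potential head, then clears the flag of anything used
# as a parent; in a hypothetical linear history 0 <- 1 <- 2 only rev 2
# keeps its flag, so _headrevs() returns [2].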
2349 2352
2350 2353 def heads(self, start=None, stop=None):
2351 2354 """return the list of all nodes that have no children
2352 2355
2353 2356 if start is specified, only heads that are descendants of
2354 2357 start will be returned
2355 2358 if stop is specified, it will consider all the revs from stop
2356 2359 as if they had no children
2357 2360 """
2358 2361 if start is None and stop is None:
2359 2362 if not len(self):
2360 2363 return [self.nullid]
2361 2364 return [self.node(r) for r in self.headrevs()]
2362 2365
2363 2366 if start is None:
2364 2367 start = nullrev
2365 2368 else:
2366 2369 start = self.rev(start)
2367 2370
2368 2371 stoprevs = {self.rev(n) for n in stop or []}
2369 2372
2370 2373 revs = dagop.headrevssubset(
2371 2374 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2372 2375 )
2373 2376
2374 2377 return [self.node(rev) for rev in revs]
2375 2378
2376 2379 def children(self, node):
2377 2380 """find the children of a given node"""
2378 2381 c = []
2379 2382 p = self.rev(node)
2380 2383 for r in self.revs(start=p + 1):
2381 2384 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2382 2385 if prevs:
2383 2386 for pr in prevs:
2384 2387 if pr == p:
2385 2388 c.append(self.node(r))
2386 2389 elif p == nullrev:
2387 2390 c.append(self.node(r))
2388 2391 return c
2389 2392
2390 2393 def commonancestorsheads(self, a, b):
2391 2394 """calculate all the heads of the common ancestors of nodes a and b"""
2392 2395 a, b = self.rev(a), self.rev(b)
2393 2396 ancs = self._commonancestorsheads(a, b)
2394 2397 return pycompat.maplist(self.node, ancs)
2395 2398
2396 2399 def _commonancestorsheads(self, *revs):
2397 2400 """calculate all the heads of the common ancestors of revs"""
2398 2401 try:
2399 2402 ancs = self.index.commonancestorsheads(*revs)
2400 2403 except (AttributeError, OverflowError): # C implementation failed
2401 2404 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2402 2405 return ancs
2403 2406
2404 2407 def isancestor(self, a, b):
2405 2408 """return True if node a is an ancestor of node b
2406 2409
2407 2410 A revision is considered an ancestor of itself."""
2408 2411 a, b = self.rev(a), self.rev(b)
2409 2412 return self.isancestorrev(a, b)
2410 2413
2411 2414 def isancestorrev(self, a, b):
2412 2415 """return True if revision a is an ancestor of revision b
2413 2416
2414 2417 A revision is considered an ancestor of itself.
2415 2418
2416 2419 The implementation of this is trivial but the use of
2417 2420 reachableroots is not."""
2418 2421 if a == nullrev:
2419 2422 return True
2420 2423 elif a == b:
2421 2424 return True
2422 2425 elif a > b:
2423 2426 return False
2424 2427 return bool(self.reachableroots(a, [b], [a], includepath=False))
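# For illustration, with hypothetical revs a=3 and b=7: the call above asks
# reachableroots() whether rev 3 is reachable while walking the ancestors
# of rev 7 without ever going below rev 3; a non-empty result means 3::7 is
# non-empty, i.e. 3 is an ancestor of 7.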
2425 2428
2426 2429 def reachableroots(self, minroot, heads, roots, includepath=False):
2427 2430 """return (heads(::(<roots> and <roots>::<heads>)))
2428 2431
2429 2432 If includepath is True, return (<roots>::<heads>)."""
2430 2433 try:
2431 2434 return self.index.reachableroots2(
2432 2435 minroot, heads, roots, includepath
2433 2436 )
2434 2437 except AttributeError:
2435 2438 return dagop._reachablerootspure(
2436 2439 self.parentrevs, minroot, roots, heads, includepath
2437 2440 )
2438 2441
2439 2442 def ancestor(self, a, b):
2440 2443 """calculate the "best" common ancestor of nodes a and b"""
2441 2444
2442 2445 a, b = self.rev(a), self.rev(b)
2443 2446 try:
2444 2447 ancs = self.index.ancestors(a, b)
2445 2448 except (AttributeError, OverflowError):
2446 2449 ancs = ancestor.ancestors(self.parentrevs, a, b)
2447 2450 if ancs:
2448 2451 # choose a consistent winner when there's a tie
2449 2452 return min(map(self.node, ancs))
2450 2453 return self.nullid
2451 2454
2452 2455 def _match(self, id):
2453 2456 if isinstance(id, int):
2454 2457 # rev
2455 2458 return self.node(id)
2456 2459 if len(id) == self.nodeconstants.nodelen:
2457 2460 # possibly a binary node
2458 2461 # odds of a binary node being all hex in ASCII are 1 in 10**25
2459 2462 try:
2460 2463 node = id
2461 2464 self.rev(node) # quick search the index
2462 2465 return node
2463 2466 except error.LookupError:
2464 2467 pass # may be partial hex id
2465 2468 try:
2466 2469 # str(rev)
2467 2470 rev = int(id)
2468 2471 if b"%d" % rev != id:
2469 2472 raise ValueError
2470 2473 if rev < 0:
2471 2474 rev = len(self) + rev
2472 2475 if rev < 0 or rev >= len(self):
2473 2476 raise ValueError
2474 2477 return self.node(rev)
2475 2478 except (ValueError, OverflowError):
2476 2479 pass
2477 2480 if len(id) == 2 * self.nodeconstants.nodelen:
2478 2481 try:
2479 2482 # a full hex nodeid?
2480 2483 node = bin(id)
2481 2484 self.rev(node)
2482 2485 return node
2483 2486 except (binascii.Error, error.LookupError):
2484 2487 pass
2485 2488
2486 2489 def _partialmatch(self, id):
2487 2490 # we don't care about wdirfilenodeids as they should always be full hashes
2488 2491 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2489 2492 ambiguous = False
2490 2493 try:
2491 2494 partial = self.index.partialmatch(id)
2492 2495 if partial and self.hasnode(partial):
2493 2496 if maybewdir:
2494 2497 # single 'ff...' match in radix tree, ambiguous with wdir
2495 2498 ambiguous = True
2496 2499 else:
2497 2500 return partial
2498 2501 elif maybewdir:
2499 2502 # no 'ff...' match in radix tree, wdir identified
2500 2503 raise error.WdirUnsupported
2501 2504 else:
2502 2505 return None
2503 2506 except error.RevlogError:
2504 2507 # parsers.c radix tree lookup gave multiple matches
2505 2508 # fast path: for unfiltered changelog, radix tree is accurate
2506 2509 if not getattr(self, 'filteredrevs', None):
2507 2510 ambiguous = True
2508 2511 # fall through to slow path that filters hidden revisions
2509 2512 except (AttributeError, ValueError):
2510 2513 # we are pure python, or key is not hex
2511 2514 pass
2512 2515 if ambiguous:
2513 2516 raise error.AmbiguousPrefixLookupError(
2514 2517 id, self.display_id, _(b'ambiguous identifier')
2515 2518 )
2516 2519
2517 2520 if id in self._pcache:
2518 2521 return self._pcache[id]
2519 2522
2520 2523 if len(id) <= 40:
2521 2524 # hex(node)[:...]
2522 2525 l = len(id) // 2 * 2 # grab an even number of digits
2523 2526 try:
2524 2527 # we're dropping the last digit, so let's check that it's hex,
2525 2528 # to avoid the expensive computation below if it's not
2526 2529 if len(id) % 2 > 0:
2527 2530 if not (id[-1] in hexdigits):
2528 2531 return None
2529 2532 prefix = bin(id[:l])
2530 2533 except binascii.Error:
2531 2534 pass
2532 2535 else:
2533 2536 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2534 2537 nl = [
2535 2538 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2536 2539 ]
2537 2540 if self.nodeconstants.nullhex.startswith(id):
2538 2541 nl.append(self.nullid)
2539 2542 if len(nl) > 0:
2540 2543 if len(nl) == 1 and not maybewdir:
2541 2544 self._pcache[id] = nl[0]
2542 2545 return nl[0]
2543 2546 raise error.AmbiguousPrefixLookupError(
2544 2547 id, self.display_id, _(b'ambiguous identifier')
2545 2548 )
2546 2549 if maybewdir:
2547 2550 raise error.WdirUnsupported
2548 2551 return None
2549 2552
2550 2553 def lookup(self, id):
2551 2554 """locate a node based on:
2552 2555 - revision number or str(revision number)
2553 2556 - nodeid or subset of hex nodeid
2554 2557 """
2555 2558 n = self._match(id)
2556 2559 if n is not None:
2557 2560 return n
2558 2561 n = self._partialmatch(id)
2559 2562 if n:
2560 2563 return n
2561 2564
2562 2565 raise error.LookupError(id, self.display_id, _(b'no match found'))
2563 2566
2564 2567 def shortest(self, node, minlength=1):
2565 2568 """Find the shortest unambiguous prefix that matches node."""
2566 2569
2567 2570 def isvalid(prefix):
2568 2571 try:
2569 2572 matchednode = self._partialmatch(prefix)
2570 2573 except error.AmbiguousPrefixLookupError:
2571 2574 return False
2572 2575 except error.WdirUnsupported:
2573 2576 # single 'ff...' match
2574 2577 return True
2575 2578 if matchednode is None:
2576 2579 raise error.LookupError(node, self.display_id, _(b'no node'))
2577 2580 return True
2578 2581
2579 2582 def maybewdir(prefix):
2580 2583 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2581 2584
2582 2585 hexnode = hex(node)
2583 2586
2584 2587 def disambiguate(hexnode, minlength):
2585 2588 """Disambiguate against wdirid."""
2586 2589 for length in range(minlength, len(hexnode) + 1):
2587 2590 prefix = hexnode[:length]
2588 2591 if not maybewdir(prefix):
2589 2592 return prefix
2590 2593
2591 2594 if not getattr(self, 'filteredrevs', None):
2592 2595 try:
2593 2596 length = max(self.index.shortest(node), minlength)
2594 2597 return disambiguate(hexnode, length)
2595 2598 except error.RevlogError:
2596 2599 if node != self.nodeconstants.wdirid:
2597 2600 raise error.LookupError(
2598 2601 node, self.display_id, _(b'no node')
2599 2602 )
2600 2603 except AttributeError:
2601 2604 # Fall through to pure code
2602 2605 pass
2603 2606
2604 2607 if node == self.nodeconstants.wdirid:
2605 2608 for length in range(minlength, len(hexnode) + 1):
2606 2609 prefix = hexnode[:length]
2607 2610 if isvalid(prefix):
2608 2611 return prefix
2609 2612
2610 2613 for length in range(minlength, len(hexnode) + 1):
2611 2614 prefix = hexnode[:length]
2612 2615 if isvalid(prefix):
2613 2616 return disambiguate(hexnode, length)
2614 2617
2615 2618 def cmp(self, node, text):
2616 2619 """compare text with a given file revision
2617 2620
2618 2621 returns True if text is different than what is stored.
2619 2622 """
2620 2623 p1, p2 = self.parents(node)
2621 2624 return storageutil.hashrevisionsha1(text, p1, p2) != node
2622 2625
2623 2626 def deltaparent(self, rev):
2624 2627 """return deltaparent of the given revision"""
2625 2628 base = self.index[rev][3]
2626 2629 if base == rev:
2627 2630 return nullrev
2628 2631 elif self.delta_config.general_delta:
2629 2632 return base
2630 2633 else:
2631 2634 return rev - 1
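# For illustration: with general-delta the delta parent is whatever base the
# index recorded (possibly far away from rev - 1); without it, deltas always
# chain against the previous revision, so e.g. a hypothetical deltaparent(7)
# is 6 unless rev 7 is stored as a full snapshot (base == rev), in which
# case nullrev is returned.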
2632 2635
2633 2636 def issnapshot(self, rev):
2634 2637 """tells whether rev is a snapshot"""
2635 2638 ret = self._inner.issnapshot(rev)
2636 2639 self.issnapshot = self._inner.issnapshot
2637 2640 return ret
2638 2641
2639 2642 def snapshotdepth(self, rev):
2640 2643 """number of snapshot in the chain before this one"""
2641 2644 if not self.issnapshot(rev):
2642 2645 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2643 2646 return len(self._inner._deltachain(rev)[0]) - 1
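# For illustration: a full snapshot has a delta chain of just itself and
# therefore depth 0, while a hypothetical level-1 intermediate snapshot
# whose chain is [full-snapshot, rev] has depth len(chain) - 1 == 1.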
2644 2647
2645 2648 def revdiff(self, rev1, rev2):
2646 2649 """return or calculate a delta between two revisions
2647 2650
2648 2651 The delta calculated is in binary form and is intended to be written to
2649 2652 revlog data directly. So this function needs raw revision data.
2650 2653 """
2651 2654 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2652 2655 return bytes(self._inner._chunk(rev2))
2653 2656
2654 2657 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
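# For illustration (hypothetical revs): revdiff(5, 6) is cheap when rev 6 is
# already stored as a delta against rev 5, since the raw on-disk chunk can
# be returned as-is; otherwise both rawtexts are rebuilt and a fresh binary
# delta is computed with mdiff.textdiff().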
2655 2658
2656 2659 def revision(self, nodeorrev):
2657 2660 """return an uncompressed revision of a given node or revision
2658 2661 number.
2659 2662 """
2660 2663 return self._revisiondata(nodeorrev)
2661 2664
2662 2665 def sidedata(self, nodeorrev):
2663 2666 """a map of extra data related to the changeset but not part of the hash
2664 2667
2665 2668 This function currently returns a dictionary. However, a more advanced
2666 2669 mapping object will likely be used in the future for more
2667 2670 efficient/lazy code.
2668 2671 """
2669 2672 # deal with <nodeorrev> argument type
2670 2673 if isinstance(nodeorrev, int):
2671 2674 rev = nodeorrev
2672 2675 else:
2673 2676 rev = self.rev(nodeorrev)
2674 2677 return self._sidedata(rev)
2675 2678
2676 2679 def _rawtext(self, node, rev):
2677 2680 """return the possibly unvalidated rawtext for a revision
2678 2681
2679 2682 returns (rev, rawtext, validated)
2680 2683 """
2681 2684 # Check if we have the entry in cache
2682 2685 # The cache entry looks like (node, rev, rawtext)
2683 2686 if self._inner._revisioncache:
2684 2687 if self._inner._revisioncache[0] == node:
2685 2688 return (rev, self._inner._revisioncache[2], True)
2686 2689
2687 2690 if rev is None:
2688 2691 rev = self.rev(node)
2689 2692
2690 2693 return self._inner.raw_text(node, rev)
2691 2694
2692 2695 def _revisiondata(self, nodeorrev, raw=False):
2693 2696 # deal with <nodeorrev> argument type
2694 2697 if isinstance(nodeorrev, int):
2695 2698 rev = nodeorrev
2696 2699 node = self.node(rev)
2697 2700 else:
2698 2701 node = nodeorrev
2699 2702 rev = None
2700 2703
2701 2704 # fast path the special `nullid` rev
2702 2705 if node == self.nullid:
2703 2706 return b""
2704 2707
2705 2708 # ``rawtext`` is the text as stored inside the revlog. Might be the
2706 2709 # revision or might need to be processed to retrieve the revision.
2707 2710 rev, rawtext, validated = self._rawtext(node, rev)
2708 2711
2709 2712 if raw and validated:
2710 2713 # if we don't want to process the raw text and that raw
2711 2714 # text is cached, we can exit early.
2712 2715 return rawtext
2713 2716 if rev is None:
2714 2717 rev = self.rev(node)
2715 2718 # the revlog's flag for this revision
2716 2719 # (usually alter its state or content)
2717 2720 flags = self.flags(rev)
2718 2721
2719 2722 if validated and flags == REVIDX_DEFAULT_FLAGS:
2720 2723 # no extra flags set, no flag processor runs, text = rawtext
2721 2724 return rawtext
2722 2725
2723 2726 if raw:
2724 2727 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2725 2728 text = rawtext
2726 2729 else:
2727 2730 r = flagutil.processflagsread(self, rawtext, flags)
2728 2731 text, validatehash = r
2729 2732 if validatehash:
2730 2733 self.checkhash(text, node, rev=rev)
2731 2734 if not validated:
2732 2735 self._inner._revisioncache = (node, rev, rawtext)
2733 2736
2734 2737 return text
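# Usage sketch (assuming `rl` is a loaded revlog and `node` a known node
# id): rl.revision(node) runs the registered flag processors and returns
# the cooked text, while rl.rawdata(node) takes the raw=True path above and
# returns the bytes as stored, only decompressed.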
2735 2738
2736 2739 def _sidedata(self, rev):
2737 2740 """Return the sidedata for a given revision number."""
2738 2741 sidedata_end = None
2739 2742 if self._docket is not None:
2740 2743 sidedata_end = self._docket.sidedata_end
2741 2744 return self._inner.sidedata(rev, sidedata_end)
2742 2745
2743 2746 def rawdata(self, nodeorrev):
2744 2747 """return an uncompressed raw data of a given node or revision number."""
2745 2748 return self._revisiondata(nodeorrev, raw=True)
2746 2749
2747 2750 def hash(self, text, p1, p2):
2748 2751 """Compute a node hash.
2749 2752
2750 2753 Available as a function so that subclasses can replace the hash
2751 2754 as needed.
2752 2755 """
2753 2756 return storageutil.hashrevisionsha1(text, p1, p2)
2754 2757
2755 2758 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2756 2759 """Check node hash integrity.
2757 2760
2758 2761 Available as a function so that subclasses can extend hash mismatch
2759 2762 behaviors as needed.
2760 2763 """
2761 2764 try:
2762 2765 if p1 is None and p2 is None:
2763 2766 p1, p2 = self.parents(node)
2764 2767 if node != self.hash(text, p1, p2):
2765 2768 # Clear the revision cache on hash failure. The revision cache
2766 2769 # only stores the raw revision and clearing the cache does have
2767 2770 # the side-effect that we won't have a cache hit when the raw
2768 2771 # revision data is accessed. But this case should be rare and
2769 2772 # it is extra work to teach the cache about the hash
2770 2773 # verification state.
2771 2774 if (
2772 2775 self._inner._revisioncache
2773 2776 and self._inner._revisioncache[0] == node
2774 2777 ):
2775 2778 self._inner._revisioncache = None
2776 2779
2777 2780 revornode = rev
2778 2781 if revornode is None:
2779 2782 revornode = templatefilters.short(hex(node))
2780 2783 raise error.RevlogError(
2781 2784 _(b"integrity check failed on %s:%s")
2782 2785 % (self.display_id, pycompat.bytestr(revornode))
2783 2786 )
2784 2787 except error.RevlogError:
2785 2788 if self.feature_config.censorable and storageutil.iscensoredtext(
2786 2789 text
2787 2790 ):
2788 2791 raise error.CensoredNodeError(self.display_id, node, text)
2789 2792 raise
2790 2793
2791 2794 @property
2792 2795 def _split_index_file(self):
2793 2796 """the path where to expect the index of an ongoing splitting operation
2794 2797
2795 2798 The file will only exist if a splitting operation is in progress, but
2796 2799 it is always expected at the same location."""
2797 2800 parts = self.radix.split(b'/')
2798 2801 if len(parts) > 1:
2799 2802 # adds a '-s' suffix to the `data/` or `meta/` base
2800 2803 head = parts[0] + b'-s'
2801 2804 mids = parts[1:-1]
2802 2805 tail = parts[-1] + b'.i'
2803 2806 pieces = [head] + mids + [tail]
2804 2807 return b'/'.join(pieces)
2805 2808 else:
2806 2809 # the revlog is stored at the root of the store (changelog or
2807 2810 # manifest), no risk of collision.
2808 2811 return self.radix + b'.i.s'
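# For illustration: a hypothetical filelog with radix b'data/some/dir/f'
# gets its temporary split index at b'data-s/some/dir/f.i', while a
# store-root revlog with radix b'00manifest' uses b'00manifest.i.s'.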
2809 2812
2810 2813 def _enforceinlinesize(self, tr, side_write=True):
2811 2814 """Check if the revlog is too big for inline and convert if so.
2812 2815
2813 2816 This should be called after revisions are added to the revlog. If the
2814 2817 revlog has grown too large to be an inline revlog, it will convert it
2815 2818 to use multiple index and data files.
2816 2819 """
2817 2820 tiprev = len(self) - 1
2818 2821 total_size = self.start(tiprev) + self.length(tiprev)
2819 2822 if not self._inline or total_size < _maxinline:
2820 2823 return
2821 2824
2822 2825 if self._docket is not None:
2823 2826 msg = b"inline revlog should not have a docket"
2824 2827 raise error.ProgrammingError(msg)
2825 2828
2826 2829 troffset = tr.findoffset(self._indexfile)
2827 2830 if troffset is None:
2828 2831 raise error.RevlogError(
2829 2832 _(b"%s not found in the transaction") % self._indexfile
2830 2833 )
2831 2834 if troffset:
2832 2835 tr.addbackup(self._indexfile, for_offset=True)
2833 2836 tr.add(self._datafile, 0)
2834 2837
2835 2838 new_index_file_path = None
2836 2839 if side_write:
2837 2840 old_index_file_path = self._indexfile
2838 2841 new_index_file_path = self._split_index_file
2839 2842 opener = self.opener
2840 2843 weak_self = weakref.ref(self)
2841 2844
2842 2845 # the "split" index replace the real index when the transaction is
2843 2846 # finalized
2844 2847 def finalize_callback(tr):
2845 2848 opener.rename(
2846 2849 new_index_file_path,
2847 2850 old_index_file_path,
2848 2851 checkambig=True,
2849 2852 )
2850 2853 maybe_self = weak_self()
2851 2854 if maybe_self is not None:
2852 2855 maybe_self._indexfile = old_index_file_path
2853 2856 maybe_self._inner.index_file = maybe_self._indexfile
2854 2857
2855 2858 def abort_callback(tr):
2856 2859 maybe_self = weak_self()
2857 2860 if maybe_self is not None:
2858 2861 maybe_self._indexfile = old_index_file_path
2859 2862 maybe_self._inner.inline = True
2860 2863 maybe_self._inner.index_file = old_index_file_path
2861 2864
2862 2865 tr.registertmp(new_index_file_path)
2863 2866 if self.target[1] is not None:
2864 2867 callback_id = b'000-revlog-split-%d-%s' % self.target
2865 2868 else:
2866 2869 callback_id = b'000-revlog-split-%d' % self.target[0]
2867 2870 tr.addfinalize(callback_id, finalize_callback)
2868 2871 tr.addabort(callback_id, abort_callback)
2869 2872
2870 2873 self._format_flags &= ~FLAG_INLINE_DATA
2871 2874 self._inner.split_inline(
2872 2875 tr,
2873 2876 self._format_flags | self._format_version,
2874 2877 new_index_file_path=new_index_file_path,
2875 2878 )
2876 2879
2877 2880 self._inline = False
2878 2881 if new_index_file_path is not None:
2879 2882 self._indexfile = new_index_file_path
2880 2883
2881 2884 nodemaputil.setup_persistent_nodemap(tr, self)
2882 2885
2883 2886 def _nodeduplicatecallback(self, transaction, node):
2884 2887 """called when trying to add a node already stored."""
2885 2888
2886 2889 @contextlib.contextmanager
2887 2890 def reading(self):
2888 2891 with self._inner.reading():
2889 2892 yield
2890 2893
2891 2894 @contextlib.contextmanager
2892 2895 def _writing(self, transaction):
2893 2896 if self._trypending:
2894 2897 msg = b'try to write in a `trypending` revlog: %s'
2895 2898 msg %= self.display_id
2896 2899 raise error.ProgrammingError(msg)
2897 2900 if self._inner.is_writing:
2898 2901 yield
2899 2902 else:
2900 2903 data_end = None
2901 2904 sidedata_end = None
2902 2905 if self._docket is not None:
2903 2906 data_end = self._docket.data_end
2904 2907 sidedata_end = self._docket.sidedata_end
2905 2908 with self._inner.writing(
2906 2909 transaction,
2907 2910 data_end=data_end,
2908 2911 sidedata_end=sidedata_end,
2909 2912 ):
2910 2913 yield
2911 2914 if self._docket is not None:
2912 2915 self._write_docket(transaction)
2913 2916
2914 2917 def _write_docket(self, transaction):
2915 2918 """write the current docket on disk
2916 2919
2917 2920 Exists as a method to help the changelog implement transaction logic
2918 2921
2919 2922 We could also imagine using the same transaction logic for all revlogs
2920 2923 since dockets are cheap."""
2921 2924 self._docket.write(transaction)
2922 2925
2923 2926 def addrevision(
2924 2927 self,
2925 2928 text,
2926 2929 transaction,
2927 2930 link,
2928 2931 p1,
2929 2932 p2,
2930 2933 cachedelta=None,
2931 2934 node=None,
2932 2935 flags=REVIDX_DEFAULT_FLAGS,
2933 2936 deltacomputer=None,
2934 2937 sidedata=None,
2935 2938 ):
2936 2939 """add a revision to the log
2937 2940
2938 2941 text - the revision data to add
2939 2942 transaction - the transaction object used for rollback
2940 2943 link - the linkrev data to add
2941 2944 p1, p2 - the parent nodeids of the revision
2942 2945 cachedelta - an optional precomputed delta
2943 2946 node - nodeid of revision; typically node is not specified, and it is
2944 2947 computed by default as hash(text, p1, p2), however subclasses might
2945 2948 use a different hashing method (and override checkhash() in such a case)
2946 2949 flags - the known flags to set on the revision
2947 2950 deltacomputer - an optional deltacomputer instance shared between
2948 2951 multiple calls
2949 2952 """
2950 2953 if link == nullrev:
2951 2954 raise error.RevlogError(
2952 2955 _(b"attempted to add linkrev -1 to %s") % self.display_id
2953 2956 )
2954 2957
2955 2958 if sidedata is None:
2956 2959 sidedata = {}
2957 2960 elif sidedata and not self.feature_config.has_side_data:
2958 2961 raise error.ProgrammingError(
2959 2962 _(b"trying to add sidedata to a revlog who don't support them")
2960 2963 )
2961 2964
2962 2965 if flags:
2963 2966 node = node or self.hash(text, p1, p2)
2964 2967
2965 2968 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2966 2969
2967 2970 # If the flag processor modifies the revision data, ignore any provided
2968 2971 # cachedelta.
2969 2972 if rawtext != text:
2970 2973 cachedelta = None
2971 2974
2972 2975 if len(rawtext) > _maxentrysize:
2973 2976 raise error.RevlogError(
2974 2977 _(
2975 2978 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2976 2979 )
2977 2980 % (self.display_id, len(rawtext))
2978 2981 )
2979 2982
2980 2983 node = node or self.hash(rawtext, p1, p2)
2981 2984 rev = self.index.get_rev(node)
2982 2985 if rev is not None:
2983 2986 return rev
2984 2987
2985 2988 if validatehash:
2986 2989 self.checkhash(rawtext, node, p1=p1, p2=p2)
2987 2990
2988 2991 return self.addrawrevision(
2989 2992 rawtext,
2990 2993 transaction,
2991 2994 link,
2992 2995 p1,
2993 2996 p2,
2994 2997 node,
2995 2998 flags,
2996 2999 cachedelta=cachedelta,
2997 3000 deltacomputer=deltacomputer,
2998 3001 sidedata=sidedata,
2999 3002 )
3000 3003
3001 3004 def addrawrevision(
3002 3005 self,
3003 3006 rawtext,
3004 3007 transaction,
3005 3008 link,
3006 3009 p1,
3007 3010 p2,
3008 3011 node,
3009 3012 flags,
3010 3013 cachedelta=None,
3011 3014 deltacomputer=None,
3012 3015 sidedata=None,
3013 3016 ):
3014 3017 """add a raw revision with known flags, node and parents
3015 3018 useful when reusing a revision not stored in this revlog (ex: received
3016 3019 over wire, or read from an external bundle).
3017 3020 """
3018 3021 with self._writing(transaction):
3019 3022 return self._addrevision(
3020 3023 node,
3021 3024 rawtext,
3022 3025 transaction,
3023 3026 link,
3024 3027 p1,
3025 3028 p2,
3026 3029 flags,
3027 3030 cachedelta,
3028 3031 deltacomputer=deltacomputer,
3029 3032 sidedata=sidedata,
3030 3033 )
3031 3034
3032 3035 def compress(self, data):
3033 3036 return self._inner.compress(data)
3034 3037
3035 3038 def decompress(self, data):
3036 3039 return self._inner.decompress(data)
3037 3040
3038 3041 def _addrevision(
3039 3042 self,
3040 3043 node,
3041 3044 rawtext,
3042 3045 transaction,
3043 3046 link,
3044 3047 p1,
3045 3048 p2,
3046 3049 flags,
3047 3050 cachedelta,
3048 3051 alwayscache=False,
3049 3052 deltacomputer=None,
3050 3053 sidedata=None,
3051 3054 ):
3052 3055 """internal function to add revisions to the log
3053 3056
3054 3057 see addrevision for argument descriptions.
3055 3058
3056 3059 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3057 3060
3058 3061 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3059 3062 be used.
3060 3063
3061 3064 invariants:
3062 3065 - rawtext is optional (can be None); if not set, cachedelta must be set.
3063 3066 if both are set, they must correspond to each other.
3064 3067 """
3065 3068 if node == self.nullid:
3066 3069 raise error.RevlogError(
3067 3070 _(b"%s: attempt to add null revision") % self.display_id
3068 3071 )
3069 3072 if (
3070 3073 node == self.nodeconstants.wdirid
3071 3074 or node in self.nodeconstants.wdirfilenodeids
3072 3075 ):
3073 3076 raise error.RevlogError(
3074 3077 _(b"%s: attempt to add wdir revision") % self.display_id
3075 3078 )
3076 3079 if self._inner._writinghandles is None:
3077 3080 msg = b'adding revision outside `revlog._writing` context'
3078 3081 raise error.ProgrammingError(msg)
3079 3082
3080 3083 btext = [rawtext]
3081 3084
3082 3085 curr = len(self)
3083 3086 prev = curr - 1
3084 3087
3085 3088 offset = self._get_data_offset(prev)
3086 3089
3087 3090 if self._concurrencychecker:
3088 3091 ifh, dfh, sdfh = self._inner._writinghandles
3089 3092 # XXX no checking for the sidedata file
3090 3093 if self._inline:
3091 3094 # offset is "as if" it were in the .d file, so we need to add on
3092 3095 # the size of the entry metadata.
3093 3096 self._concurrencychecker(
3094 3097 ifh, self._indexfile, offset + curr * self.index.entry_size
3095 3098 )
3096 3099 else:
3097 3100 # Entries in the .i are a consistent size.
3098 3101 self._concurrencychecker(
3099 3102 ifh, self._indexfile, curr * self.index.entry_size
3100 3103 )
3101 3104 self._concurrencychecker(dfh, self._datafile, offset)
3102 3105
3103 3106 p1r, p2r = self.rev(p1), self.rev(p2)
3104 3107
3105 3108 # full versions are inserted when the needed deltas
3106 3109 # become comparable to the uncompressed text
3107 3110 if rawtext is None:
3108 3111 # need rawtext size, before it is changed by flag processors, which is
3109 3112 # the non-raw size. use revlog explicitly to avoid filelog's extra
3110 3113 # logic that might remove metadata size.
3111 3114 textlen = mdiff.patchedsize(
3112 3115 revlog.size(self, cachedelta[0]), cachedelta[1]
3113 3116 )
3114 3117 else:
3115 3118 textlen = len(rawtext)
3116 3119
3117 3120 if deltacomputer is None:
3118 3121 write_debug = None
3119 3122 if self.delta_config.debug_delta:
3120 3123 write_debug = transaction._report
3121 3124 deltacomputer = deltautil.deltacomputer(
3122 3125 self, write_debug=write_debug
3123 3126 )
3124 3127
3125 3128 if cachedelta is not None and len(cachedelta) == 2:
3126 3129 # If the cached delta has no information about how it should be
3127 3130 # reused, add the default reuse instruction according to the
3128 3131 # revlog's configuration.
3129 3132 if (
3130 3133 self.delta_config.general_delta
3131 3134 and self.delta_config.lazy_delta_base
3132 3135 ):
3133 3136 delta_base_reuse = DELTA_BASE_REUSE_TRY
3134 3137 else:
3135 3138 delta_base_reuse = DELTA_BASE_REUSE_NO
3136 3139 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3137 3140
3138 3141 revinfo = revlogutils.revisioninfo(
3139 3142 node,
3140 3143 p1,
3141 3144 p2,
3142 3145 btext,
3143 3146 textlen,
3144 3147 cachedelta,
3145 3148 flags,
3146 3149 )
3147 3150
3148 3151 deltainfo = deltacomputer.finddeltainfo(revinfo)
3149 3152
3150 3153 compression_mode = COMP_MODE_INLINE
3151 3154 if self._docket is not None:
3152 3155 default_comp = self._docket.default_compression_header
3153 3156 r = deltautil.delta_compression(default_comp, deltainfo)
3154 3157 compression_mode, deltainfo = r
3155 3158
3156 3159 sidedata_compression_mode = COMP_MODE_INLINE
3157 3160 if sidedata and self.feature_config.has_side_data:
3158 3161 sidedata_compression_mode = COMP_MODE_PLAIN
3159 3162 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3160 3163 sidedata_offset = self._docket.sidedata_end
3161 3164 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3162 3165 if (
3163 3166 h != b'u'
3164 3167 and comp_sidedata[0:1] != b'\0'
3165 3168 and len(comp_sidedata) < len(serialized_sidedata)
3166 3169 ):
3167 3170 assert not h
3168 3171 if (
3169 3172 comp_sidedata[0:1]
3170 3173 == self._docket.default_compression_header
3171 3174 ):
3172 3175 sidedata_compression_mode = COMP_MODE_DEFAULT
3173 3176 serialized_sidedata = comp_sidedata
3174 3177 else:
3175 3178 sidedata_compression_mode = COMP_MODE_INLINE
3176 3179 serialized_sidedata = comp_sidedata
3177 3180 else:
3178 3181 serialized_sidedata = b""
3179 3182             # Don't store the offset if the sidedata is empty; that way
3180 3183             # we can easily detect empty sidedata, and it will be no
3181 3184             # different from sidedata we add manually.
3182 3185 sidedata_offset = 0
3183 3186
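        # As used below, the rank of a revision is the number of revisions in
        # its ancestor set, itself included: a linear change is one more than
        # its parent's rank, while a merge starts from one parent's rank and
        # adds the revisions that are only reachable from the other parent.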
3184 3187 rank = RANK_UNKNOWN
3185 3188 if self.feature_config.compute_rank:
3186 3189 if (p1r, p2r) == (nullrev, nullrev):
3187 3190 rank = 1
3188 3191 elif p1r != nullrev and p2r == nullrev:
3189 3192 rank = 1 + self.fast_rank(p1r)
3190 3193 elif p1r == nullrev and p2r != nullrev:
3191 3194 rank = 1 + self.fast_rank(p2r)
3192 3195 else: # merge node
3193 3196 if rustdagop is not None and self.index.rust_ext_compat:
3194 3197 rank = rustdagop.rank(self.index, p1r, p2r)
3195 3198 else:
3196 3199 pmin, pmax = sorted((p1r, p2r))
3197 3200 rank = 1 + self.fast_rank(pmax)
3198 3201 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3199 3202
3200 3203 e = revlogutils.entry(
3201 3204 flags=flags,
3202 3205 data_offset=offset,
3203 3206 data_compressed_length=deltainfo.deltalen,
3204 3207 data_uncompressed_length=textlen,
3205 3208 data_compression_mode=compression_mode,
3206 3209 data_delta_base=deltainfo.base,
3207 3210 link_rev=link,
3208 3211 parent_rev_1=p1r,
3209 3212 parent_rev_2=p2r,
3210 3213 node_id=node,
3211 3214 sidedata_offset=sidedata_offset,
3212 3215 sidedata_compressed_length=len(serialized_sidedata),
3213 3216 sidedata_compression_mode=sidedata_compression_mode,
3214 3217 rank=rank,
3215 3218 )
3216 3219
3217 3220 self.index.append(e)
3218 3221 entry = self.index.entry_binary(curr)
3219 3222 if curr == 0 and self._docket is None:
3220 3223 header = self._format_flags | self._format_version
3221 3224 header = self.index.pack_header(header)
3222 3225 entry = header + entry
3223 3226 self._writeentry(
3224 3227 transaction,
3225 3228 entry,
3226 3229 deltainfo.data,
3227 3230 link,
3228 3231 offset,
3229 3232 serialized_sidedata,
3230 3233 sidedata_offset,
3231 3234 )
3232 3235
3233 3236 rawtext = btext[0]
3234 3237
3235 3238 if alwayscache and rawtext is None:
3236 3239 rawtext = deltacomputer.buildtext(revinfo)
3237 3240
3238 3241 if type(rawtext) == bytes: # only accept immutable objects
3239 3242 self._inner._revisioncache = (node, curr, rawtext)
3240 3243 self._chainbasecache[curr] = deltainfo.chainbase
3241 3244 return curr
3242 3245
3243 3246 def _get_data_offset(self, prev):
3244 3247 """Returns the current offset in the (in-transaction) data file.
3245 3248         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3246 3249 file to store that information: since sidedata can be rewritten to the
3247 3250 end of the data file within a transaction, you can have cases where, for
3248 3251 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3249 3252 to `n - 1`'s sidedata being written after `n`'s data.
3250 3253
3251 3254 TODO cache this in a docket file before getting out of experimental."""
3252 3255 if self._docket is None:
3253 3256 return self.end(prev)
3254 3257 else:
3255 3258 return self._docket.data_end
3256 3259
3257 3260 def _writeentry(
3258 3261 self,
3259 3262 transaction,
3260 3263 entry,
3261 3264 data,
3262 3265 link,
3263 3266 offset,
3264 3267 sidedata,
3265 3268 sidedata_offset,
3266 3269 ):
3267 3270 # Files opened in a+ mode have inconsistent behavior on various
3268 3271 # platforms. Windows requires that a file positioning call be made
3269 3272 # when the file handle transitions between reads and writes. See
3270 3273 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3271 3274 # platforms, Python or the platform itself can be buggy. Some versions
3272 3275 # of Solaris have been observed to not append at the end of the file
3273 3276 # if the file was seeked to before the end. See issue4943 for more.
3274 3277 #
3275 3278 # We work around this issue by inserting a seek() before writing.
3276 3279 # Note: This is likely not necessary on Python 3. However, because
3277 3280 # the file handle is reused for reads and may be seeked there, we need
3278 3281 # to be careful before changing this.
3279 3282 index_end = data_end = sidedata_end = None
3280 3283 if self._docket is not None:
3281 3284 index_end = self._docket.index_end
3282 3285 data_end = self._docket.data_end
3283 3286 sidedata_end = self._docket.sidedata_end
3284 3287
3285 3288 files_end = self._inner.write_entry(
3286 3289 transaction,
3287 3290 entry,
3288 3291 data,
3289 3292 link,
3290 3293 offset,
3291 3294 sidedata,
3292 3295 sidedata_offset,
3293 3296 index_end,
3294 3297 data_end,
3295 3298 sidedata_end,
3296 3299 )
3297 3300 self._enforceinlinesize(transaction)
3298 3301 if self._docket is not None:
3299 3302 self._docket.index_end = files_end[0]
3300 3303 self._docket.data_end = files_end[1]
3301 3304 self._docket.sidedata_end = files_end[2]
3302 3305
3303 3306 nodemaputil.setup_persistent_nodemap(transaction, self)
3304 3307
3305 3308 def addgroup(
3306 3309 self,
3307 3310 deltas,
3308 3311 linkmapper,
3309 3312 transaction,
3310 3313 alwayscache=False,
3311 3314 addrevisioncb=None,
3312 3315 duplicaterevisioncb=None,
3313 3316 debug_info=None,
3314 3317 delta_base_reuse_policy=None,
3315 3318 ):
3316 3319 """
3317 3320 add a delta group
3318 3321
3319 3322 given a set of deltas, add them to the revision log. the
3320 3323 first delta is against its parent, which should be in our
3321 3324 log, the rest are against the previous delta.
3322 3325
3323 3326 If ``addrevisioncb`` is defined, it will be called with arguments of
3324 3327 this revlog and the node that was added.
3325 3328 """
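        # Each item in ``deltas`` is expected to be an 8-tuple of
        # (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
        # matching the unpacking done in the loop below.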
3326 3329
3327 3330 if self._adding_group:
3328 3331 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3329 3332
3330 3333 # read the default delta-base reuse policy from revlog config if the
3331 3334 # group did not specify one.
3332 3335 if delta_base_reuse_policy is None:
3333 3336 if (
3334 3337 self.delta_config.general_delta
3335 3338 and self.delta_config.lazy_delta_base
3336 3339 ):
3337 3340 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3338 3341 else:
3339 3342 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3340 3343
3341 3344 self._adding_group = True
3342 3345 empty = True
3343 3346 try:
3344 3347 with self._writing(transaction):
3345 3348 write_debug = None
3346 3349 if self.delta_config.debug_delta:
3347 3350 write_debug = transaction._report
3348 3351 deltacomputer = deltautil.deltacomputer(
3349 3352 self,
3350 3353 write_debug=write_debug,
3351 3354 debug_info=debug_info,
3352 3355 )
3353 3356 # loop through our set of deltas
3354 3357 for data in deltas:
3355 3358 (
3356 3359 node,
3357 3360 p1,
3358 3361 p2,
3359 3362 linknode,
3360 3363 deltabase,
3361 3364 delta,
3362 3365 flags,
3363 3366 sidedata,
3364 3367 ) = data
3365 3368 link = linkmapper(linknode)
3366 3369 flags = flags or REVIDX_DEFAULT_FLAGS
3367 3370
3368 3371 rev = self.index.get_rev(node)
3369 3372 if rev is not None:
3370 3373 # this can happen if two branches make the same change
3371 3374 self._nodeduplicatecallback(transaction, rev)
3372 3375 if duplicaterevisioncb:
3373 3376 duplicaterevisioncb(self, rev)
3374 3377 empty = False
3375 3378 continue
3376 3379
3377 3380 for p in (p1, p2):
3378 3381 if not self.index.has_node(p):
3379 3382 raise error.LookupError(
3380 3383 p, self.radix, _(b'unknown parent')
3381 3384 )
3382 3385
3383 3386 if not self.index.has_node(deltabase):
3384 3387 raise error.LookupError(
3385 3388 deltabase, self.display_id, _(b'unknown delta base')
3386 3389 )
3387 3390
3388 3391 baserev = self.rev(deltabase)
3389 3392
3390 3393 if baserev != nullrev and self.iscensored(baserev):
3391 3394 # if base is censored, delta must be full replacement in a
3392 3395 # single patch operation
3393 3396 hlen = struct.calcsize(b">lll")
3394 3397 oldlen = self.rawsize(baserev)
3395 3398 newlen = len(delta) - hlen
3396 3399 if delta[:hlen] != mdiff.replacediffheader(
3397 3400 oldlen, newlen
3398 3401 ):
3399 3402 raise error.CensoredBaseError(
3400 3403 self.display_id, self.node(baserev)
3401 3404 )
3402 3405
3403 3406 if not flags and self._peek_iscensored(baserev, delta):
3404 3407 flags |= REVIDX_ISCENSORED
3405 3408
3406 3409 # We assume consumers of addrevisioncb will want to retrieve
3407 3410 # the added revision, which will require a call to
3408 3411 # revision(). revision() will fast path if there is a cache
3409 3412 # hit. So, we tell _addrevision() to always cache in this case.
3410 3413 # We're only using addgroup() in the context of changegroup
3411 3414 # generation so the revision data can always be handled as raw
3412 3415 # by the flagprocessor.
3413 3416 rev = self._addrevision(
3414 3417 node,
3415 3418 None,
3416 3419 transaction,
3417 3420 link,
3418 3421 p1,
3419 3422 p2,
3420 3423 flags,
3421 3424 (baserev, delta, delta_base_reuse_policy),
3422 3425 alwayscache=alwayscache,
3423 3426 deltacomputer=deltacomputer,
3424 3427 sidedata=sidedata,
3425 3428 )
3426 3429
3427 3430 if addrevisioncb:
3428 3431 addrevisioncb(self, rev)
3429 3432 empty = False
3430 3433 finally:
3431 3434 self._adding_group = False
3432 3435 return not empty
3433 3436
3434 3437 def iscensored(self, rev):
3435 3438 """Check if a file revision is censored."""
3436 3439 if not self.feature_config.censorable:
3437 3440 return False
3438 3441
3439 3442 return self.flags(rev) & REVIDX_ISCENSORED
3440 3443
3441 3444 def _peek_iscensored(self, baserev, delta):
3442 3445 """Quickly check if a delta produces a censored revision."""
3443 3446 if not self.feature_config.censorable:
3444 3447 return False
3445 3448
3446 3449 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3447 3450
3448 3451 def getstrippoint(self, minlink):
3449 3452 """find the minimum rev that must be stripped to strip the linkrev
3450 3453
3451 3454 Returns a tuple containing the minimum rev and a set of all revs that
3452 3455 have linkrevs that will be broken by this strip.
3453 3456 """
3454 3457 return storageutil.resolvestripinfo(
3455 3458 minlink,
3456 3459 len(self) - 1,
3457 3460 self.headrevs(),
3458 3461 self.linkrev,
3459 3462 self.parentrevs,
3460 3463 )
3461 3464
3462 3465 def strip(self, minlink, transaction):
3463 3466 """truncate the revlog on the first revision with a linkrev >= minlink
3464 3467
3465 3468 This function is called when we're stripping revision minlink and
3466 3469 its descendants from the repository.
3467 3470
3468 3471 We have to remove all revisions with linkrev >= minlink, because
3469 3472 the equivalent changelog revisions will be renumbered after the
3470 3473 strip.
3471 3474
3472 3475 So we truncate the revlog on the first of these revisions, and
3473 3476 trust that the caller has saved the revisions that shouldn't be
3474 3477 removed and that it'll re-add them after this truncation.
3475 3478 """
3476 3479 if len(self) == 0:
3477 3480 return
3478 3481
3479 3482 rev, _ = self.getstrippoint(minlink)
3480 3483 if rev == len(self):
3481 3484 return
3482 3485
3483 3486 # first truncate the files on disk
3484 3487 data_end = self.start(rev)
3485 3488 if not self._inline:
3486 3489 transaction.add(self._datafile, data_end)
3487 3490 end = rev * self.index.entry_size
3488 3491 else:
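            # inline revlogs interleave data and index entries in the .i
            # file, so the truncation point is the surviving data plus one
            # index entry per surviving revision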
3489 3492 end = data_end + (rev * self.index.entry_size)
3490 3493
3491 3494 if self._sidedatafile:
3492 3495 sidedata_end = self.sidedata_cut_off(rev)
3493 3496 transaction.add(self._sidedatafile, sidedata_end)
3494 3497
3495 3498 transaction.add(self._indexfile, end)
3496 3499 if self._docket is not None:
3497 3500             # XXX we could leverage the docket while stripping. However it is
3498 3501             # not powerful enough at the time of this comment
3499 3502 self._docket.index_end = end
3500 3503 self._docket.data_end = data_end
3501 3504 self._docket.sidedata_end = sidedata_end
3502 3505 self._docket.write(transaction, stripping=True)
3503 3506
3504 3507 # then reset internal state in memory to forget those revisions
3505 self._inner._revisioncache = None
3506 3508 self._chaininfocache = util.lrucachedict(500)
3507 self._inner._segmentfile.clear_cache()
3508 self._inner._segmentfile_sidedata.clear_cache()
3509 self._inner.clear_cache()
3509 3510
3510 3511 del self.index[rev:-1]
3511 3512
3512 3513 def checksize(self):
3513 3514 """Check size of index and data files
3514 3515
3515 3516 return a (dd, di) tuple.
3516 3517 - dd: extra bytes for the "data" file
3517 3518 - di: extra bytes for the "index" file
3518 3519
3519 3520 A healthy revlog will return (0, 0).
3520 3521 """
3521 3522 expected = 0
3522 3523 if len(self):
3523 3524 expected = max(0, self.end(len(self) - 1))
3524 3525
3525 3526 try:
3526 3527 with self._datafp() as f:
3527 3528 f.seek(0, io.SEEK_END)
3528 3529 actual = f.tell()
3529 3530 dd = actual - expected
3530 3531 except FileNotFoundError:
3531 3532 dd = 0
3532 3533
3533 3534 try:
3534 3535 f = self.opener(self._indexfile)
3535 3536 f.seek(0, io.SEEK_END)
3536 3537 actual = f.tell()
3537 3538 f.close()
3538 3539 s = self.index.entry_size
3539 3540 i = max(0, actual // s)
3540 3541 di = actual - (i * s)
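            # For inline revlogs the data lives inside the index file, so
            # recompute the expectation: the file should hold one index entry
            # plus the stored data for every revision; anything beyond that
            # counts as extra index bytes.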
3541 3542 if self._inline:
3542 3543 databytes = 0
3543 3544 for r in self:
3544 3545 databytes += max(0, self.length(r))
3545 3546 dd = 0
3546 3547 di = actual - len(self) * s - databytes
3547 3548 except FileNotFoundError:
3548 3549 di = 0
3549 3550
3550 3551 return (dd, di)
3551 3552
3552 3553 def files(self):
3553 3554 """return list of files that compose this revlog"""
3554 3555 res = [self._indexfile]
3555 3556 if self._docket_file is None:
3556 3557 if not self._inline:
3557 3558 res.append(self._datafile)
3558 3559 else:
3559 3560 res.append(self._docket_file)
3560 3561 res.extend(self._docket.old_index_filepaths(include_empty=False))
3561 3562 if self._docket.data_end:
3562 3563 res.append(self._datafile)
3563 3564 res.extend(self._docket.old_data_filepaths(include_empty=False))
3564 3565 if self._docket.sidedata_end:
3565 3566 res.append(self._sidedatafile)
3566 3567 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3567 3568 return res
3568 3569
3569 3570 def emitrevisions(
3570 3571 self,
3571 3572 nodes,
3572 3573 nodesorder=None,
3573 3574 revisiondata=False,
3574 3575 assumehaveparentrevisions=False,
3575 3576 deltamode=repository.CG_DELTAMODE_STD,
3576 3577 sidedata_helpers=None,
3577 3578 debug_info=None,
3578 3579 ):
3579 3580 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3580 3581 raise error.ProgrammingError(
3581 3582 b'unhandled value for nodesorder: %s' % nodesorder
3582 3583 )
3583 3584
3584 3585 if nodesorder is None and not self.delta_config.general_delta:
3585 3586 nodesorder = b'storage'
3586 3587
3587 3588 if (
3588 3589 not self._storedeltachains
3589 3590 and deltamode != repository.CG_DELTAMODE_PREV
3590 3591 ):
3591 3592 deltamode = repository.CG_DELTAMODE_FULL
3592 3593
3593 3594 return storageutil.emitrevisions(
3594 3595 self,
3595 3596 nodes,
3596 3597 nodesorder,
3597 3598 revlogrevisiondelta,
3598 3599 deltaparentfn=self.deltaparent,
3599 3600 candeltafn=self._candelta,
3600 3601 rawsizefn=self.rawsize,
3601 3602 revdifffn=self.revdiff,
3602 3603 flagsfn=self.flags,
3603 3604 deltamode=deltamode,
3604 3605 revisiondata=revisiondata,
3605 3606 assumehaveparentrevisions=assumehaveparentrevisions,
3606 3607 sidedata_helpers=sidedata_helpers,
3607 3608 debug_info=debug_info,
3608 3609 )
3609 3610
3610 3611 DELTAREUSEALWAYS = b'always'
3611 3612 DELTAREUSESAMEREVS = b'samerevs'
3612 3613 DELTAREUSENEVER = b'never'
3613 3614
3614 3615 DELTAREUSEFULLADD = b'fulladd'
3615 3616
3616 3617 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3617 3618
3618 3619 def clone(
3619 3620 self,
3620 3621 tr,
3621 3622 destrevlog,
3622 3623 addrevisioncb=None,
3623 3624 deltareuse=DELTAREUSESAMEREVS,
3624 3625 forcedeltabothparents=None,
3625 3626 sidedata_helpers=None,
3626 3627 ):
3627 3628 """Copy this revlog to another, possibly with format changes.
3628 3629
3629 3630 The destination revlog will contain the same revisions and nodes.
3630 3631 However, it may not be bit-for-bit identical due to e.g. delta encoding
3631 3632 differences.
3632 3633
3633 3634         The ``deltareuse`` argument controls how deltas from the existing revlog
3634 3635 are preserved in the destination revlog. The argument can have the
3635 3636 following values:
3636 3637
3637 3638 DELTAREUSEALWAYS
3638 3639 Deltas will always be reused (if possible), even if the destination
3639 3640 revlog would not select the same revisions for the delta. This is the
3640 3641 fastest mode of operation.
3641 3642 DELTAREUSESAMEREVS
3642 3643 Deltas will be reused if the destination revlog would pick the same
3643 3644 revisions for the delta. This mode strikes a balance between speed
3644 3645 and optimization.
3645 3646 DELTAREUSENEVER
3646 3647 Deltas will never be reused. This is the slowest mode of execution.
3647 3648 This mode can be used to recompute deltas (e.g. if the diff/delta
3648 3649 algorithm changes).
3649 3650 DELTAREUSEFULLADD
3650 3651             Revisions will be re-added as if they were new content. This is
3651 3652             slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3652 3653             e.g. large file detection and handling.
3653 3654
3654 3655 Delta computation can be slow, so the choice of delta reuse policy can
3655 3656 significantly affect run time.
3656 3657
3657 3658 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3658 3659         two extremes. Deltas will be reused when appropriate, but if a better
3659 3660         delta base could be chosen, it will be. This means if you
3660 3661 are converting a non-generaldelta revlog to a generaldelta revlog,
3661 3662 deltas will be recomputed if the delta's parent isn't a parent of the
3662 3663 revision.
3663 3664
3664 3665 In addition to the delta policy, the ``forcedeltabothparents``
3665 3666         argument controls whether to force computing deltas against both parents
3666 3667         for merges. If not set, the destination revlog's own default is used.
3667 3668
3668 3669 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3669 3670 `sidedata_helpers`.
3670 3671 """
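        # A minimal usage sketch, assuming ``src`` is this revlog, ``dest`` an
        # empty destination revlog and ``tr`` an open transaction:
        #
        #     src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)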
3671 3672 if deltareuse not in self.DELTAREUSEALL:
3672 3673 raise ValueError(
3673 3674 _(b'value for deltareuse invalid: %s') % deltareuse
3674 3675 )
3675 3676
3676 3677 if len(destrevlog):
3677 3678 raise ValueError(_(b'destination revlog is not empty'))
3678 3679
3679 3680 if getattr(self, 'filteredrevs', None):
3680 3681 raise ValueError(_(b'source revlog has filtered revisions'))
3681 3682 if getattr(destrevlog, 'filteredrevs', None):
3682 3683 raise ValueError(_(b'destination revlog has filtered revisions'))
3683 3684
3684 3685         # lazydelta and lazydeltabase control whether to reuse a cached delta,
3685 3686 # if possible.
3686 3687 old_delta_config = destrevlog.delta_config
3687 3688 destrevlog.delta_config = destrevlog.delta_config.copy()
3688 3689
3689 3690 try:
3690 3691 if deltareuse == self.DELTAREUSEALWAYS:
3691 3692 destrevlog.delta_config.lazy_delta_base = True
3692 3693 destrevlog.delta_config.lazy_delta = True
3693 3694 elif deltareuse == self.DELTAREUSESAMEREVS:
3694 3695 destrevlog.delta_config.lazy_delta_base = False
3695 3696 destrevlog.delta_config.lazy_delta = True
3696 3697 elif deltareuse == self.DELTAREUSENEVER:
3697 3698 destrevlog.delta_config.lazy_delta_base = False
3698 3699 destrevlog.delta_config.lazy_delta = False
3699 3700
3700 3701 delta_both_parents = (
3701 3702 forcedeltabothparents or old_delta_config.delta_both_parents
3702 3703 )
3703 3704 destrevlog.delta_config.delta_both_parents = delta_both_parents
3704 3705
3705 3706 with self.reading(), destrevlog._writing(tr):
3706 3707 self._clone(
3707 3708 tr,
3708 3709 destrevlog,
3709 3710 addrevisioncb,
3710 3711 deltareuse,
3711 3712 forcedeltabothparents,
3712 3713 sidedata_helpers,
3713 3714 )
3714 3715
3715 3716 finally:
3716 3717 destrevlog.delta_config = old_delta_config
3717 3718
3718 3719 def _clone(
3719 3720 self,
3720 3721 tr,
3721 3722 destrevlog,
3722 3723 addrevisioncb,
3723 3724 deltareuse,
3724 3725 forcedeltabothparents,
3725 3726 sidedata_helpers,
3726 3727 ):
3727 3728 """perform the core duty of `revlog.clone` after parameter processing"""
3728 3729 write_debug = None
3729 3730 if self.delta_config.debug_delta:
3730 3731 write_debug = tr._report
3731 3732 deltacomputer = deltautil.deltacomputer(
3732 3733 destrevlog,
3733 3734 write_debug=write_debug,
3734 3735 )
3735 3736 index = self.index
3736 3737 for rev in self:
3737 3738 entry = index[rev]
3738 3739
3739 3740 # Some classes override linkrev to take filtered revs into
3740 3741 # account. Use raw entry from index.
3741 3742 flags = entry[0] & 0xFFFF
3742 3743 linkrev = entry[4]
3743 3744 p1 = index[entry[5]][7]
3744 3745 p2 = index[entry[6]][7]
3745 3746 node = entry[7]
3746 3747
3747 3748 # (Possibly) reuse the delta from the revlog if allowed and
3748 3749 # the revlog chunk is a delta.
3749 3750 cachedelta = None
3750 3751 rawtext = None
3751 3752 if deltareuse == self.DELTAREUSEFULLADD:
3752 3753 text = self._revisiondata(rev)
3753 3754 sidedata = self.sidedata(rev)
3754 3755
3755 3756 if sidedata_helpers is not None:
3756 3757 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3757 3758 self, sidedata_helpers, sidedata, rev
3758 3759 )
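                    # new_flags is a (flags to add, flags to remove) pair; as
                    # ``&`` binds tighter than ``|``, the removals only mask
                    # the newly added flags below.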
3759 3760 flags = flags | new_flags[0] & ~new_flags[1]
3760 3761
3761 3762 destrevlog.addrevision(
3762 3763 text,
3763 3764 tr,
3764 3765 linkrev,
3765 3766 p1,
3766 3767 p2,
3767 3768 cachedelta=cachedelta,
3768 3769 node=node,
3769 3770 flags=flags,
3770 3771 deltacomputer=deltacomputer,
3771 3772 sidedata=sidedata,
3772 3773 )
3773 3774 else:
3774 3775 if destrevlog.delta_config.lazy_delta:
3775 3776 dp = self.deltaparent(rev)
3776 3777 if dp != nullrev:
3777 3778 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3778 3779
3779 3780 sidedata = None
3780 3781 if not cachedelta:
3781 3782 try:
3782 3783 rawtext = self._revisiondata(rev)
3783 3784 except error.CensoredNodeError as censored:
3784 3785 assert flags & REVIDX_ISCENSORED
3785 3786 rawtext = censored.tombstone
3786 3787 sidedata = self.sidedata(rev)
3787 3788 if sidedata is None:
3788 3789 sidedata = self.sidedata(rev)
3789 3790
3790 3791 if sidedata_helpers is not None:
3791 3792 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3792 3793 self, sidedata_helpers, sidedata, rev
3793 3794 )
3794 3795 flags = flags | new_flags[0] & ~new_flags[1]
3795 3796
3796 3797 destrevlog._addrevision(
3797 3798 node,
3798 3799 rawtext,
3799 3800 tr,
3800 3801 linkrev,
3801 3802 p1,
3802 3803 p2,
3803 3804 flags,
3804 3805 cachedelta,
3805 3806 deltacomputer=deltacomputer,
3806 3807 sidedata=sidedata,
3807 3808 )
3808 3809
3809 3810 if addrevisioncb:
3810 3811 addrevisioncb(self, rev, node)
3811 3812
3812 3813 def censorrevision(self, tr, censornode, tombstone=b''):
3813 3814 if self._format_version == REVLOGV0:
3814 3815 raise error.RevlogError(
3815 3816 _(b'cannot censor with version %d revlogs')
3816 3817 % self._format_version
3817 3818 )
3818 3819 elif self._format_version == REVLOGV1:
3819 3820 rewrite.v1_censor(self, tr, censornode, tombstone)
3820 3821 else:
3821 3822 rewrite.v2_censor(self, tr, censornode, tombstone)
3822 3823
3823 3824 def verifyintegrity(self, state):
3824 3825 """Verifies the integrity of the revlog.
3825 3826
3826 3827 Yields ``revlogproblem`` instances describing problems that are
3827 3828 found.
3828 3829 """
3829 3830 dd, di = self.checksize()
3830 3831 if dd:
3831 3832 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3832 3833 if di:
3833 3834 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3834 3835
3835 3836 version = self._format_version
3836 3837
3837 3838 # The verifier tells us what version revlog we should be.
3838 3839 if version != state[b'expectedversion']:
3839 3840 yield revlogproblem(
3840 3841 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3841 3842 % (self.display_id, version, state[b'expectedversion'])
3842 3843 )
3843 3844
3844 3845 state[b'skipread'] = set()
3845 3846 state[b'safe_renamed'] = set()
3846 3847
3847 3848 for rev in self:
3848 3849 node = self.node(rev)
3849 3850
3850 3851 # Verify contents. 4 cases to care about:
3851 3852 #
3852 3853 # common: the most common case
3853 3854 # rename: with a rename
3854 3855 # meta: file content starts with b'\1\n', the metadata
3855 3856 # header defined in filelog.py, but without a rename
3856 3857 # ext: content stored externally
3857 3858 #
3858 3859 # More formally, their differences are shown below:
3859 3860 #
3860 3861 # | common | rename | meta | ext
3861 3862 # -------------------------------------------------------
3862 3863 # flags() | 0 | 0 | 0 | not 0
3863 3864 # renamed() | False | True | False | ?
3864 3865 # rawtext[0:2]=='\1\n'| False | True | True | ?
3865 3866 #
3866 3867 # "rawtext" means the raw text stored in revlog data, which
3867 3868 # could be retrieved by "rawdata(rev)". "text"
3868 3869 # mentioned below is "revision(rev)".
3869 3870 #
3870 3871 # There are 3 different lengths stored physically:
3871 3872 # 1. L1: rawsize, stored in revlog index
3872 3873 # 2. L2: len(rawtext), stored in revlog data
3873 3874 # 3. L3: len(text), stored in revlog data if flags==0, or
3874 3875 # possibly somewhere else if flags!=0
3875 3876 #
3876 3877 # L1 should be equal to L2. L3 could be different from them.
3877 3878 # "text" may or may not affect commit hash depending on flag
3878 3879 # processors (see flagutil.addflagprocessor).
3879 3880 #
3880 3881 # | common | rename | meta | ext
3881 3882 # -------------------------------------------------
3882 3883 # rawsize() | L1 | L1 | L1 | L1
3883 3884 # size() | L1 | L2-LM | L1(*) | L1 (?)
3884 3885 # len(rawtext) | L2 | L2 | L2 | L2
3885 3886 # len(text) | L2 | L2 | L2 | L3
3886 3887 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3887 3888 #
3888 3889 # LM: length of metadata, depending on rawtext
3889 3890 # (*): not ideal, see comment in filelog.size
3890 3891 # (?): could be "- len(meta)" if the resolved content has
3891 3892 # rename metadata
3892 3893 #
3893 3894 # Checks needed to be done:
3894 3895 # 1. length check: L1 == L2, in all cases.
3895 3896 # 2. hash check: depending on flag processor, we may need to
3896 3897 # use either "text" (external), or "rawtext" (in revlog).
3897 3898
3898 3899 try:
3899 3900 skipflags = state.get(b'skipflags', 0)
3900 3901 if skipflags:
3901 3902 skipflags &= self.flags(rev)
3902 3903
3903 3904 _verify_revision(self, skipflags, state, node)
3904 3905
3905 3906 l1 = self.rawsize(rev)
3906 3907 l2 = len(self.rawdata(node))
3907 3908
3908 3909 if l1 != l2:
3909 3910 yield revlogproblem(
3910 3911 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3911 3912 node=node,
3912 3913 )
3913 3914
3914 3915 except error.CensoredNodeError:
3915 3916 if state[b'erroroncensored']:
3916 3917 yield revlogproblem(
3917 3918 error=_(b'censored file data'), node=node
3918 3919 )
3919 3920 state[b'skipread'].add(node)
3920 3921 except Exception as e:
3921 3922 yield revlogproblem(
3922 3923 error=_(b'unpacking %s: %s')
3923 3924 % (short(node), stringutil.forcebytestr(e)),
3924 3925 node=node,
3925 3926 )
3926 3927 state[b'skipread'].add(node)
3927 3928
3928 3929 def storageinfo(
3929 3930 self,
3930 3931 exclusivefiles=False,
3931 3932 sharedfiles=False,
3932 3933 revisionscount=False,
3933 3934 trackedsize=False,
3934 3935 storedsize=False,
3935 3936 ):
3936 3937 d = {}
3937 3938
3938 3939 if exclusivefiles:
3939 3940 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3940 3941 if not self._inline:
3941 3942 d[b'exclusivefiles'].append((self.opener, self._datafile))
3942 3943
3943 3944 if sharedfiles:
3944 3945 d[b'sharedfiles'] = []
3945 3946
3946 3947 if revisionscount:
3947 3948 d[b'revisionscount'] = len(self)
3948 3949
3949 3950 if trackedsize:
3950 3951 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3951 3952
3952 3953 if storedsize:
3953 3954 d[b'storedsize'] = sum(
3954 3955 self.opener.stat(path).st_size for path in self.files()
3955 3956 )
3956 3957
3957 3958 return d
3958 3959
3959 3960 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3960 3961 if not self.feature_config.has_side_data:
3961 3962 return
3962 3963         # revlog formats with sidedata support do not support inline
3963 3964 assert not self._inline
3964 3965 if not helpers[1] and not helpers[2]:
3965 3966 # Nothing to generate or remove
3966 3967 return
3967 3968
3968 3969 new_entries = []
3969 3970 # append the new sidedata
3970 3971 with self._writing(transaction):
3971 3972 ifh, dfh, sdfh = self._inner._writinghandles
3972 3973 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3973 3974
3974 3975 current_offset = sdfh.tell()
3975 3976 for rev in range(startrev, endrev + 1):
3976 3977 entry = self.index[rev]
3977 3978 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3978 3979 store=self,
3979 3980 sidedata_helpers=helpers,
3980 3981 sidedata={},
3981 3982 rev=rev,
3982 3983 )
3983 3984
3984 3985 serialized_sidedata = sidedatautil.serialize_sidedata(
3985 3986 new_sidedata
3986 3987 )
3987 3988
3988 3989 sidedata_compression_mode = COMP_MODE_INLINE
3989 3990 if serialized_sidedata and self.feature_config.has_side_data:
3990 3991 sidedata_compression_mode = COMP_MODE_PLAIN
3991 3992 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3992 3993 if (
3993 3994 h != b'u'
3994 3995                     and comp_sidedata[0:1] != b'\0'
3995 3996 and len(comp_sidedata) < len(serialized_sidedata)
3996 3997 ):
3997 3998 assert not h
3998 3999 if (
3999 4000                         comp_sidedata[0:1]
4000 4001 == self._docket.default_compression_header
4001 4002 ):
4002 4003 sidedata_compression_mode = COMP_MODE_DEFAULT
4003 4004 serialized_sidedata = comp_sidedata
4004 4005 else:
4005 4006 sidedata_compression_mode = COMP_MODE_INLINE
4006 4007 serialized_sidedata = comp_sidedata
4007 4008 if entry[8] != 0 or entry[9] != 0:
4008 4009 # rewriting entries that already have sidedata is not
4009 4010 # supported yet, because it introduces garbage data in the
4010 4011 # revlog.
4011 4012 msg = b"rewriting existing sidedata is not supported yet"
4012 4013 raise error.Abort(msg)
4013 4014
4014 4015 # Apply (potential) flags to add and to remove after running
4015 4016 # the sidedata helpers
4016 4017 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4017 4018 entry_update = (
4018 4019 current_offset,
4019 4020 len(serialized_sidedata),
4020 4021 new_offset_flags,
4021 4022 sidedata_compression_mode,
4022 4023 )
4023 4024
4024 4025                 # the sidedata computation might have moved the file cursors around
4025 4026 sdfh.seek(current_offset, os.SEEK_SET)
4026 4027 sdfh.write(serialized_sidedata)
4027 4028 new_entries.append(entry_update)
4028 4029 current_offset += len(serialized_sidedata)
4029 4030 self._docket.sidedata_end = sdfh.tell()
4030 4031
4031 4032 # rewrite the new index entries
4032 4033 ifh.seek(startrev * self.index.entry_size)
4033 4034 for i, e in enumerate(new_entries):
4034 4035 rev = startrev + i
4035 4036 self.index.replace_sidedata_info(rev, *e)
4036 4037 packed = self.index.entry_binary(rev)
4037 4038 if rev == 0 and self._docket is None:
4038 4039 header = self._format_flags | self._format_version
4039 4040 header = self.index.pack_header(header)
4040 4041 packed = header + packed
4041 4042 ifh.write(packed)