revlog: add a `canonical_index_file` attribute on inner revlog...
marmoute
r51998:af96fbb8 default
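
The change below introduces a `canonical_index_file` property on the inner
revlog object. For now it simply returns `index_file`, and `write_entry`
switches to journaling that canonical path in the transaction. A minimal
sketch of the pattern (hypothetical class name, illustration only, not the
actual implementation):

    class InnerRevlogSketch:
        def __init__(self, index_file):
            # may later point at a temporary/pending file
            self.index_file = index_file

        @property
        def canonical_index_file(self):
            # default: the canonical path is the current index path
            return self.index_file

With such a hook, `transaction.add(inner.canonical_index_file, offset)`
could keep journaling the canonical name even if a subclass redirects
writes elsewhere.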
@@ -1,4049 +1,4053 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data counts as large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups by chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
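# Illustrative sketch (hypothetical usage, not part of this changeset):
# the *Config classes above are attrs-based bags of settings, and
# _Config.copy() clones them so per-revlog tweaks do not leak into a
# shared options object:
#
#     base = DeltaConfig(general_delta=True)
#     mine = base.copy()
#     mine.max_chain_len = 1000  # leaves `base` untouched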
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 393 # 3-tuple of (node, rev, text) for a raw revision.
394 394 self._revisioncache = None
395 395
396 396 @property
397 397 def index_file(self):
398 398 return self.__index_file
399 399
400 400 @index_file.setter
401 401 def index_file(self, new_index_file):
402 402 self.__index_file = new_index_file
403 403 if self.inline:
404 404 self._segmentfile.filename = new_index_file
405 405
406 406 def __len__(self):
407 407 return len(self.index)
408 408
409 409 def clear_cache(self):
410 410 self._revisioncache = None
411 411 self._segmentfile.clear_cache()
412 412 self._segmentfile_sidedata.clear_cache()
413 413
414 @property
415 def canonical_index_file(self):
416 return self.index_file
417
414 418 # Derived from index values.
415 419
416 420 def start(self, rev):
417 421 """the offset of the data chunk for this revision"""
418 422 return int(self.index[rev][0] >> 16)
419 423
420 424 def length(self, rev):
421 425 """the length of the data chunk for this revision"""
422 426 return self.index[rev][1]
423 427
424 428 def end(self, rev):
425 429 """the end of the data chunk for this revision"""
426 430 return self.start(rev) + self.length(rev)
427 431
428 432 def deltaparent(self, rev):
429 433 """return deltaparent of the given revision"""
430 434 base = self.index[rev][3]
431 435 if base == rev:
432 436 return nullrev
433 437 elif self.delta_config.general_delta:
434 438 return base
435 439 else:
436 440 return rev - 1
437 441
438 442 def issnapshot(self, rev):
439 443 """tells whether rev is a snapshot"""
440 444 if not self.delta_config.sparse_revlog:
441 445 return self.deltaparent(rev) == nullrev
442 446 elif hasattr(self.index, 'issnapshot'):
443 447 # directly assign the method to cache the testing and access
444 448 self.issnapshot = self.index.issnapshot
445 449 return self.issnapshot(rev)
446 450 if rev == nullrev:
447 451 return True
448 452 entry = self.index[rev]
449 453 base = entry[3]
450 454 if base == rev:
451 455 return True
452 456 if base == nullrev:
453 457 return True
454 458 p1 = entry[5]
455 459 while self.length(p1) == 0:
456 460 b = self.deltaparent(p1)
457 461 if b == p1:
458 462 break
459 463 p1 = b
460 464 p2 = entry[6]
461 465 while self.length(p2) == 0:
462 466 b = self.deltaparent(p2)
463 467 if b == p2:
464 468 break
465 469 p2 = b
466 470 if base == p1 or base == p2:
467 471 return False
468 472 return self.issnapshot(base)
469 473
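# Illustrative note (an assumption drawn from the code above): under
# sparse-revlog, a revision counts as a snapshot when it is stored as a
# full text (base == rev or base == nullrev) or as a delta whose base is
# itself a snapshot and is not one of the (non-empty) parents, i.e. the
# delta does not merely extend a parent's chain.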
470 474 def _deltachain(self, rev, stoprev=None):
471 475 """Obtain the delta chain for a revision.
472 476
473 477 ``stoprev`` specifies a revision to stop at. If not specified, we
474 478 stop at the base of the chain.
475 479
476 480 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
477 481 revs in ascending order and ``stopped`` is a bool indicating whether
478 482 ``stoprev`` was hit.
479 483 """
480 484 generaldelta = self.delta_config.general_delta
481 485 # Try C implementation.
482 486 try:
483 487 return self.index.deltachain(rev, stoprev, generaldelta)
484 488 except AttributeError:
485 489 pass
486 490
487 491 chain = []
488 492
489 493 # Alias to prevent attribute lookup in tight loop.
490 494 index = self.index
491 495
492 496 iterrev = rev
493 497 e = index[iterrev]
494 498 while iterrev != e[3] and iterrev != stoprev:
495 499 chain.append(iterrev)
496 500 if generaldelta:
497 501 iterrev = e[3]
498 502 else:
499 503 iterrev -= 1
500 504 e = index[iterrev]
501 505
502 506 if iterrev == stoprev:
503 507 stopped = True
504 508 else:
505 509 chain.append(iterrev)
506 510 stopped = False
507 511
508 512 chain.reverse()
509 513 return chain, stopped
510 514
511 515 @util.propertycache
512 516 def _compressor(self):
513 517 engine = util.compengines[self.feature_config.compression_engine]
514 518 return engine.revlogcompressor(
515 519 self.feature_config.compression_engine_options
516 520 )
517 521
518 522 @util.propertycache
519 523 def _decompressor(self):
520 524 """the default decompressor"""
521 525 if self._default_compression_header is None:
522 526 return None
523 527 t = self._default_compression_header
524 528 c = self._get_decompressor(t)
525 529 return c.decompress
526 530
527 531 def _get_decompressor(self, t):
528 532 try:
529 533 compressor = self._decompressors[t]
530 534 except KeyError:
531 535 try:
532 536 engine = util.compengines.forrevlogheader(t)
533 537 compressor = engine.revlogcompressor(
534 538 self.feature_config.compression_engine_options
535 539 )
536 540 self._decompressors[t] = compressor
537 541 except KeyError:
538 542 raise error.RevlogError(
539 543 _(b'unknown compression type %s') % binascii.hexlify(t)
540 544 )
541 545 return compressor
542 546
543 547 def compress(self, data):
544 548 """Generate a possibly-compressed representation of data."""
545 549 if not data:
546 550 return b'', data
547 551
548 552 compressed = self._compressor.compress(data)
549 553
550 554 if compressed:
551 555 # The revlog compressor added the header in the returned data.
552 556 return b'', compressed
553 557
554 558 if data[0:1] == b'\0':
555 559 return b'', data
556 560 return b'u', data
557 561
558 562 def decompress(self, data):
559 563 """Decompress a revlog chunk.
560 564
561 565 The chunk is expected to begin with a header identifying the
562 566 format type so it can be routed to an appropriate decompressor.
563 567 """
564 568 if not data:
565 569 return data
566 570
567 571 # Revlogs are read much more frequently than they are written and many
568 572 # chunks only take microseconds to decompress, so performance is
569 573 # important here.
570 574 #
571 575 # We can make a few assumptions about revlogs:
572 576 #
573 577 # 1) the majority of chunks will be compressed (as opposed to inline
574 578 # raw data).
575 579 # 2) decompressing *any* data will likely be at least 10x slower than
576 580 # returning raw inline data.
577 581 # 3) we want to prioritize common and officially supported compression
578 582 # engines
579 583 #
580 584 # It follows that we want to optimize for "decompress compressed data
581 585 # when encoded with common and officially supported compression engines"
582 586 # case over "raw data" and "data encoded by less common or non-official
583 587 # compression engines." That is why we have the inline lookup first
584 588 # followed by the compengines lookup.
585 589 #
586 590 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
587 591 # compressed chunks. And this matters for changelog and manifest reads.
588 592 t = data[0:1]
589 593
590 594 if t == b'x':
591 595 try:
592 596 return _zlibdecompress(data)
593 597 except zlib.error as e:
594 598 raise error.RevlogError(
595 599 _(b'revlog decompress error: %s')
596 600 % stringutil.forcebytestr(e)
597 601 )
598 602 # '\0' is more common than 'u' so it goes first.
599 603 elif t == b'\0':
600 604 return data
601 605 elif t == b'u':
602 606 return util.buffer(data, 1)
603 607
604 608 compressor = self._get_decompressor(t)
605 609
606 610 return compressor.decompress(data)
607 611
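# Illustrative round-trip sketch (hypothetical usage, not part of this
# changeset): compress() returns a (header, data) pair where an empty
# header means the compressor embedded its own header and b'u' marks
# data stored uncompressed; decompress() routes on that first byte:
#
#     header, packed = inner.compress(rawtext)
#     assert bytes(inner.decompress(header + packed)) == rawtext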
608 612 @contextlib.contextmanager
609 613 def reading(self):
610 614 """Context manager that keeps data and sidedata files open for reading"""
611 615 if len(self.index) == 0:
612 616 yield # nothing to be read
613 617 else:
614 618 with self._segmentfile.reading():
615 619 with self._segmentfile_sidedata.reading():
616 620 yield
617 621
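# Illustrative usage sketch (hypothetical, not part of this changeset):
#
#     with inner.reading():
#         offset, data = inner.get_segment_for_revs(0, len(inner) - 1)
#
# keeps the data and sidedata segment files open across several reads.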
618 622 @property
619 623 def is_writing(self):
620 624 """True is a writing context is open"""
621 625 return self._writinghandles is not None
622 626
623 627 @property
624 628 def is_open(self):
625 629 """True if any file handle is being held
626 630
627 631 Used for asserts and debugging in the Python code"""
628 632 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
629 633
630 634 @contextlib.contextmanager
631 635 def writing(self, transaction, data_end=None, sidedata_end=None):
632 636 """Open the revlog files for writing
633 637
634 638 Add content to a revlog should be done within such context.
635 639 """
636 640 if self.is_writing:
637 641 yield
638 642 else:
639 643 ifh = dfh = sdfh = None
640 644 try:
641 645 r = len(self.index)
642 646 # opening the data file.
643 647 dsize = 0
644 648 if r:
645 649 dsize = self.end(r - 1)
646 650 dfh = None
647 651 if not self.inline:
648 652 try:
649 653 dfh = self.opener(self.data_file, mode=b"r+")
650 654 if data_end is None:
651 655 dfh.seek(0, os.SEEK_END)
652 656 else:
653 657 dfh.seek(data_end, os.SEEK_SET)
654 658 except FileNotFoundError:
655 659 dfh = self.opener(self.data_file, mode=b"w+")
656 660 transaction.add(self.data_file, dsize)
657 661 if self.sidedata_file is not None:
658 662 assert sidedata_end is not None
659 663 # revlog-v2 does not inline, help Pytype
660 664 assert dfh is not None
661 665 try:
662 666 sdfh = self.opener(self.sidedata_file, mode=b"r+")
663 667 dfh.seek(sidedata_end, os.SEEK_SET)
664 668 except FileNotFoundError:
665 669 sdfh = self.opener(self.sidedata_file, mode=b"w+")
666 670 transaction.add(self.sidedata_file, sidedata_end)
667 671
668 672 # opening the index file.
669 673 isize = r * self.index.entry_size
670 674 ifh = self.__index_write_fp()
671 675 if self.inline:
672 676 transaction.add(self.index_file, dsize + isize)
673 677 else:
674 678 transaction.add(self.index_file, isize)
675 679 # exposing all file handle for writing.
676 680 self._writinghandles = (ifh, dfh, sdfh)
677 681 self._segmentfile.writing_handle = ifh if self.inline else dfh
678 682 self._segmentfile_sidedata.writing_handle = sdfh
679 683 yield
680 684 finally:
681 685 self._writinghandles = None
682 686 self._segmentfile.writing_handle = None
683 687 self._segmentfile_sidedata.writing_handle = None
684 688 if dfh is not None:
685 689 dfh.close()
686 690 if sdfh is not None:
687 691 sdfh.close()
688 692 # closing the index file last to avoid exposing references to
689 693 # potentially unflushed data content.
690 694 if ifh is not None:
691 695 ifh.close()
692 696
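# Illustrative usage sketch (hypothetical, not part of this changeset):
# appends must happen inside the writing() context, which opens the
# index/data/sidedata handles and registers the files with the
# transaction:
#
#     with inner.writing(tr):
#         inner.write_entry(tr, entry, data, link, offset, ...)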
693 697 def __index_write_fp(self, index_end=None):
694 698 """internal method to open the index file for writing
695 699
696 700 You should not use this directly and use `_writing` instead
697 701 """
698 702 try:
699 703 f = self.opener(
700 704 self.index_file,
701 705 mode=b"r+",
702 706 checkambig=self.data_config.check_ambig,
703 707 )
704 708 if index_end is None:
705 709 f.seek(0, os.SEEK_END)
706 710 else:
707 711 f.seek(index_end, os.SEEK_SET)
708 712 return f
709 713 except FileNotFoundError:
710 714 return self.opener(
711 715 self.index_file,
712 716 mode=b"w+",
713 717 checkambig=self.data_config.check_ambig,
714 718 )
715 719
716 720 def __index_new_fp(self):
717 721 """internal method to create a new index file for writing
718 722
719 723 You should not use this unless you are upgrading from an inline revlog
720 724 """
721 725 return self.opener(
722 726 self.index_file,
723 727 mode=b"w",
724 728 checkambig=self.data_config.check_ambig,
725 729 atomictemp=True,
726 730 )
727 731
728 732 def split_inline(self, tr, header, new_index_file_path=None):
729 733 """split the data of an inline revlog into an index and a data file"""
730 734 existing_handles = False
731 735 if self._writinghandles is not None:
732 736 existing_handles = True
733 737 fp = self._writinghandles[0]
734 738 fp.flush()
735 739 fp.close()
736 740 # We can't use the cached file handle after close(). So prevent
737 741 # its usage.
738 742 self._writinghandles = None
739 743 self._segmentfile.writing_handle = None
740 744 # No need to deal with sidedata writing handle as it is only
741 745 # relevant with revlog-v2 which is never inline, not reaching
742 746 # this code
743 747
744 748 new_dfh = self.opener(self.data_file, mode=b"w+")
745 749 new_dfh.truncate(0) # drop any potentially existing data
746 750 try:
747 751 with self.reading():
748 752 for r in range(len(self.index)):
749 753 new_dfh.write(self.get_segment_for_revs(r, r)[1])
750 754 new_dfh.flush()
751 755
752 756 if new_index_file_path is not None:
753 757 self.index_file = new_index_file_path
754 758 with self.__index_new_fp() as fp:
755 759 self.inline = False
756 760 for i in range(len(self.index)):
757 761 e = self.index.entry_binary(i)
758 762 if i == 0:
759 763 packed_header = self.index.pack_header(header)
760 764 e = packed_header + e
761 765 fp.write(e)
762 766
763 767 # If we don't use side-write, the temp file replaces the real
764 768 # index when we exit the context manager
765 769
766 770 self._segmentfile = randomaccessfile.randomaccessfile(
767 771 self.opener,
768 772 self.data_file,
769 773 self.data_config.chunk_cache_size,
770 774 )
771 775
772 776 if existing_handles:
773 777 # switched from inline to conventional; reopen the index
774 778 ifh = self.__index_write_fp()
775 779 self._writinghandles = (ifh, new_dfh, None)
776 780 self._segmentfile.writing_handle = new_dfh
777 781 new_dfh = None
778 782 # No need to deal with sidedata writing handle as it is only
779 783 # relevant with revlog-v2 which is never inline, not reaching
780 784 # this code
781 785 finally:
782 786 if new_dfh is not None:
783 787 new_dfh.close()
784 788 return self.index_file
785 789
786 790 def get_segment_for_revs(self, startrev, endrev):
787 791 """Obtain a segment of raw data corresponding to a range of revisions.
788 792
789 793 Accepts the start and end revisions.
792 796
793 797 Requests for data may be satisfied by a cache.
794 798
795 799 Returns a 2-tuple of (offset, data) for the requested range of
796 800 revisions. Offset is the integer offset from the beginning of the
797 801 revlog and data is a str or buffer of the raw byte data.
798 802
799 803 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
800 804 to determine where each revision's data begins and ends.
801 805
802 806 API: we should consider making this a private part of the InnerRevlog
803 807 at some point.
804 808 """
805 809 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
806 810 # (functions are expensive).
807 811 index = self.index
808 812 istart = index[startrev]
809 813 start = int(istart[0] >> 16)
810 814 if startrev == endrev:
811 815 end = start + istart[1]
812 816 else:
813 817 iend = index[endrev]
814 818 end = int(iend[0] >> 16) + iend[1]
815 819
816 820 if self.inline:
817 821 start += (startrev + 1) * self.index.entry_size
818 822 end += (endrev + 1) * self.index.entry_size
819 823 length = end - start
820 824
821 825 return start, self._segmentfile.read_chunk(start, length)
822 826
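# Illustrative sketch (hypothetical usage, not part of this changeset):
# for a non-inline revlog, callers slice the returned segment with
# start()/length():
#
#     offset, data = inner.get_segment_for_revs(firstrev, lastrev)
#     for rev in range(firstrev, lastrev + 1):
#         s = inner.start(rev) - offset
#         chunk = data[s:s + inner.length(rev)]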
823 827 def _chunk(self, rev):
824 828 """Obtain a single decompressed chunk for a revision.
825 829
826 830 Accepts an integer revision.
829 833
830 834 Returns a str holding uncompressed data for the requested revision.
831 835 """
832 836 compression_mode = self.index[rev][10]
833 837 data = self.get_segment_for_revs(rev, rev)[1]
834 838 if compression_mode == COMP_MODE_PLAIN:
835 839 return data
836 840 elif compression_mode == COMP_MODE_DEFAULT:
837 841 return self._decompressor(data)
838 842 elif compression_mode == COMP_MODE_INLINE:
839 843 return self.decompress(data)
840 844 else:
841 845 msg = b'unknown compression mode %d'
842 846 msg %= compression_mode
843 847 raise error.RevlogError(msg)
844 848
845 849 def _chunks(self, revs, targetsize=None):
846 850 """Obtain decompressed chunks for the specified revisions.
847 851
848 852 Accepts an iterable of numeric revisions that are assumed to be in
849 853 ascending order.
852 856
853 857 This function is similar to calling ``self._chunk()`` multiple times,
854 858 but is faster.
855 859
856 860 Returns a list with decompressed data for each requested revision.
857 861 """
858 862 if not revs:
859 863 return []
860 864 start = self.start
861 865 length = self.length
862 866 inline = self.inline
863 867 iosize = self.index.entry_size
864 868 buffer = util.buffer
865 869
866 870 l = []
867 871 ladd = l.append
868 872
869 873 if not self.data_config.with_sparse_read:
870 874 slicedchunks = (revs,)
871 875 else:
872 876 slicedchunks = deltautil.slicechunk(
873 877 self,
874 878 revs,
875 879 targetsize=targetsize,
876 880 )
877 881
878 882 for revschunk in slicedchunks:
879 883 firstrev = revschunk[0]
880 884 # Skip trailing revisions with empty diff
881 885 for lastrev in revschunk[::-1]:
882 886 if length(lastrev) != 0:
883 887 break
884 888
885 889 try:
886 890 offset, data = self.get_segment_for_revs(firstrev, lastrev)
887 891 except OverflowError:
888 892 # issue4215 - we can't cache a run of chunks greater than
889 893 # 2G on Windows
890 894 return [self._chunk(rev) for rev in revschunk]
891 895
892 896 decomp = self.decompress
893 897 # self._decompressor might be None, but will not be used in that case
894 898 def_decomp = self._decompressor
895 899 for rev in revschunk:
896 900 chunkstart = start(rev)
897 901 if inline:
898 902 chunkstart += (rev + 1) * iosize
899 903 chunklength = length(rev)
900 904 comp_mode = self.index[rev][10]
901 905 c = buffer(data, chunkstart - offset, chunklength)
902 906 if comp_mode == COMP_MODE_PLAIN:
903 907 ladd(c)
904 908 elif comp_mode == COMP_MODE_INLINE:
905 909 ladd(decomp(c))
906 910 elif comp_mode == COMP_MODE_DEFAULT:
907 911 ladd(def_decomp(c))
908 912 else:
909 913 msg = b'unknown compression mode %d'
910 914 msg %= comp_mode
911 915 raise error.RevlogError(msg)
912 916
913 917 return l
914 918
915 919 def raw_text(self, node, rev):
916 920 """return the possibly unvalidated rawtext for a revision
917 921
918 922 returns (rev, rawtext, validated)
919 923 """
920 924
921 925 # revision in the cache (could be useful to apply delta)
922 926 cachedrev = None
923 927 # An intermediate text to apply deltas to
924 928 basetext = None
925 929
926 930 # Check if we have the entry in cache
927 931 # The cache entry looks like (node, rev, rawtext)
928 932 if self._revisioncache:
929 933 cachedrev = self._revisioncache[1]
930 934
931 935 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
932 936 if stopped:
933 937 basetext = self._revisioncache[2]
934 938
935 939 # drop cache to save memory, the caller is expected to
936 940 # update self._inner._revisioncache after validating the text
937 941 self._revisioncache = None
938 942
939 943 targetsize = None
940 944 rawsize = self.index[rev][2]
941 945 if 0 <= rawsize:
942 946 targetsize = 4 * rawsize
943 947
944 948 bins = self._chunks(chain, targetsize=targetsize)
945 949 if basetext is None:
946 950 basetext = bytes(bins[0])
947 951 bins = bins[1:]
948 952
949 953 rawtext = mdiff.patches(basetext, bins)
950 954 del basetext # let us have a chance to free memory early
951 955 return (rev, rawtext, False)
952 956
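# Illustrative sketch of the flow above (not part of this changeset): a
# rawtext is rebuilt by fetching the delta chain and folding the deltas
# over the base text:
#
#     chain, stopped = inner._deltachain(rev)
#     bins = inner._chunks(chain)
#     text = mdiff.patches(bytes(bins[0]), bins[1:])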
953 957 def sidedata(self, rev, sidedata_end):
954 958 """Return the sidedata for a given revision number."""
955 959 index_entry = self.index[rev]
956 960 sidedata_offset = index_entry[8]
957 961 sidedata_size = index_entry[9]
958 962
959 963 if self.inline:
960 964 sidedata_offset += self.index.entry_size * (1 + rev)
961 965 if sidedata_size == 0:
962 966 return {}
963 967
964 968 if sidedata_end < sidedata_offset + sidedata_size:
965 969 filename = self.sidedata_file
966 970 end = sidedata_end
967 971 offset = sidedata_offset
968 972 length = sidedata_size
969 973 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
970 974 raise error.RevlogError(m)
971 975
972 976 comp_segment = self._segmentfile_sidedata.read_chunk(
973 977 sidedata_offset, sidedata_size
974 978 )
975 979
976 980 comp = self.index[rev][11]
977 981 if comp == COMP_MODE_PLAIN:
978 982 segment = comp_segment
979 983 elif comp == COMP_MODE_DEFAULT:
980 984 segment = self._decompressor(comp_segment)
981 985 elif comp == COMP_MODE_INLINE:
982 986 segment = self.decompress(comp_segment)
983 987 else:
984 988 msg = b'unknown compression mode %d'
985 989 msg %= comp
986 990 raise error.RevlogError(msg)
987 991
988 992 sidedata = sidedatautil.deserialize_sidedata(segment)
989 993 return sidedata
990 994
991 995 def write_entry(
992 996 self,
993 997 transaction,
994 998 entry,
995 999 data,
996 1000 link,
997 1001 offset,
998 1002 sidedata,
999 1003 sidedata_offset,
1000 1004 index_end,
1001 1005 data_end,
1002 1006 sidedata_end,
1003 1007 ):
1004 1008 # Files opened in a+ mode have inconsistent behavior on various
1005 1009 # platforms. Windows requires that a file positioning call be made
1006 1010 # when the file handle transitions between reads and writes. See
1007 1011 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1008 1012 # platforms, Python or the platform itself can be buggy. Some versions
1009 1013 # of Solaris have been observed to not append at the end of the file
1010 1014 # if the file was seeked to before the end. See issue4943 for more.
1011 1015 #
1012 1016 # We work around this issue by inserting a seek() before writing.
1013 1017 # Note: This is likely not necessary on Python 3. However, because
1014 1018 # the file handle is reused for reads and may be seeked there, we need
1015 1019 # to be careful before changing this.
1016 1020 if self._writinghandles is None:
1017 1021 msg = b'adding revision outside `revlog._writing` context'
1018 1022 raise error.ProgrammingError(msg)
1019 1023 ifh, dfh, sdfh = self._writinghandles
1020 1024 if index_end is None:
1021 1025 ifh.seek(0, os.SEEK_END)
1022 1026 else:
1023 1027 ifh.seek(index_end, os.SEEK_SET)
1024 1028 if dfh:
1025 1029 if data_end is None:
1026 1030 dfh.seek(0, os.SEEK_END)
1027 1031 else:
1028 1032 dfh.seek(data_end, os.SEEK_SET)
1029 1033 if sdfh:
1030 1034 sdfh.seek(sidedata_end, os.SEEK_SET)
1031 1035
1032 1036 curr = len(self.index) - 1
1033 1037 if not self.inline:
1034 1038 transaction.add(self.data_file, offset)
1035 1039 if self.sidedata_file:
1036 1040 transaction.add(self.sidedata_file, sidedata_offset)
1037 transaction.add(self.index_file, curr * len(entry))
1041 transaction.add(self.canonical_index_file, curr * len(entry))
1038 1042 if data[0]:
1039 1043 dfh.write(data[0])
1040 1044 dfh.write(data[1])
1041 1045 if sidedata:
1042 1046 sdfh.write(sidedata)
1043 1047 ifh.write(entry)
1044 1048 else:
1045 1049 offset += curr * self.index.entry_size
1046 transaction.add(self.index_file, offset)
1050 transaction.add(self.canonical_index_file, offset)
1047 1051 ifh.write(entry)
1048 1052 ifh.write(data[0])
1049 1053 ifh.write(data[1])
1050 1054 assert not sidedata
1051 1055 return (
1052 1056 ifh.tell(),
1053 1057 dfh.tell() if dfh else None,
1054 1058 sdfh.tell() if sdfh else None,
1055 1059 )
1056 1060
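# Note on this changeset: write_entry now journals
# self.canonical_index_file instead of self.index_file. Today the two
# resolve to the same path, so behavior is unchanged; the property is
# presumably a hook for callers that temporarily write to a different
# index file while keeping the canonical name in the transaction.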
1057 1061
1058 1062 class revlog:
1059 1063 """
1060 1064 the underlying revision storage object
1061 1065
1062 1066 A revlog consists of two parts, an index and the revision data.
1063 1067
1064 1068 The index is a file with a fixed record size containing
1065 1069 information on each revision, including its nodeid (hash), the
1066 1070 nodeids of its parents, the position and offset of its data within
1067 1071 the data file, and the revision it's based on. Finally, each entry
1068 1072 contains a linkrev entry that can serve as a pointer to external
1069 1073 data.
1070 1074
1071 1075 The revision data itself is a linear collection of data chunks.
1072 1076 Each chunk represents a revision and is usually represented as a
1073 1077 delta against the previous chunk. To bound lookup time, runs of
1074 1078 deltas are limited to about 2 times the length of the original
1075 1079 version data. This makes retrieval of a version proportional to
1076 1080 its size, or O(1) relative to the number of revisions.
1077 1081
1078 1082 Both pieces of the revlog are written to in an append-only
1079 1083 fashion, which means we never need to rewrite a file to insert or
1080 1084 remove data, and can use some simple techniques to avoid the need
1081 1085 for locking while reading.
1082 1086
1083 1087 If checkambig, indexfile is opened with checkambig=True at
1084 1088 writing, to avoid file stat ambiguity.
1085 1089
1086 1090 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1087 1091 index will be mmapped rather than read if it is larger than the
1088 1092 configured threshold.
1089 1093
1090 1094 If censorable is True, the revlog can have censored revisions.
1091 1095
1092 1096 If `upperboundcomp` is not None, this is the expected maximal gain from
1093 1097 compression for the data content.
1094 1098
1095 1099 `concurrencychecker` is an optional function that receives 3 arguments: a
1096 1100 file handle, a filename, and an expected position. It should check whether
1097 1101 the current position in the file handle is valid, and log/warn/fail (by
1098 1102 raising).
1099 1103
1100 1104 See mercurial/revlogutils/constants.py for details about the content of an
1101 1105 index entry.
1102 1106 """
1103 1107
1104 1108 _flagserrorclass = error.RevlogError
1105 1109
1106 1110 @staticmethod
1107 1111 def is_inline_index(header_bytes):
1108 1112 """Determine if a revlog is inline from the initial bytes of the index"""
1109 1113 header = INDEX_HEADER.unpack(header_bytes)[0]
1110 1114
1111 1115 _format_flags = header & ~0xFFFF
1112 1116 _format_version = header & 0xFFFF
1113 1117
1114 1118 features = FEATURES_BY_VERSION[_format_version]
1115 1119 return features[b'inline'](_format_flags)
1116 1120
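# Illustrative usage sketch (hypothetical, not part of this changeset):
# the header packs format flags in the high 16 bits and the version in
# the low 16 bits, and the per-version feature table decides what the
# inline flag means:
#
#     with open(index_path, 'rb') as fp:
#         inline = revlog.is_inline_index(fp.read(4))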
1117 1121 def __init__(
1118 1122 self,
1119 1123 opener,
1120 1124 target,
1121 1125 radix,
1122 1126 postfix=None, # only exist for `tmpcensored` now
1123 1127 checkambig=False,
1124 1128 mmaplargeindex=False,
1125 1129 censorable=False,
1126 1130 upperboundcomp=None,
1127 1131 persistentnodemap=False,
1128 1132 concurrencychecker=None,
1129 1133 trypending=False,
1130 1134 try_split=False,
1131 1135 canonical_parent_order=True,
1132 1136 ):
1133 1137 """
1134 1138 create a revlog object
1135 1139
1136 1140 opener is a function that abstracts the file opening operation
1137 1141 and can be used to implement COW semantics or the like.
1138 1142
1139 1143 `target`: a (KIND, ID) tuple that identifies the content stored in
1140 1144 this revlog. It helps the rest of the code to understand what the revlog
1141 1145 is about without having to resort to heuristics and index filename
1142 1146 analysis. Note that this must reliably be set by normal code, but
1143 1147 test, debug, or performance measurement code might not set this to an
1144 1148 accurate value.
1145 1149 """
1146 1150
1147 1151 self.radix = radix
1148 1152
1149 1153 self._docket_file = None
1150 1154 self._indexfile = None
1151 1155 self._datafile = None
1152 1156 self._sidedatafile = None
1153 1157 self._nodemap_file = None
1154 1158 self.postfix = postfix
1155 1159 self._trypending = trypending
1156 1160 self._try_split = try_split
1157 1161 self.opener = opener
1158 1162 if persistentnodemap:
1159 1163 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1160 1164
1161 1165 assert target[0] in ALL_KINDS
1162 1166 assert len(target) == 2
1163 1167 self.target = target
1164 1168 if b'feature-config' in self.opener.options:
1165 1169 self.feature_config = self.opener.options[b'feature-config'].copy()
1166 1170 else:
1167 1171 self.feature_config = FeatureConfig()
1168 1172 self.feature_config.censorable = censorable
1169 1173 self.feature_config.canonical_parent_order = canonical_parent_order
1170 1174 if b'data-config' in self.opener.options:
1171 1175 self.data_config = self.opener.options[b'data-config'].copy()
1172 1176 else:
1173 1177 self.data_config = DataConfig()
1174 1178 self.data_config.check_ambig = checkambig
1175 1179 self.data_config.mmap_large_index = mmaplargeindex
1176 1180 if b'delta-config' in self.opener.options:
1177 1181 self.delta_config = self.opener.options[b'delta-config'].copy()
1178 1182 else:
1179 1183 self.delta_config = DeltaConfig()
1180 1184 self.delta_config.upper_bound_comp = upperboundcomp
1181 1185
1182 1186 # Maps rev to chain base rev.
1183 1187 self._chainbasecache = util.lrucachedict(100)
1184 1188
1185 1189 self.index = None
1186 1190 self._docket = None
1187 1191 self._nodemap_docket = None
1188 1192 # Mapping of partial identifiers to full nodes.
1189 1193 self._pcache = {}
1190 1194
1191 1195 # other optional features
1192 1196
1193 1197 # Make copy of flag processors so each revlog instance can support
1194 1198 # custom flags.
1195 1199 self._flagprocessors = dict(flagutil.flagprocessors)
1196 1200 # prevent nesting of addgroup
1197 1201 self._adding_group = None
1198 1202
1199 1203 chunk_cache = self._loadindex()
1200 1204 self._load_inner(chunk_cache)
1201 1205 self._concurrencychecker = concurrencychecker
1202 1206
1203 1207 @property
1204 1208 def _generaldelta(self):
1205 1209 """temporary compatibility proxy"""
1206 1210 util.nouideprecwarn(
1207 1211 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1208 1212 )
1209 1213 return self.delta_config.general_delta
1210 1214
1211 1215 @property
1212 1216 def _checkambig(self):
1213 1217 """temporary compatibility proxy"""
1214 1218 util.nouideprecwarn(
1215 1219 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1216 1220 )
1217 1221 return self.data_config.check_ambig
1218 1222
1219 1223 @property
1220 1224 def _mmaplargeindex(self):
1221 1225 """temporary compatibility proxy"""
1222 1226 util.nouideprecwarn(
1223 1227 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1224 1228 )
1225 1229 return self.data_config.mmap_large_index
1226 1230
1227 1231 @property
1228 1232 def _censorable(self):
1229 1233 """temporary compatibility proxy"""
1230 1234 util.nouideprecwarn(
1231 1235 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1232 1236 )
1233 1237 return self.feature_config.censorable
1234 1238
1235 1239 @property
1236 1240 def _chunkcachesize(self):
1237 1241 """temporary compatibility proxy"""
1238 1242 util.nouideprecwarn(
1239 1243 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1240 1244 )
1241 1245 return self.data_config.chunk_cache_size
1242 1246
1243 1247 @property
1244 1248 def _maxchainlen(self):
1245 1249 """temporary compatibility proxy"""
1246 1250 util.nouideprecwarn(
1247 1251 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1248 1252 )
1249 1253 return self.delta_config.max_chain_len
1250 1254
1251 1255 @property
1252 1256 def _deltabothparents(self):
1253 1257 """temporary compatibility proxy"""
1254 1258 util.nouideprecwarn(
1255 1259 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1256 1260 )
1257 1261 return self.delta_config.delta_both_parents
1258 1262
1259 1263 @property
1260 1264 def _candidate_group_chunk_size(self):
1261 1265 """temporary compatibility proxy"""
1262 1266 util.nouideprecwarn(
1263 1267 b"use revlog.delta_config.candidate_group_chunk_size",
1264 1268 b"6.6",
1265 1269 stacklevel=2,
1266 1270 )
1267 1271 return self.delta_config.candidate_group_chunk_size
1268 1272
1269 1273 @property
1270 1274 def _debug_delta(self):
1271 1275 """temporary compatibility proxy"""
1272 1276 util.nouideprecwarn(
1273 1277 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1274 1278 )
1275 1279 return self.delta_config.debug_delta
1276 1280
1277 1281 @property
1278 1282 def _compengine(self):
1279 1283 """temporary compatibility proxy"""
1280 1284 util.nouideprecwarn(
1281 1285 b"use revlog.feature_config.compression_engine",
1282 1286 b"6.6",
1283 1287 stacklevel=2,
1284 1288 )
1285 1289 return self.feature_config.compression_engine
1286 1290
1287 1291 @property
1288 1292 def upperboundcomp(self):
1289 1293 """temporary compatibility proxy"""
1290 1294 util.nouideprecwarn(
1291 1295 b"use revlog.delta_config.upper_bound_comp",
1292 1296 b"6.6",
1293 1297 stacklevel=2,
1294 1298 )
1295 1299 return self.delta_config.upper_bound_comp
1296 1300
1297 1301 @property
1298 1302 def _compengineopts(self):
1299 1303 """temporary compatibility proxy"""
1300 1304 util.nouideprecwarn(
1301 1305 b"use revlog.feature_config.compression_engine_options",
1302 1306 b"6.6",
1303 1307 stacklevel=2,
1304 1308 )
1305 1309 return self.feature_config.compression_engine_options
1306 1310
1307 1311 @property
1308 1312 def _maxdeltachainspan(self):
1309 1313 """temporary compatibility proxy"""
1310 1314 util.nouideprecwarn(
1311 1315 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1312 1316 )
1313 1317 return self.delta_config.max_deltachain_span
1314 1318
1315 1319 @property
1316 1320 def _withsparseread(self):
1317 1321 """temporary compatibility proxy"""
1318 1322 util.nouideprecwarn(
1319 1323 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1320 1324 )
1321 1325 return self.data_config.with_sparse_read
1322 1326
1323 1327 @property
1324 1328 def _sparserevlog(self):
1325 1329 """temporary compatibility proxy"""
1326 1330 util.nouideprecwarn(
1327 1331 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1328 1332 )
1329 1333 return self.delta_config.sparse_revlog
1330 1334
1331 1335 @property
1332 1336 def hassidedata(self):
1333 1337 """temporary compatibility proxy"""
1334 1338 util.nouideprecwarn(
1335 1339 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1336 1340 )
1337 1341 return self.feature_config.has_side_data
1338 1342
1339 1343 @property
1340 1344 def _srdensitythreshold(self):
1341 1345 """temporary compatibility proxy"""
1342 1346 util.nouideprecwarn(
1343 1347 b"use revlog.data_config.sr_density_threshold",
1344 1348 b"6.6",
1345 1349 stacklevel=2,
1346 1350 )
1347 1351 return self.data_config.sr_density_threshold
1348 1352
1349 1353 @property
1350 1354 def _srmingapsize(self):
1351 1355 """temporary compatibility proxy"""
1352 1356 util.nouideprecwarn(
1353 1357 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1354 1358 )
1355 1359 return self.data_config.sr_min_gap_size
1356 1360
1357 1361 @property
1358 1362 def _compute_rank(self):
1359 1363 """temporary compatibility proxy"""
1360 1364 util.nouideprecwarn(
1361 1365 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1362 1366 )
1363 1367 return self.feature_config.compute_rank
1364 1368
1365 1369 @property
1366 1370 def canonical_parent_order(self):
1367 1371 """temporary compatibility proxy"""
1368 1372 util.nouideprecwarn(
1369 1373 b"use revlog.feature_config.canonical_parent_order",
1370 1374 b"6.6",
1371 1375 stacklevel=2,
1372 1376 )
1373 1377 return self.feature_config.canonical_parent_order
1374 1378
1375 1379 @property
1376 1380 def _lazydelta(self):
1377 1381 """temporary compatibility proxy"""
1378 1382 util.nouideprecwarn(
1379 1383 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1380 1384 )
1381 1385 return self.delta_config.lazy_delta
1382 1386
1383 1387 @property
1384 1388 def _lazydeltabase(self):
1385 1389 """temporary compatibility proxy"""
1386 1390 util.nouideprecwarn(
1387 1391 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1388 1392 )
1389 1393 return self.delta_config.lazy_delta_base
1390 1394
1391 1395 def _init_opts(self):
1392 1396 """process options (from above/config) to setup associated default revlog mode
1393 1397
1394 1398 These values might be affected when actually reading on disk information.
1395 1399
1396 1400 The relevant values are returned for use in _loadindex().
1397 1401
1398 1402 * newversionflags:
1399 1403 version header to use if we need to create a new revlog
1400 1404
1401 1405 * mmapindexthreshold:
1402 1406 minimal index size at which to start using mmap
1403 1407
1404 1408 * force_nodemap:
1405 1409 force the usage of a "development" version of the nodemap code
1406 1410 """
1407 1411 opts = self.opener.options
1408 1412
1409 1413 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1410 1414 new_header = CHANGELOGV2
1411 1415 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1412 1416 self.feature_config.compute_rank = compute_rank
1413 1417 elif b'revlogv2' in opts:
1414 1418 new_header = REVLOGV2
1415 1419 elif b'revlogv1' in opts:
1416 1420 new_header = REVLOGV1 | FLAG_INLINE_DATA
1417 1421 if b'generaldelta' in opts:
1418 1422 new_header |= FLAG_GENERALDELTA
1419 1423 elif b'revlogv0' in self.opener.options:
1420 1424 new_header = REVLOGV0
1421 1425 else:
1422 1426 new_header = REVLOG_DEFAULT_VERSION
1423 1427
1424 1428 mmapindexthreshold = None
1425 1429 if self.data_config.mmap_large_index:
1426 1430 mmapindexthreshold = self.data_config.mmap_index_threshold
1427 1431 if self.feature_config.enable_ellipsis:
1428 1432 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1429 1433
1430 1434 # revlog v0 doesn't have flag processors
1431 1435 for flag, processor in opts.get(b'flagprocessors', {}).items():
1432 1436 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1433 1437
1434 1438 chunk_cache_size = self.data_config.chunk_cache_size
1435 1439 if chunk_cache_size <= 0:
1436 1440 raise error.RevlogError(
1437 1441 _(b'revlog chunk cache size %r is not greater than 0')
1438 1442 % chunk_cache_size
1439 1443 )
1440 1444 elif chunk_cache_size & (chunk_cache_size - 1):
1441 1445 raise error.RevlogError(
1442 1446 _(b'revlog chunk cache size %r is not a power of 2')
1443 1447 % chunk_cache_size
1444 1448 )
1445 1449 force_nodemap = opts.get(b'devel-force-nodemap', False)
1446 1450 return new_header, mmapindexthreshold, force_nodemap
1447 1451
1448 1452 def _get_data(self, filepath, mmap_threshold, size=None):
1449 1453 """return a file content with or without mmap
1450 1454
1451 1455 If the file is missing return the empty string"""
1452 1456 try:
1453 1457 with self.opener(filepath) as fp:
1454 1458 if mmap_threshold is not None:
1455 1459 file_size = self.opener.fstat(fp).st_size
1456 1460 if file_size >= mmap_threshold:
1457 1461 if size is not None:
1458 1462 # avoid potential mmap crash
1459 1463 size = min(file_size, size)
1460 1464 # TODO: should .close() to release resources without
1461 1465 # relying on Python GC
1462 1466 if size is None:
1463 1467 return util.buffer(util.mmapread(fp))
1464 1468 else:
1465 1469 return util.buffer(util.mmapread(fp, size))
1466 1470 if size is None:
1467 1471 return fp.read()
1468 1472 else:
1469 1473 return fp.read(size)
1470 1474 except FileNotFoundError:
1471 1475 return b''
1472 1476
1473 1477 def get_streams(self, max_linkrev, force_inline=False):
1474 1478 """return a list of streams that represent this revlog
1475 1479
1476 1480 This is used by stream-clone to do bytes-to-bytes copies of a repository.
1477 1481
1478 1482 This streams data for all revisions that refer to a changelog revision up
1479 1483 to `max_linkrev`.
1480 1484
1481 1485 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1482 1486
1483 1487 It returns a list of three-tuples:
1484 1488
1485 1489 [
1486 1490 (filename, bytes_stream, stream_size),
1487 1491 …
1488 1492 ]
1489 1493 """
1490 1494 n = len(self)
1491 1495 index = self.index
1492 1496 while n > 0:
1493 1497 linkrev = index[n - 1][4]
1494 1498 if linkrev < max_linkrev:
1495 1499 break
1496 1500 # note: this loop will rarely go through multiple iterations, since
1497 1501 # it only traverses commits created during the current streaming
1498 1502 # pull operation.
1499 1503 #
1500 1504 # If this becomes a problem, using a binary search should cap the
1501 1505 # runtime of this.
1502 1506 n = n - 1
1503 1507 if n == 0:
1504 1508 # no data to send
1505 1509 return []
1506 1510 index_size = n * index.entry_size
1507 1511 data_size = self.end(n - 1)
1508 1512
1509 1513 # XXX we might have been split (or stripped) since the object
1510 1514 # initialization. We need to close this race too, e.g. by having a way to
1511 1515 # pre-open the files we feed to the revlog and never closing them before
1512 1516 # we are done streaming.
1513 1517
1514 1518 if self._inline:
1515 1519
1516 1520 def get_stream():
1517 1521 with self.opener(self._indexfile, mode=b"r") as fp:
1518 1522 yield None
1519 1523 size = index_size + data_size
1520 1524 if size <= 65536:
1521 1525 yield fp.read(size)
1522 1526 else:
1523 1527 yield from util.filechunkiter(fp, limit=size)
1524 1528
1525 1529 inline_stream = get_stream()
1526 1530 next(inline_stream)
1527 1531 return [
1528 1532 (self._indexfile, inline_stream, index_size + data_size),
1529 1533 ]
1530 1534 elif force_inline:
1531 1535
1532 1536 def get_stream():
1533 1537 with self.reading():
1534 1538 yield None
1535 1539
1536 1540 for rev in range(n):
1537 1541 idx = self.index.entry_binary(rev)
1538 1542 if rev == 0 and self._docket is None:
1539 1543 # re-inject the inline flag
1540 1544 header = self._format_flags
1541 1545 header |= self._format_version
1542 1546 header |= FLAG_INLINE_DATA
1543 1547 header = self.index.pack_header(header)
1544 1548 idx = header + idx
1545 1549 yield idx
1546 1550 yield self._inner.get_segment_for_revs(rev, rev)[1]
1547 1551
1548 1552 inline_stream = get_stream()
1549 1553 next(inline_stream)
1550 1554 return [
1551 1555 (self._indexfile, inline_stream, index_size + data_size),
1552 1556 ]
1553 1557 else:
1554 1558
1555 1559 def get_index_stream():
1556 1560 with self.opener(self._indexfile, mode=b"r") as fp:
1557 1561 yield None
1558 1562 if index_size <= 65536:
1559 1563 yield fp.read(index_size)
1560 1564 else:
1561 1565 yield from util.filechunkiter(fp, limit=index_size)
1562 1566
1563 1567 def get_data_stream():
1564 1568 with self._datafp() as fp:
1565 1569 yield None
1566 1570 if data_size <= 65536:
1567 1571 yield fp.read(data_size)
1568 1572 else:
1569 1573 yield from util.filechunkiter(fp, limit=data_size)
1570 1574
1571 1575 index_stream = get_index_stream()
1572 1576 next(index_stream)
1573 1577 data_stream = get_data_stream()
1574 1578 next(data_stream)
1575 1579 return [
1576 1580 (self._datafile, data_stream, data_size),
1577 1581 (self._indexfile, index_stream, index_size),
1578 1582 ]
1579 1583
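# Illustrative consumption sketch (hypothetical, not part of this
# changeset): each returned stream has already yielded its priming None,
# so a stream-clone style consumer can copy the bytes directly:
#
#     for name, stream, size in rl.get_streams(max_linkrev):
#         for block in stream:
#             out.write(block)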
1580 1584 def _loadindex(self, docket=None):
1581 1585
1582 1586 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1583 1587
1584 1588 if self.postfix is not None:
1585 1589 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1586 1590 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1587 1591 entry_point = b'%s.i.a' % self.radix
1588 1592 elif self._try_split and self.opener.exists(self._split_index_file):
1589 1593 entry_point = self._split_index_file
1590 1594 else:
1591 1595 entry_point = b'%s.i' % self.radix
1592 1596
1593 1597 if docket is not None:
1594 1598 self._docket = docket
1595 1599 self._docket_file = entry_point
1596 1600 else:
1597 1601 self._initempty = True
1598 1602 entry_data = self._get_data(entry_point, mmapindexthreshold)
1599 1603 if len(entry_data) > 0:
1600 1604 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1601 1605 self._initempty = False
1602 1606 else:
1603 1607 header = new_header
1604 1608
1605 1609 self._format_flags = header & ~0xFFFF
1606 1610 self._format_version = header & 0xFFFF
1607 1611
1608 1612 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1609 1613 if supported_flags is None:
1610 1614 msg = _(b'unknown version (%d) in revlog %s')
1611 1615 msg %= (self._format_version, self.display_id)
1612 1616 raise error.RevlogError(msg)
1613 1617 elif self._format_flags & ~supported_flags:
1614 1618 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1615 1619 display_flag = self._format_flags >> 16
1616 1620 msg %= (display_flag, self._format_version, self.display_id)
1617 1621 raise error.RevlogError(msg)
1618 1622
1619 1623 features = FEATURES_BY_VERSION[self._format_version]
1620 1624 self._inline = features[b'inline'](self._format_flags)
1621 1625 self.delta_config.general_delta = features[b'generaldelta'](
1622 1626 self._format_flags
1623 1627 )
1624 1628 self.feature_config.has_side_data = features[b'sidedata']
1625 1629
1626 1630 if not features[b'docket']:
1627 1631 self._indexfile = entry_point
1628 1632 index_data = entry_data
1629 1633 else:
1630 1634 self._docket_file = entry_point
1631 1635 if self._initempty:
1632 1636 self._docket = docketutil.default_docket(self, header)
1633 1637 else:
1634 1638 self._docket = docketutil.parse_docket(
1635 1639 self, entry_data, use_pending=self._trypending
1636 1640 )
1637 1641
1638 1642 if self._docket is not None:
1639 1643 self._indexfile = self._docket.index_filepath()
1640 1644 index_data = b''
1641 1645 index_size = self._docket.index_end
1642 1646 if index_size > 0:
1643 1647 index_data = self._get_data(
1644 1648 self._indexfile, mmapindexthreshold, size=index_size
1645 1649 )
1646 1650 if len(index_data) < index_size:
1647 1651 msg = _(b'not enough index data for %s: got %d, expected %d')
1648 1652 msg %= (self.display_id, len(index_data), index_size)
1649 1653 raise error.RevlogError(msg)
1650 1654
1651 1655 self._inline = False
1652 1656 # generaldelta implied by version 2 revlogs.
1653 1657 self.delta_config.general_delta = True
1654 1658 # the logic for persistent nodemap will be dealt with within the
1655 1659 # main docket, so disable it for now.
1656 1660 self._nodemap_file = None
1657 1661
1658 1662 if self._docket is not None:
1659 1663 self._datafile = self._docket.data_filepath()
1660 1664 self._sidedatafile = self._docket.sidedata_filepath()
1661 1665 elif self.postfix is None:
1662 1666 self._datafile = b'%s.d' % self.radix
1663 1667 else:
1664 1668 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1665 1669
1666 1670 self.nodeconstants = sha1nodeconstants
1667 1671 self.nullid = self.nodeconstants.nullid
1668 1672
1669 1673 # sparse-revlog can't be on without general-delta (issue6056)
1670 1674 if not self.delta_config.general_delta:
1671 1675 self.delta_config.sparse_revlog = False
1672 1676
1673 1677 self._storedeltachains = True
1674 1678
1675 1679 devel_nodemap = (
1676 1680 self._nodemap_file
1677 1681 and force_nodemap
1678 1682 and parse_index_v1_nodemap is not None
1679 1683 )
1680 1684
1681 1685 use_rust_index = False
1682 1686 if rustrevlog is not None:
1683 1687 if self._nodemap_file is not None:
1684 1688 use_rust_index = True
1685 1689 else:
1686 1690 use_rust_index = self.opener.options.get(b'rust.index')
1687 1691
1688 1692 self._parse_index = parse_index_v1
1689 1693 if self._format_version == REVLOGV0:
1690 1694 self._parse_index = revlogv0.parse_index_v0
1691 1695 elif self._format_version == REVLOGV2:
1692 1696 self._parse_index = parse_index_v2
1693 1697 elif self._format_version == CHANGELOGV2:
1694 1698 self._parse_index = parse_index_cl_v2
1695 1699 elif devel_nodemap:
1696 1700 self._parse_index = parse_index_v1_nodemap
1697 1701 elif use_rust_index:
1698 1702 self._parse_index = parse_index_v1_mixed
1699 1703 try:
1700 1704 d = self._parse_index(index_data, self._inline)
1701 1705 index, chunkcache = d
1702 1706 use_nodemap = (
1703 1707 not self._inline
1704 1708 and self._nodemap_file is not None
1705 1709 and hasattr(index, 'update_nodemap_data')
1706 1710 )
1707 1711 if use_nodemap:
1708 1712 nodemap_data = nodemaputil.persisted_data(self)
1709 1713 if nodemap_data is not None:
1710 1714 docket = nodemap_data[0]
1711 1715 if (
1712 1716 len(d[0]) > docket.tip_rev
1713 1717 and d[0][docket.tip_rev][7] == docket.tip_node
1714 1718 ):
1715 1719 # no changelog tampering
1716 1720 self._nodemap_docket = docket
1717 1721 index.update_nodemap_data(*nodemap_data)
1718 1722 except (ValueError, IndexError):
1719 1723 raise error.RevlogError(
1720 1724 _(b"index %s is corrupted") % self.display_id
1721 1725 )
1722 1726 self.index = index
1723 1727 # revnum -> (chain-length, sum-delta-length)
1724 1728 self._chaininfocache = util.lrucachedict(500)
1725 1729
1726 1730 return chunkcache
1727 1731
1728 1732 def _load_inner(self, chunk_cache):
1729 1733 if self._docket is None:
1730 1734 default_compression_header = None
1731 1735 else:
1732 1736 default_compression_header = self._docket.default_compression_header
1733 1737
1734 1738 self._inner = _InnerRevlog(
1735 1739 opener=self.opener,
1736 1740 index=self.index,
1737 1741 index_file=self._indexfile,
1738 1742 data_file=self._datafile,
1739 1743 sidedata_file=self._sidedatafile,
1740 1744 inline=self._inline,
1741 1745 data_config=self.data_config,
1742 1746 delta_config=self.delta_config,
1743 1747 feature_config=self.feature_config,
1744 1748 chunk_cache=chunk_cache,
1745 1749 default_compression_header=default_compression_header,
1746 1750 )
1747 1751
1748 1752 def get_revlog(self):
1749 1753 """simple function to mirror the API of other not-really-revlog classes"""
1750 1754 return self
1751 1755
1752 1756 @util.propertycache
1753 1757 def revlog_kind(self):
1754 1758 return self.target[0]
1755 1759
1756 1760 @util.propertycache
1757 1761 def display_id(self):
1758 1762 """The public facing "ID" of the revlog that we use in messages"""
1759 1763 if self.revlog_kind == KIND_FILELOG:
1760 1764 # Reference the file without the "data/" prefix, so it is familiar
1761 1765 # to the user.
1762 1766 return self.target[1]
1763 1767 else:
1764 1768 return self.radix
1765 1769
1766 1770 def _datafp(self, mode=b'r'):
1767 1771 """file object for the revlog's data file"""
1768 1772 return self.opener(self._datafile, mode=mode)
1769 1773
1770 1774 def tiprev(self):
1771 1775 return len(self.index) - 1
1772 1776
1773 1777 def tip(self):
1774 1778 return self.node(self.tiprev())
1775 1779
1776 1780 def __contains__(self, rev):
1777 1781 return 0 <= rev < len(self)
1778 1782
1779 1783 def __len__(self):
1780 1784 return len(self.index)
1781 1785
1782 1786 def __iter__(self):
1783 1787 return iter(range(len(self)))
1784 1788
1785 1789 def revs(self, start=0, stop=None):
1786 1790 """iterate over all revs in this revlog (from start to stop)"""
1787 1791 return storageutil.iterrevs(len(self), start=start, stop=stop)
1788 1792
1789 1793 def hasnode(self, node):
1790 1794 try:
1791 1795 self.rev(node)
1792 1796 return True
1793 1797 except KeyError:
1794 1798 return False
1795 1799
1796 1800 def _candelta(self, baserev, rev):
1797 1801 """whether two revisions (baserev, rev) can be delta-ed or not"""
1798 1802 # Disable delta if either rev requires a content-changing flag
1799 1803 # processor (ex. LFS). This is because such a flag processor can alter
1800 1804 # the rawtext content that the delta will be based on, and two clients
1801 1805 # could have the same revlog node with different flags (i.e. different
1802 1806 # rawtext contents), making the delta incompatible.
1803 1807 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1804 1808 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1805 1809 ):
1806 1810 return False
1807 1811 return True
1808 1812
1809 1813 def update_caches(self, transaction):
1810 1814 """update on disk cache
1811 1815
1812 1816 If a transaction is passed, the update may be delayed to transaction
1813 1817 commit."""
1814 1818 if self._nodemap_file is not None:
1815 1819 if transaction is None:
1816 1820 nodemaputil.update_persistent_nodemap(self)
1817 1821 else:
1818 1822 nodemaputil.setup_persistent_nodemap(transaction, self)
1819 1823
1820 1824 def clearcaches(self):
1821 1825 """Clear in-memory caches"""
1822 1826 self._chainbasecache.clear()
1823 1827 self._inner.clear_cache()
1824 1828 self._pcache = {}
1825 1829 self._nodemap_docket = None
1826 1830 self.index.clearcaches()
1827 1831 # The Python code is the one responsible for validating the docket, so
1828 1832 # we end up having to refresh it here.
1829 1833 use_nodemap = (
1830 1834 not self._inline
1831 1835 and self._nodemap_file is not None
1832 1836 and hasattr(self.index, 'update_nodemap_data')
1833 1837 )
1834 1838 if use_nodemap:
1835 1839 nodemap_data = nodemaputil.persisted_data(self)
1836 1840 if nodemap_data is not None:
1837 1841 self._nodemap_docket = nodemap_data[0]
1838 1842 self.index.update_nodemap_data(*nodemap_data)
1839 1843
1840 1844 def rev(self, node):
1841 1845 """return the revision number associated with a <nodeid>"""
1842 1846 try:
1843 1847 return self.index.rev(node)
1844 1848 except TypeError:
1845 1849 raise
1846 1850 except error.RevlogError:
1847 1851 # parsers.c radix tree lookup failed
1848 1852 if (
1849 1853 node == self.nodeconstants.wdirid
1850 1854 or node in self.nodeconstants.wdirfilenodeids
1851 1855 ):
1852 1856 raise error.WdirUnsupported
1853 1857 raise error.LookupError(node, self.display_id, _(b'no node'))
1854 1858
1855 1859 # Accessors for index entries.
1856 1860
1857 1861 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1858 1862 # are flags.
1859 1863 def start(self, rev):
1860 1864 return int(self.index[rev][0] >> 16)
1861 1865
1862 1866 def sidedata_cut_off(self, rev):
1863 1867 sd_cut_off = self.index[rev][8]
1864 1868 if sd_cut_off != 0:
1865 1869 return sd_cut_off
1866 1870 # This is some annoying dance, because entries without sidedata
1867 1871 # currently use 0 as their offset (instead of previous-offset +
1868 1872 # previous-size)
1869 1873 #
1870 1874 # We should reconsider this sidedata → 0 sidedata_offset policy.
1871 1875 # In the meantime, we need this.
1872 1876 while 0 <= rev:
1873 1877 e = self.index[rev]
1874 1878 if e[9] != 0:
1875 1879 return e[8] + e[9]
1876 1880 rev -= 1
1877 1881 return 0
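# Worked illustration of the backward scan above (hypothetical numbers):
# if rev 4 stores its sidedata at offset 100 with length 20 and revs 5..7
# store none (offset 0), the cut-off is 100 for rev 4 (the start of its
# own chunk) and 100 + 20 = 120 for revs 5..7 (the end of the last chunk
# actually written).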
1878 1882
1879 1883 def flags(self, rev):
1880 1884 return self.index[rev][0] & 0xFFFF
1881 1885
1882 1886 def length(self, rev):
1883 1887 return self.index[rev][1]
1884 1888
1885 1889 def sidedata_length(self, rev):
1886 1890 if not self.feature_config.has_side_data:
1887 1891 return 0
1888 1892 return self.index[rev][9]
1889 1893
1890 1894 def rawsize(self, rev):
1891 1895 """return the length of the uncompressed text for a given revision"""
1892 1896 l = self.index[rev][2]
1893 1897 if l >= 0:
1894 1898 return l
1895 1899
1896 1900 t = self.rawdata(rev)
1897 1901 return len(t)
1898 1902
1899 1903 def size(self, rev):
1900 1904 """length of non-raw text (processed by a "read" flag processor)"""
1901 1905 # fast path: if no "read" flag processor could change the content,
1902 1906 # size is rawsize. note: ELLIPSIS is known to not change the content.
1903 1907 flags = self.flags(rev)
1904 1908 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1905 1909 return self.rawsize(rev)
1906 1910
1907 1911 return len(self.revision(rev))
1908 1912
1909 1913 def fast_rank(self, rev):
1910 1914 """Return the rank of a revision if already known, or None otherwise.
1911 1915
1912 1916 The rank of a revision is the size of the sub-graph it defines as a
1913 1917 head. Equivalently, the rank of a revision `r` is the size of the set
1914 1918 `ancestors(r)`, `r` included.
1915 1919
1916 1920 This method returns the rank retrieved from the revlog in constant
1917 1921 time. It makes no attempt at computing unknown values for versions of
1918 1922 the revlog which do not persist the rank.
1919 1923 """
1920 1924 rank = self.index[rev][ENTRY_RANK]
1921 1925 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1922 1926 return None
1923 1927 if rev == nullrev:
1924 1928 return 0 # convention
1925 1929 return rank
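# A minimal sketch of what the persisted rank encodes, assuming only a
# parentrevs-like callable (illustration, not part of this class):
#
#     def naive_rank(parentrevs, rev):
#         seen = set()
#         stack = [rev]
#         while stack:
#             r = stack.pop()
#             if r != nullrev and r not in seen:
#                 seen.add(r)
#                 stack.extend(parentrevs(r))
#         return len(seen)
#
# e.g. in a linear history 0 <- 1 <- 2, naive_rank(parentrevs, 2) == 3.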
1926 1930
1927 1931 def chainbase(self, rev):
1928 1932 base = self._chainbasecache.get(rev)
1929 1933 if base is not None:
1930 1934 return base
1931 1935
1932 1936 index = self.index
1933 1937 iterrev = rev
1934 1938 base = index[iterrev][3]
1935 1939 while base != iterrev:
1936 1940 iterrev = base
1937 1941 base = index[iterrev][3]
1938 1942
1939 1943 self._chainbasecache[rev] = base
1940 1944 return base
1941 1945
1942 1946 def linkrev(self, rev):
1943 1947 return self.index[rev][4]
1944 1948
1945 1949 def parentrevs(self, rev):
1946 1950 try:
1947 1951 entry = self.index[rev]
1948 1952 except IndexError:
1949 1953 if rev == wdirrev:
1950 1954 raise error.WdirUnsupported
1951 1955 raise
1952 1956
1953 1957 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1954 1958 return entry[6], entry[5]
1955 1959 else:
1956 1960 return entry[5], entry[6]
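# Illustration of the canonical-parent-order swap above: an entry stored
# with (entry[5], entry[6]) == (nullrev, 5) is reported as (5, nullrev),
# so a null first parent never shadows a real second parent.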
1957 1961
1958 1962 # fast parentrevs(rev) where rev isn't filtered
1959 1963 _uncheckedparentrevs = parentrevs
1960 1964
1961 1965 def node(self, rev):
1962 1966 try:
1963 1967 return self.index[rev][7]
1964 1968 except IndexError:
1965 1969 if rev == wdirrev:
1966 1970 raise error.WdirUnsupported
1967 1971 raise
1968 1972
1969 1973 # Derived from index values.
1970 1974
1971 1975 def end(self, rev):
1972 1976 return self.start(rev) + self.length(rev)
1973 1977
1974 1978 def parents(self, node):
1975 1979 i = self.index
1976 1980 d = i[self.rev(node)]
1977 1981 # inline node() to avoid function call overhead
1978 1982 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1979 1983 return i[d[6]][7], i[d[5]][7]
1980 1984 else:
1981 1985 return i[d[5]][7], i[d[6]][7]
1982 1986
1983 1987 def chainlen(self, rev):
1984 1988 return self._chaininfo(rev)[0]
1985 1989
1986 1990 def _chaininfo(self, rev):
1987 1991 chaininfocache = self._chaininfocache
1988 1992 if rev in chaininfocache:
1989 1993 return chaininfocache[rev]
1990 1994 index = self.index
1991 1995 generaldelta = self.delta_config.general_delta
1992 1996 iterrev = rev
1993 1997 e = index[iterrev]
1994 1998 clen = 0
1995 1999 compresseddeltalen = 0
1996 2000 while iterrev != e[3]:
1997 2001 clen += 1
1998 2002 compresseddeltalen += e[1]
1999 2003 if generaldelta:
2000 2004 iterrev = e[3]
2001 2005 else:
2002 2006 iterrev -= 1
2003 2007 if iterrev in chaininfocache:
2004 2008 t = chaininfocache[iterrev]
2005 2009 clen += t[0]
2006 2010 compresseddeltalen += t[1]
2007 2011 break
2008 2012 e = index[iterrev]
2009 2013 else:
2010 2014 # Add text length of base since decompressing that also takes
2011 2015 # work. For cache hits the length is already included.
2012 2016 compresseddeltalen += e[1]
2013 2017 r = (clen, compresseddeltalen)
2014 2018 chaininfocache[rev] = r
2015 2019 return r
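# Equivalent naive computation, as a hedged sketch (general delta
# assumed, no caching): follow delta bases until a revision is its own
# base, summing compressed delta lengths along the way.
#
#     def naive_chaininfo(index, rev):
#         clen, dlen = 0, 0
#         while rev != index[rev][3]:
#             clen += 1
#             dlen += index[rev][1]
#             rev = index[rev][3]
#         return clen, dlen + index[rev][1]  # base text cost included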
2016 2020
2017 2021 def _deltachain(self, rev, stoprev=None):
2018 2022 return self._inner._deltachain(rev, stoprev=stoprev)
2019 2023
2020 2024 def ancestors(self, revs, stoprev=0, inclusive=False):
2021 2025 """Generate the ancestors of 'revs' in reverse revision order.
2022 2026 Does not generate revs lower than stoprev.
2023 2027
2024 2028 See the documentation for ancestor.lazyancestors for more details."""
2025 2029
2026 2030 # first, make sure start revisions aren't filtered
2027 2031 revs = list(revs)
2028 2032 checkrev = self.node
2029 2033 for r in revs:
2030 2034 checkrev(r)
2031 2035 # and we're sure ancestors aren't filtered as well
2032 2036
2033 2037 if rustancestor is not None and self.index.rust_ext_compat:
2034 2038 lazyancestors = rustancestor.LazyAncestors
2035 2039 arg = self.index
2036 2040 else:
2037 2041 lazyancestors = ancestor.lazyancestors
2038 2042 arg = self._uncheckedparentrevs
2039 2043 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2040 2044
2041 2045 def descendants(self, revs):
2042 2046 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2043 2047
2044 2048 def findcommonmissing(self, common=None, heads=None):
2045 2049 """Return a tuple of the ancestors of common and the ancestors of heads
2046 2050 that are not ancestors of common. In revset terminology, we return the
2047 2051 tuple:
2048 2052
2049 2053 ::common, (::heads) - (::common)
2050 2054
2051 2055 The list is sorted by revision number, meaning it is
2052 2056 topologically sorted.
2053 2057
2054 2058 'heads' and 'common' are both lists of node IDs. If heads is
2055 2059 not supplied, uses all of the revlog's heads. If common is not
2056 2060 supplied, uses nullid."""
2057 2061 if common is None:
2058 2062 common = [self.nullid]
2059 2063 if heads is None:
2060 2064 heads = self.heads()
2061 2065
2062 2066 common = [self.rev(n) for n in common]
2063 2067 heads = [self.rev(n) for n in heads]
2064 2068
2065 2069 # we want the ancestors, but inclusive
2066 2070 class lazyset:
2067 2071 def __init__(self, lazyvalues):
2068 2072 self.addedvalues = set()
2069 2073 self.lazyvalues = lazyvalues
2070 2074
2071 2075 def __contains__(self, value):
2072 2076 return value in self.addedvalues or value in self.lazyvalues
2073 2077
2074 2078 def __iter__(self):
2075 2079 added = self.addedvalues
2076 2080 for r in added:
2077 2081 yield r
2078 2082 for r in self.lazyvalues:
2079 2083 if r not in added:
2080 2084 yield r
2081 2085
2082 2086 def add(self, value):
2083 2087 self.addedvalues.add(value)
2084 2088
2085 2089 def update(self, values):
2086 2090 self.addedvalues.update(values)
2087 2091
2088 2092 has = lazyset(self.ancestors(common))
2089 2093 has.add(nullrev)
2090 2094 has.update(common)
2091 2095
2092 2096 # take all ancestors from heads that aren't in has
2093 2097 missing = set()
2094 2098 visit = collections.deque(r for r in heads if r not in has)
2095 2099 while visit:
2096 2100 r = visit.popleft()
2097 2101 if r in missing:
2098 2102 continue
2099 2103 else:
2100 2104 missing.add(r)
2101 2105 for p in self.parentrevs(r):
2102 2106 if p not in has:
2103 2107 visit.append(p)
2104 2108 missing = list(missing)
2105 2109 missing.sort()
2106 2110 return has, [self.node(miss) for miss in missing]
2107 2111
2108 2112 def incrementalmissingrevs(self, common=None):
2109 2113 """Return an object that can be used to incrementally compute the
2110 2114 revision numbers of the ancestors of arbitrary sets that are not
2111 2115 ancestors of common. This is an ancestor.incrementalmissingancestors
2112 2116 object.
2113 2117
2114 2118 'common' is a list of revision numbers. If common is not supplied, uses
2115 2119 nullrev.
2116 2120 """
2117 2121 if common is None:
2118 2122 common = [nullrev]
2119 2123
2120 2124 if rustancestor is not None and self.index.rust_ext_compat:
2121 2125 return rustancestor.MissingAncestors(self.index, common)
2122 2126 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2123 2127
2124 2128 def findmissingrevs(self, common=None, heads=None):
2125 2129 """Return the revision numbers of the ancestors of heads that
2126 2130 are not ancestors of common.
2127 2131
2128 2132 More specifically, return a list of revision numbers corresponding to
2129 2133 nodes N such that every N satisfies the following constraints:
2130 2134
2131 2135 1. N is an ancestor of some node in 'heads'
2132 2136 2. N is not an ancestor of any node in 'common'
2133 2137
2134 2138 The list is sorted by revision number, meaning it is
2135 2139 topologically sorted.
2136 2140
2137 2141 'heads' and 'common' are both lists of revision numbers. If heads is
2138 2142 not supplied, uses all of the revlog's heads. If common is not
2139 2143 supplied, uses nullrev."""
2140 2144 if common is None:
2141 2145 common = [nullrev]
2142 2146 if heads is None:
2143 2147 heads = self.headrevs()
2144 2148
2145 2149 inc = self.incrementalmissingrevs(common=common)
2146 2150 return inc.missingancestors(heads)
2147 2151
2148 2152 def findmissing(self, common=None, heads=None):
2149 2153 """Return the ancestors of heads that are not ancestors of common.
2150 2154
2151 2155 More specifically, return a list of nodes N such that every N
2152 2156 satisfies the following constraints:
2153 2157
2154 2158 1. N is an ancestor of some node in 'heads'
2155 2159 2. N is not an ancestor of any node in 'common'
2156 2160
2157 2161 The list is sorted by revision number, meaning it is
2158 2162 topologically sorted.
2159 2163
2160 2164 'heads' and 'common' are both lists of node IDs. If heads is
2161 2165 not supplied, uses all of the revlog's heads. If common is not
2162 2166 supplied, uses nullid."""
2163 2167 if common is None:
2164 2168 common = [self.nullid]
2165 2169 if heads is None:
2166 2170 heads = self.heads()
2167 2171
2168 2172 common = [self.rev(n) for n in common]
2169 2173 heads = [self.rev(n) for n in heads]
2170 2174
2171 2175 inc = self.incrementalmissingrevs(common=common)
2172 2176 return [self.node(r) for r in inc.missingancestors(heads)]
2173 2177
2174 2178 def nodesbetween(self, roots=None, heads=None):
2175 2179 """Return a topological path from 'roots' to 'heads'.
2176 2180
2177 2181 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2178 2182 topologically sorted list of all nodes N that satisfy both of
2179 2183 these constraints:
2180 2184
2181 2185 1. N is a descendant of some node in 'roots'
2182 2186 2. N is an ancestor of some node in 'heads'
2183 2187
2184 2188 Every node is considered to be both a descendant and an ancestor
2185 2189 of itself, so every reachable node in 'roots' and 'heads' will be
2186 2190 included in 'nodes'.
2187 2191
2188 2192 'outroots' is the list of reachable nodes in 'roots', i.e., the
2189 2193 subset of 'roots' that is returned in 'nodes'. Likewise,
2190 2194 'outheads' is the subset of 'heads' that is also in 'nodes'.
2191 2195
2192 2196 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2193 2197 unspecified, uses nullid as the only root. If 'heads' is
2194 2198 unspecified, uses the list of all of the revlog's heads."""
2195 2199 nonodes = ([], [], [])
2196 2200 if roots is not None:
2197 2201 roots = list(roots)
2198 2202 if not roots:
2199 2203 return nonodes
2200 2204 lowestrev = min([self.rev(n) for n in roots])
2201 2205 else:
2202 2206 roots = [self.nullid] # Everybody's a descendant of nullid
2203 2207 lowestrev = nullrev
2204 2208 if (lowestrev == nullrev) and (heads is None):
2205 2209 # We want _all_ the nodes!
2206 2210 return (
2207 2211 [self.node(r) for r in self],
2208 2212 [self.nullid],
2209 2213 list(self.heads()),
2210 2214 )
2211 2215 if heads is None:
2212 2216 # All nodes are ancestors, so the latest ancestor is the last
2213 2217 # node.
2214 2218 highestrev = len(self) - 1
2215 2219 # Set ancestors to None to signal that every node is an ancestor.
2216 2220 ancestors = None
2217 2221 # Set heads to an empty dictionary for later discovery of heads
2218 2222 heads = {}
2219 2223 else:
2220 2224 heads = list(heads)
2221 2225 if not heads:
2222 2226 return nonodes
2223 2227 ancestors = set()
2224 2228 # Turn heads into a dictionary so we can remove 'fake' heads.
2225 2229 # Also, later we will be using it to filter out the heads we can't
2226 2230 # find from roots.
2227 2231 heads = dict.fromkeys(heads, False)
2228 2232 # Start at the top and keep marking parents until we're done.
2229 2233 nodestotag = set(heads)
2230 2234 # Remember where the top was so we can use it as a limit later.
2231 2235 highestrev = max([self.rev(n) for n in nodestotag])
2232 2236 while nodestotag:
2233 2237 # grab a node to tag
2234 2238 n = nodestotag.pop()
2235 2239 # Never tag nullid
2236 2240 if n == self.nullid:
2237 2241 continue
2238 2242 # A node's revision number represents its place in a
2239 2243 # topologically sorted list of nodes.
2240 2244 r = self.rev(n)
2241 2245 if r >= lowestrev:
2242 2246 if n not in ancestors:
2243 2247 # If we are possibly a descendant of one of the roots
2244 2248 # and we haven't already been marked as an ancestor
2245 2249 ancestors.add(n) # Mark as ancestor
2246 2250 # Add non-nullid parents to list of nodes to tag.
2247 2251 nodestotag.update(
2248 2252 [p for p in self.parents(n) if p != self.nullid]
2249 2253 )
2250 2254 elif n in heads: # We've seen it before, is it a fake head?
2251 2255 # So it is, real heads should not be the ancestors of
2252 2256 # any other heads.
2253 2257 heads.pop(n)
2254 2258 if not ancestors:
2255 2259 return nonodes
2256 2260 # Now that we have our set of ancestors, we want to remove any
2257 2261 # roots that are not ancestors.
2258 2262
2259 2263 # If one of the roots was nullid, everything is included anyway.
2260 2264 if lowestrev > nullrev:
2261 2265 # But, since we weren't, let's recompute the lowest rev to not
2262 2266 # include roots that aren't ancestors.
2263 2267
2264 2268 # Filter out roots that aren't ancestors of heads
2265 2269 roots = [root for root in roots if root in ancestors]
2266 2270 # Recompute the lowest revision
2267 2271 if roots:
2268 2272 lowestrev = min([self.rev(root) for root in roots])
2269 2273 else:
2270 2274 # No more roots? Return empty list
2271 2275 return nonodes
2272 2276 else:
2273 2277 # We are descending from nullid, and don't need to care about
2274 2278 # any other roots.
2275 2279 lowestrev = nullrev
2276 2280 roots = [self.nullid]
2277 2281 # Transform our roots list into a set.
2278 2282 descendants = set(roots)
2279 2283 # Also, keep the original roots so we can filter out roots that aren't
2280 2284 # 'real' roots (i.e. are descended from other roots).
2281 2285 roots = descendants.copy()
2282 2286 # Our topologically sorted list of output nodes.
2283 2287 orderedout = []
2284 2288 # Don't start at nullid since we don't want nullid in our output list,
2285 2289 # and if nullid shows up in descendants, empty parents will look like
2286 2290 # they're descendants.
2287 2291 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2288 2292 n = self.node(r)
2289 2293 isdescendant = False
2290 2294 if lowestrev == nullrev: # Everybody is a descendant of nullid
2291 2295 isdescendant = True
2292 2296 elif n in descendants:
2293 2297 # n is already a descendant
2294 2298 isdescendant = True
2295 2299 # This check only needs to be done here because all the roots
2296 2300 # will start being marked as descendants before the loop.
2297 2301 if n in roots:
2298 2302 # If n was a root, check if it's a 'real' root.
2299 2303 p = tuple(self.parents(n))
2300 2304 # If any of its parents are descendants, it's not a root.
2301 2305 if (p[0] in descendants) or (p[1] in descendants):
2302 2306 roots.remove(n)
2303 2307 else:
2304 2308 p = tuple(self.parents(n))
2305 2309 # A node is a descendant if either of its parents are
2306 2310 # descendants. (We seeded the descendants set with the roots
2307 2311 # up there, remember?)
2308 2312 if (p[0] in descendants) or (p[1] in descendants):
2309 2313 descendants.add(n)
2310 2314 isdescendant = True
2311 2315 if isdescendant and ((ancestors is None) or (n in ancestors)):
2312 2316 # Only include nodes that are both descendants and ancestors.
2313 2317 orderedout.append(n)
2314 2318 if (ancestors is not None) and (n in heads):
2315 2319 # We're trying to figure out which heads are reachable
2316 2320 # from roots.
2317 2321 # Mark this head as having been reached
2318 2322 heads[n] = True
2319 2323 elif ancestors is None:
2320 2324 # Otherwise, we're trying to discover the heads.
2321 2325 # Assume this is a head because if it isn't, the next step
2322 2326 # will eventually remove it.
2323 2327 heads[n] = True
2324 2328 # But, obviously its parents aren't.
2325 2329 for p in self.parents(n):
2326 2330 heads.pop(p, None)
2327 2331 heads = [head for head, flag in heads.items() if flag]
2328 2332 roots = list(roots)
2329 2333 assert orderedout
2330 2334 assert roots
2331 2335 assert heads
2332 2336 return (orderedout, roots, heads)
2333 2337
2334 2338 def headrevs(self, revs=None):
2335 2339 if revs is None:
2336 2340 try:
2337 2341 return self.index.headrevs()
2338 2342 except AttributeError:
2339 2343 return self._headrevs()
2340 2344 if rustdagop is not None and self.index.rust_ext_compat:
2341 2345 return rustdagop.headrevs(self.index, revs)
2342 2346 return dagop.headrevs(revs, self._uncheckedparentrevs)
2343 2347
2344 2348 def computephases(self, roots):
2345 2349 return self.index.computephasesmapsets(roots)
2346 2350
2347 2351 def _headrevs(self):
2348 2352 count = len(self)
2349 2353 if not count:
2350 2354 return [nullrev]
2351 2355 # we won't iterate over filtered revs, so nobody is a head at the start
2352 2356 ishead = [0] * (count + 1)
2353 2357 index = self.index
2354 2358 for r in self:
2355 2359 ishead[r] = 1 # I may be a head
2356 2360 e = index[r]
2357 2361 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2358 2362 return [r for r, val in enumerate(ishead) if val]
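# e.g. (illustration): in a three-revision log where revision 2 has
# parents (0, 1), the entry for 2 clears ishead[0] and ishead[1], so
# only [2] survives; an empty revlog reports [nullrev].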
2359 2363
2360 2364 def heads(self, start=None, stop=None):
2361 2365 """return the list of all nodes that have no children
2362 2366
2363 2367 if start is specified, only heads that are descendants of
2364 2368 start will be returned
2365 2369 if stop is specified, it will consider all the revs from stop
2366 2370 as if they had no children
2367 2371 """
2368 2372 if start is None and stop is None:
2369 2373 if not len(self):
2370 2374 return [self.nullid]
2371 2375 return [self.node(r) for r in self.headrevs()]
2372 2376
2373 2377 if start is None:
2374 2378 start = nullrev
2375 2379 else:
2376 2380 start = self.rev(start)
2377 2381
2378 2382 stoprevs = {self.rev(n) for n in stop or []}
2379 2383
2380 2384 revs = dagop.headrevssubset(
2381 2385 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2382 2386 )
2383 2387
2384 2388 return [self.node(rev) for rev in revs]
2385 2389
2386 2390 def children(self, node):
2387 2391 """find the children of a given node"""
2388 2392 c = []
2389 2393 p = self.rev(node)
2390 2394 for r in self.revs(start=p + 1):
2391 2395 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2392 2396 if prevs:
2393 2397 for pr in prevs:
2394 2398 if pr == p:
2395 2399 c.append(self.node(r))
2396 2400 elif p == nullrev:
2397 2401 c.append(self.node(r))
2398 2402 return c
2399 2403
2400 2404 def commonancestorsheads(self, a, b):
2401 2405 """calculate all the heads of the common ancestors of nodes a and b"""
2402 2406 a, b = self.rev(a), self.rev(b)
2403 2407 ancs = self._commonancestorsheads(a, b)
2404 2408 return pycompat.maplist(self.node, ancs)
2405 2409
2406 2410 def _commonancestorsheads(self, *revs):
2407 2411 """calculate all the heads of the common ancestors of revs"""
2408 2412 try:
2409 2413 ancs = self.index.commonancestorsheads(*revs)
2410 2414 except (AttributeError, OverflowError): # C implementation failed
2411 2415 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2412 2416 return ancs
2413 2417
2414 2418 def isancestor(self, a, b):
2415 2419 """return True if node a is an ancestor of node b
2416 2420
2417 2421 A revision is considered an ancestor of itself."""
2418 2422 a, b = self.rev(a), self.rev(b)
2419 2423 return self.isancestorrev(a, b)
2420 2424
2421 2425 def isancestorrev(self, a, b):
2422 2426 """return True if revision a is an ancestor of revision b
2423 2427
2424 2428 A revision is considered an ancestor of itself.
2425 2429
2426 2430 The implementation of this is trivial but the use of
2427 2431 reachableroots is not."""
2428 2432 if a == nullrev:
2429 2433 return True
2430 2434 elif a == b:
2431 2435 return True
2432 2436 elif a > b:
2433 2437 return False
2434 2438 return bool(self.reachableroots(a, [b], [a], includepath=False))
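# Why the reachableroots call above answers the question (sketch): with
# roots={a} and heads={b}, the result is non-empty exactly when a can
# reach b through the DAG; the early `a > b` return is sound because
# revision numbers are topologically sorted.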
2435 2439
2436 2440 def reachableroots(self, minroot, heads, roots, includepath=False):
2437 2441 """return (heads(::(<roots> and <roots>::<heads>)))
2438 2442
2439 2443 If includepath is True, return (<roots>::<heads>)."""
2440 2444 try:
2441 2445 return self.index.reachableroots2(
2442 2446 minroot, heads, roots, includepath
2443 2447 )
2444 2448 except AttributeError:
2445 2449 return dagop._reachablerootspure(
2446 2450 self.parentrevs, minroot, roots, heads, includepath
2447 2451 )
2448 2452
2449 2453 def ancestor(self, a, b):
2450 2454 """calculate the "best" common ancestor of nodes a and b"""
2451 2455
2452 2456 a, b = self.rev(a), self.rev(b)
2453 2457 try:
2454 2458 ancs = self.index.ancestors(a, b)
2455 2459 except (AttributeError, OverflowError):
2456 2460 ancs = ancestor.ancestors(self.parentrevs, a, b)
2457 2461 if ancs:
2458 2462 # choose a consistent winner when there's a tie
2459 2463 return min(map(self.node, ancs))
2460 2464 return self.nullid
2461 2465
2462 2466 def _match(self, id):
2463 2467 if isinstance(id, int):
2464 2468 # rev
2465 2469 return self.node(id)
2466 2470 if len(id) == self.nodeconstants.nodelen:
2467 2471 # possibly a binary node
2468 2472 # odds of a binary node being all hex in ASCII are 1 in 10**25
2469 2473 try:
2470 2474 node = id
2471 2475 self.rev(node) # quick search the index
2472 2476 return node
2473 2477 except error.LookupError:
2474 2478 pass # may be partial hex id
2475 2479 try:
2476 2480 # str(rev)
2477 2481 rev = int(id)
2478 2482 if b"%d" % rev != id:
2479 2483 raise ValueError
2480 2484 if rev < 0:
2481 2485 rev = len(self) + rev
2482 2486 if rev < 0 or rev >= len(self):
2483 2487 raise ValueError
2484 2488 return self.node(rev)
2485 2489 except (ValueError, OverflowError):
2486 2490 pass
2487 2491 if len(id) == 2 * self.nodeconstants.nodelen:
2488 2492 try:
2489 2493 # a full hex nodeid?
2490 2494 node = bin(id)
2491 2495 self.rev(node)
2492 2496 return node
2493 2497 except (binascii.Error, error.LookupError):
2494 2498 pass
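# Hedged usage sketch of the fast paths above (`rl` stands for a revlog
# instance, `node` for a known 20-byte nodeid):
#
#     rl._match(0)          # revision number -> node
#     rl._match(b'0')       # str(rev) -> node
#     rl._match(node)       # binary nodeid -> node
#     rl._match(hex(node))  # full 40-char hex -> node
#
# shorter hex prefixes return None here and fall through to
# _partialmatch() via lookup().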
2495 2499
2496 2500 def _partialmatch(self, id):
2497 2501 # we don't care about wdirfilenodeids as they should always be full hashes
2498 2502 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2499 2503 ambiguous = False
2500 2504 try:
2501 2505 partial = self.index.partialmatch(id)
2502 2506 if partial and self.hasnode(partial):
2503 2507 if maybewdir:
2504 2508 # single 'ff...' match in radix tree, ambiguous with wdir
2505 2509 ambiguous = True
2506 2510 else:
2507 2511 return partial
2508 2512 elif maybewdir:
2509 2513 # no 'ff...' match in radix tree, wdir identified
2510 2514 raise error.WdirUnsupported
2511 2515 else:
2512 2516 return None
2513 2517 except error.RevlogError:
2514 2518 # parsers.c radix tree lookup gave multiple matches
2515 2519 # fast path: for unfiltered changelog, radix tree is accurate
2516 2520 if not getattr(self, 'filteredrevs', None):
2517 2521 ambiguous = True
2518 2522 # fall through to slow path that filters hidden revisions
2519 2523 except (AttributeError, ValueError):
2520 2524 # we are pure python, or key is not hex
2521 2525 pass
2522 2526 if ambiguous:
2523 2527 raise error.AmbiguousPrefixLookupError(
2524 2528 id, self.display_id, _(b'ambiguous identifier')
2525 2529 )
2526 2530
2527 2531 if id in self._pcache:
2528 2532 return self._pcache[id]
2529 2533
2530 2534 if len(id) <= 40:
2531 2535 # hex(node)[:...]
2532 2536 l = len(id) // 2 * 2 # grab an even number of digits
2533 2537 try:
2534 2538 # we're dropping the last digit, so let's check that it's hex,
2535 2539 # to avoid the expensive computation below if it's not
2536 2540 if len(id) % 2 > 0:
2537 2541 if id[-1] not in hexdigits:
2538 2542 return None
2539 2543 prefix = bin(id[:l])
2540 2544 except binascii.Error:
2541 2545 pass
2542 2546 else:
2543 2547 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2544 2548 nl = [
2545 2549 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2546 2550 ]
2547 2551 if self.nodeconstants.nullhex.startswith(id):
2548 2552 nl.append(self.nullid)
2549 2553 if len(nl) > 0:
2550 2554 if len(nl) == 1 and not maybewdir:
2551 2555 self._pcache[id] = nl[0]
2552 2556 return nl[0]
2553 2557 raise error.AmbiguousPrefixLookupError(
2554 2558 id, self.display_id, _(b'ambiguous identifier')
2555 2559 )
2556 2560 if maybewdir:
2557 2561 raise error.WdirUnsupported
2558 2562 return None
2559 2563
2560 2564 def lookup(self, id):
2561 2565 """locate a node based on:
2562 2566 - revision number or str(revision number)
2563 2567 - nodeid or subset of hex nodeid
2564 2568 """
2565 2569 n = self._match(id)
2566 2570 if n is not None:
2567 2571 return n
2568 2572 n = self._partialmatch(id)
2569 2573 if n:
2570 2574 return n
2571 2575
2572 2576 raise error.LookupError(id, self.display_id, _(b'no match found'))
2573 2577
2574 2578 def shortest(self, node, minlength=1):
2575 2579 """Find the shortest unambiguous prefix that matches node."""
2576 2580
2577 2581 def isvalid(prefix):
2578 2582 try:
2579 2583 matchednode = self._partialmatch(prefix)
2580 2584 except error.AmbiguousPrefixLookupError:
2581 2585 return False
2582 2586 except error.WdirUnsupported:
2583 2587 # single 'ff...' match
2584 2588 return True
2585 2589 if matchednode is None:
2586 2590 raise error.LookupError(node, self.display_id, _(b'no node'))
2587 2591 return True
2588 2592
2589 2593 def maybewdir(prefix):
2590 2594 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2591 2595
2592 2596 hexnode = hex(node)
2593 2597
2594 2598 def disambiguate(hexnode, minlength):
2595 2599 """Disambiguate against wdirid."""
2596 2600 for length in range(minlength, len(hexnode) + 1):
2597 2601 prefix = hexnode[:length]
2598 2602 if not maybewdir(prefix):
2599 2603 return prefix
2600 2604
2601 2605 if not getattr(self, 'filteredrevs', None):
2602 2606 try:
2603 2607 length = max(self.index.shortest(node), minlength)
2604 2608 return disambiguate(hexnode, length)
2605 2609 except error.RevlogError:
2606 2610 if node != self.nodeconstants.wdirid:
2607 2611 raise error.LookupError(
2608 2612 node, self.display_id, _(b'no node')
2609 2613 )
2610 2614 except AttributeError:
2611 2615 # Fall through to pure code
2612 2616 pass
2613 2617
2614 2618 if node == self.nodeconstants.wdirid:
2615 2619 for length in range(minlength, len(hexnode) + 1):
2616 2620 prefix = hexnode[:length]
2617 2621 if isvalid(prefix):
2618 2622 return prefix
2619 2623
2620 2624 for length in range(minlength, len(hexnode) + 1):
2621 2625 prefix = hexnode[:length]
2622 2626 if isvalid(prefix):
2623 2627 return disambiguate(hexnode, length)
2624 2628
2625 2629 def cmp(self, node, text):
2626 2630 """compare text with a given file revision
2627 2631
2628 2632 returns True if text is different than what is stored.
2629 2633 """
2630 2634 p1, p2 = self.parents(node)
2631 2635 return storageutil.hashrevisionsha1(text, p1, p2) != node
2632 2636
2633 2637 def deltaparent(self, rev):
2634 2638 """return deltaparent of the given revision"""
2635 2639 base = self.index[rev][3]
2636 2640 if base == rev:
2637 2641 return nullrev
2638 2642 elif self.delta_config.general_delta:
2639 2643 return base
2640 2644 else:
2641 2645 return rev - 1
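# Illustration: under general delta the delta parent is whatever base
# the index recorded (it may be far from `rev`); without it, revision
# `rev` always deltas against `rev - 1`; a full snapshot (base == rev)
# reports nullrev in both cases.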
2642 2646
2643 2647 def issnapshot(self, rev):
2644 2648 """tells whether rev is a snapshot"""
2645 2649 ret = self._inner.issnapshot(rev)
2646 2650 self.issnapshot = self._inner.issnapshot
2647 2651 return ret
2648 2652
2649 2653 def snapshotdepth(self, rev):
2650 2654 """number of snapshots in the chain before this one"""
2651 2655 if not self.issnapshot(rev):
2652 2656 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2653 2657 return len(self._inner._deltachain(rev)[0]) - 1
2654 2658
2655 2659 def revdiff(self, rev1, rev2):
2656 2660 """return or calculate a delta between two revisions
2657 2661
2658 2662 The delta calculated is in binary form and is intended to be written to
2659 2663 revlog data directly. So this function needs raw revision data.
2660 2664 """
2661 2665 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2662 2666 return bytes(self._inner._chunk(rev2))
2663 2667
2664 2668 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2665 2669
2666 2670 def revision(self, nodeorrev):
2667 2671 """return an uncompressed revision of a given node or revision
2668 2672 number.
2669 2673 """
2670 2674 return self._revisiondata(nodeorrev)
2671 2675
2672 2676 def sidedata(self, nodeorrev):
2673 2677 """a map of extra data related to the changeset but not part of the hash
2674 2678
2675 2679 This function currently returns a dictionary. However, a more advanced
2676 2680 mapping object will likely be used in the future for more
2677 2681 efficient/lazy code.
2678 2682 """
2679 2683 # deal with <nodeorrev> argument type
2680 2684 if isinstance(nodeorrev, int):
2681 2685 rev = nodeorrev
2682 2686 else:
2683 2687 rev = self.rev(nodeorrev)
2684 2688 return self._sidedata(rev)
2685 2689
2686 2690 def _rawtext(self, node, rev):
2687 2691 """return the possibly unvalidated rawtext for a revision
2688 2692
2689 2693 returns (rev, rawtext, validated)
2690 2694 """
2691 2695 # Check if we have the entry in cache
2692 2696 # The cache entry looks like (node, rev, rawtext)
2693 2697 if self._inner._revisioncache:
2694 2698 if self._inner._revisioncache[0] == node:
2695 2699 return (rev, self._inner._revisioncache[2], True)
2696 2700
2697 2701 if rev is None:
2698 2702 rev = self.rev(node)
2699 2703
2700 2704 return self._inner.raw_text(node, rev)
2701 2705
2702 2706 def _revisiondata(self, nodeorrev, raw=False):
2703 2707 # deal with <nodeorrev> argument type
2704 2708 if isinstance(nodeorrev, int):
2705 2709 rev = nodeorrev
2706 2710 node = self.node(rev)
2707 2711 else:
2708 2712 node = nodeorrev
2709 2713 rev = None
2710 2714
2711 2715 # fast path the special `nullid` rev
2712 2716 if node == self.nullid:
2713 2717 return b""
2714 2718
2715 2719 # ``rawtext`` is the text as stored inside the revlog. Might be the
2716 2720 # revision or might need to be processed to retrieve the revision.
2717 2721 rev, rawtext, validated = self._rawtext(node, rev)
2718 2722
2719 2723 if raw and validated:
2720 2724 # if we don't want to process the raw text and that raw
2721 2725 # text is cached, we can exit early.
2722 2726 return rawtext
2723 2727 if rev is None:
2724 2728 rev = self.rev(node)
2725 2729 # the revlog's flag for this revision
2726 2730 # (usually alter its state or content)
2727 2731 flags = self.flags(rev)
2728 2732
2729 2733 if validated and flags == REVIDX_DEFAULT_FLAGS:
2730 2734 # no extra flags set, no flag processor runs, text = rawtext
2731 2735 return rawtext
2732 2736
2733 2737 if raw:
2734 2738 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2735 2739 text = rawtext
2736 2740 else:
2737 2741 r = flagutil.processflagsread(self, rawtext, flags)
2738 2742 text, validatehash = r
2739 2743 if validatehash:
2740 2744 self.checkhash(text, node, rev=rev)
2741 2745 if not validated:
2742 2746 self._inner._revisioncache = (node, rev, rawtext)
2743 2747
2744 2748 return text
2745 2749
2746 2750 def _sidedata(self, rev):
2747 2751 """Return the sidedata for a given revision number."""
2748 2752 sidedata_end = None
2749 2753 if self._docket is not None:
2750 2754 sidedata_end = self._docket.sidedata_end
2751 2755 return self._inner.sidedata(rev, sidedata_end)
2752 2756
2753 2757 def rawdata(self, nodeorrev):
2754 2758 """return the uncompressed raw data of a given node or revision number."""
2755 2759 return self._revisiondata(nodeorrev, raw=True)
2756 2760
2757 2761 def hash(self, text, p1, p2):
2758 2762 """Compute a node hash.
2759 2763
2760 2764 Available as a function so that subclasses can replace the hash
2761 2765 as needed.
2762 2766 """
2763 2767 return storageutil.hashrevisionsha1(text, p1, p2)
2764 2768
2765 2769 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2766 2770 """Check node hash integrity.
2767 2771
2768 2772 Available as a function so that subclasses can extend hash mismatch
2769 2773 behaviors as needed.
2770 2774 """
2771 2775 try:
2772 2776 if p1 is None and p2 is None:
2773 2777 p1, p2 = self.parents(node)
2774 2778 if node != self.hash(text, p1, p2):
2775 2779 # Clear the revision cache on hash failure. The revision cache
2776 2780 # only stores the raw revision and clearing the cache does have
2777 2781 # the side-effect that we won't have a cache hit when the raw
2778 2782 # revision data is accessed. But this case should be rare and
2779 2783 # it is extra work to teach the cache about the hash
2780 2784 # verification state.
2781 2785 if (
2782 2786 self._inner._revisioncache
2783 2787 and self._inner._revisioncache[0] == node
2784 2788 ):
2785 2789 self._inner._revisioncache = None
2786 2790
2787 2791 revornode = rev
2788 2792 if revornode is None:
2789 2793 revornode = templatefilters.short(hex(node))
2790 2794 raise error.RevlogError(
2791 2795 _(b"integrity check failed on %s:%s")
2792 2796 % (self.display_id, pycompat.bytestr(revornode))
2793 2797 )
2794 2798 except error.RevlogError:
2795 2799 if self.feature_config.censorable and storageutil.iscensoredtext(
2796 2800 text
2797 2801 ):
2798 2802 raise error.CensoredNodeError(self.display_id, node, text)
2799 2803 raise
2800 2804
2801 2805 @property
2802 2806 def _split_index_file(self):
2803 2807 """the path at which to expect the index of an ongoing splitting operation
2804 2808
2805 2809 The file will only exist if a splitting operation is in progress, but
2806 2810 it is always expected at the same location."""
2807 2811 parts = self.radix.split(b'/')
2808 2812 if len(parts) > 1:
2809 2813 # adds a '-s' suffix to the `data/` or `meta/` base directory
2810 2814 head = parts[0] + b'-s'
2811 2815 mids = parts[1:-1]
2812 2816 tail = parts[-1] + b'.i'
2813 2817 pieces = [head] + mids + [tail]
2814 2818 return b'/'.join(pieces)
2815 2819 else:
2816 2820 # the revlog is stored at the root of the store (changelog or
2817 2821 # manifest), no risk of collision.
2818 2822 return self.radix + b'.i.s'
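# Sketch of the path computation above (assumed radix values, shown as
# a doctest-style illustration):
#
#     >>> parts = b'data/some/dir/file'.split(b'/')
#     >>> b'/'.join([parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i'])
#     b'data-s/some/dir/file.i'
#
# while a store-root revlog such as the changelog simply gets
# b'00changelog.i.s'.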
2819 2823
2820 2824 def _enforceinlinesize(self, tr, side_write=True):
2821 2825 """Check if the revlog is too big for inline and convert if so.
2822 2826
2823 2827 This should be called after revisions are added to the revlog. If the
2824 2828 revlog has grown too large to be an inline revlog, it will convert it
2825 2829 to use multiple index and data files.
2826 2830 """
2827 2831 tiprev = len(self) - 1
2828 2832 total_size = self.start(tiprev) + self.length(tiprev)
2829 2833 if not self._inline or total_size < _maxinline:
2830 2834 return
2831 2835
2832 2836 if self._docket is not None:
2833 2837 msg = b"inline revlog should not have a docket"
2834 2838 raise error.ProgrammingError(msg)
2835 2839
2836 troffset = tr.findoffset(self._indexfile)
2840 troffset = tr.findoffset(self._inner.canonical_index_file)
2837 2841 if troffset is None:
2838 2842 raise error.RevlogError(
2839 2843 _(b"%s not found in the transaction") % self._indexfile
2840 2844 )
2841 2845 if troffset:
2842 tr.addbackup(self._indexfile, for_offset=True)
2846 tr.addbackup(self._inner.canonical_index_file, for_offset=True)
2843 2847 tr.add(self._datafile, 0)
2844 2848
2845 2849 new_index_file_path = None
2846 2850 if side_write:
2847 2851 old_index_file_path = self._indexfile
2848 2852 new_index_file_path = self._split_index_file
2849 2853 opener = self.opener
2850 2854 weak_self = weakref.ref(self)
2851 2855
2852 2856 # the "split" index replaces the real index when the transaction is
2853 2857 # finalized
2854 2858 def finalize_callback(tr):
2855 2859 opener.rename(
2856 2860 new_index_file_path,
2857 2861 old_index_file_path,
2858 2862 checkambig=True,
2859 2863 )
2860 2864 maybe_self = weak_self()
2861 2865 if maybe_self is not None:
2862 2866 maybe_self._indexfile = old_index_file_path
2863 2867 maybe_self._inner.index_file = maybe_self._indexfile
2864 2868
2865 2869 def abort_callback(tr):
2866 2870 maybe_self = weak_self()
2867 2871 if maybe_self is not None:
2868 2872 maybe_self._indexfile = old_index_file_path
2869 2873 maybe_self._inner.inline = True
2870 2874 maybe_self._inner.index_file = old_index_file_path
2871 2875
2872 2876 tr.registertmp(new_index_file_path)
2873 2877 if self.target[1] is not None:
2874 2878 callback_id = b'000-revlog-split-%d-%s' % self.target
2875 2879 else:
2876 2880 callback_id = b'000-revlog-split-%d' % self.target[0]
2877 2881 tr.addfinalize(callback_id, finalize_callback)
2878 2882 tr.addabort(callback_id, abort_callback)
2879 2883
2880 2884 self._format_flags &= ~FLAG_INLINE_DATA
2881 2885 self._inner.split_inline(
2882 2886 tr,
2883 2887 self._format_flags | self._format_version,
2884 2888 new_index_file_path=new_index_file_path,
2885 2889 )
2886 2890
2887 2891 self._inline = False
2888 2892 if new_index_file_path is not None:
2889 2893 self._indexfile = new_index_file_path
2890 2894
2891 2895 nodemaputil.setup_persistent_nodemap(tr, self)
2892 2896
2893 2897 def _nodeduplicatecallback(self, transaction, node):
2894 2898 """called when trying to add a node already stored."""
2895 2899
2896 2900 @contextlib.contextmanager
2897 2901 def reading(self):
2898 2902 with self._inner.reading():
2899 2903 yield
2900 2904
2901 2905 @contextlib.contextmanager
2902 2906 def _writing(self, transaction):
2903 2907 if self._trypending:
2904 2908 msg = b'try to write in a `trypending` revlog: %s'
2905 2909 msg %= self.display_id
2906 2910 raise error.ProgrammingError(msg)
2907 2911 if self._inner.is_writing:
2908 2912 yield
2909 2913 else:
2910 2914 data_end = None
2911 2915 sidedata_end = None
2912 2916 if self._docket is not None:
2913 2917 data_end = self._docket.data_end
2914 2918 sidedata_end = self._docket.sidedata_end
2915 2919 with self._inner.writing(
2916 2920 transaction,
2917 2921 data_end=data_end,
2918 2922 sidedata_end=sidedata_end,
2919 2923 ):
2920 2924 yield
2921 2925 if self._docket is not None:
2922 2926 self._write_docket(transaction)
2923 2927
2924 2928 def _write_docket(self, transaction):
2925 2929 """write the current docket on disk
2926 2930
2927 2931 Exists as a method to help the changelog implement transaction logic
2928 2932 
2929 2933 We could also imagine using the same transaction logic for all revlogs
2930 2934 since dockets are cheap."""
2931 2935 self._docket.write(transaction)
2932 2936
2933 2937 def addrevision(
2934 2938 self,
2935 2939 text,
2936 2940 transaction,
2937 2941 link,
2938 2942 p1,
2939 2943 p2,
2940 2944 cachedelta=None,
2941 2945 node=None,
2942 2946 flags=REVIDX_DEFAULT_FLAGS,
2943 2947 deltacomputer=None,
2944 2948 sidedata=None,
2945 2949 ):
2946 2950 """add a revision to the log
2947 2951
2948 2952 text - the revision data to add
2949 2953 transaction - the transaction object used for rollback
2950 2954 link - the linkrev data to add
2951 2955 p1, p2 - the parent nodeids of the revision
2952 2956 cachedelta - an optional precomputed delta
2953 2957 node - nodeid of revision; typically node is not specified, and it is
2954 2958 computed by default as hash(text, p1, p2), however subclasses might
2955 2959 use different hashing method (and override checkhash() in such case)
2956 2960 flags - the known flags to set on the revision
2957 2961 deltacomputer - an optional deltacomputer instance shared between
2958 2962 multiple calls
2959 2963 """
2960 2964 if link == nullrev:
2961 2965 raise error.RevlogError(
2962 2966 _(b"attempted to add linkrev -1 to %s") % self.display_id
2963 2967 )
2964 2968
2965 2969 if sidedata is None:
2966 2970 sidedata = {}
2967 2971 elif sidedata and not self.feature_config.has_side_data:
2968 2972 raise error.ProgrammingError(
2969 2973 _(b"trying to add sidedata to a revlog that does not support them")
2970 2974 )
2971 2975
2972 2976 if flags:
2973 2977 node = node or self.hash(text, p1, p2)
2974 2978
2975 2979 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2976 2980
2977 2981 # If the flag processor modifies the revision data, ignore any provided
2978 2982 # cachedelta.
2979 2983 if rawtext != text:
2980 2984 cachedelta = None
2981 2985
2982 2986 if len(rawtext) > _maxentrysize:
2983 2987 raise error.RevlogError(
2984 2988 _(
2985 2989 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2986 2990 )
2987 2991 % (self.display_id, len(rawtext))
2988 2992 )
2989 2993
2990 2994 node = node or self.hash(rawtext, p1, p2)
2991 2995 rev = self.index.get_rev(node)
2992 2996 if rev is not None:
2993 2997 return rev
2994 2998
2995 2999 if validatehash:
2996 3000 self.checkhash(rawtext, node, p1=p1, p2=p2)
2997 3001
2998 3002 return self.addrawrevision(
2999 3003 rawtext,
3000 3004 transaction,
3001 3005 link,
3002 3006 p1,
3003 3007 p2,
3004 3008 node,
3005 3009 flags,
3006 3010 cachedelta=cachedelta,
3007 3011 deltacomputer=deltacomputer,
3008 3012 sidedata=sidedata,
3009 3013 )
3010 3014
3011 3015 def addrawrevision(
3012 3016 self,
3013 3017 rawtext,
3014 3018 transaction,
3015 3019 link,
3016 3020 p1,
3017 3021 p2,
3018 3022 node,
3019 3023 flags,
3020 3024 cachedelta=None,
3021 3025 deltacomputer=None,
3022 3026 sidedata=None,
3023 3027 ):
3024 3028 """add a raw revision with known flags, node and parents
3025 3029 useful when reusing a revision not stored in this revlog (ex: received
3026 3030 over wire, or read from an external bundle).
3027 3031 """
3028 3032 with self._writing(transaction):
3029 3033 return self._addrevision(
3030 3034 node,
3031 3035 rawtext,
3032 3036 transaction,
3033 3037 link,
3034 3038 p1,
3035 3039 p2,
3036 3040 flags,
3037 3041 cachedelta,
3038 3042 deltacomputer=deltacomputer,
3039 3043 sidedata=sidedata,
3040 3044 )
3041 3045
3042 3046 def compress(self, data):
3043 3047 return self._inner.compress(data)
3044 3048
3045 3049 def decompress(self, data):
3046 3050 return self._inner.decompress(data)
3047 3051
3048 3052 def _addrevision(
3049 3053 self,
3050 3054 node,
3051 3055 rawtext,
3052 3056 transaction,
3053 3057 link,
3054 3058 p1,
3055 3059 p2,
3056 3060 flags,
3057 3061 cachedelta,
3058 3062 alwayscache=False,
3059 3063 deltacomputer=None,
3060 3064 sidedata=None,
3061 3065 ):
3062 3066 """internal function to add revisions to the log
3063 3067
3064 3068 see addrevision for argument descriptions.
3065 3069
3066 3070 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3067 3071
3068 3072 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3069 3073 be used.
3070 3074
3071 3075 invariants:
3072 3076 - rawtext is optional (can be None); if not set, cachedelta must be set.
3073 3077 if both are set, they must correspond to each other.
3074 3078 """
3075 3079 if node == self.nullid:
3076 3080 raise error.RevlogError(
3077 3081 _(b"%s: attempt to add null revision") % self.display_id
3078 3082 )
3079 3083 if (
3080 3084 node == self.nodeconstants.wdirid
3081 3085 or node in self.nodeconstants.wdirfilenodeids
3082 3086 ):
3083 3087 raise error.RevlogError(
3084 3088 _(b"%s: attempt to add wdir revision") % self.display_id
3085 3089 )
3086 3090 if self._inner._writinghandles is None:
3087 3091 msg = b'adding revision outside `revlog._writing` context'
3088 3092 raise error.ProgrammingError(msg)
3089 3093
3090 3094 btext = [rawtext]
3091 3095
3092 3096 curr = len(self)
3093 3097 prev = curr - 1
3094 3098
3095 3099 offset = self._get_data_offset(prev)
3096 3100
3097 3101 if self._concurrencychecker:
3098 3102 ifh, dfh, sdfh = self._inner._writinghandles
3099 3103 # XXX no checking for the sidedata file
3100 3104 if self._inline:
3101 3105 # offset is "as if" it were in the .d file, so we need to add on
3102 3106 # the size of the entry metadata.
3103 3107 self._concurrencychecker(
3104 3108 ifh, self._indexfile, offset + curr * self.index.entry_size
3105 3109 )
3106 3110 else:
3107 3111 # Entries in the .i are a consistent size.
3108 3112 self._concurrencychecker(
3109 3113 ifh, self._indexfile, curr * self.index.entry_size
3110 3114 )
3111 3115 self._concurrencychecker(dfh, self._datafile, offset)
3112 3116
3113 3117 p1r, p2r = self.rev(p1), self.rev(p2)
3114 3118
3115 3119 # full versions are inserted when the needed deltas
3116 3120 # become comparable to the uncompressed text
3117 3121 if rawtext is None:
3118 3122 # need rawtext size, before changed by flag processors, which is
3119 3123 # the non-raw size. use revlog explicitly to avoid filelog's extra
3120 3124 # logic that might remove metadata size.
3121 3125 textlen = mdiff.patchedsize(
3122 3126 revlog.size(self, cachedelta[0]), cachedelta[1]
3123 3127 )
3124 3128 else:
3125 3129 textlen = len(rawtext)
3126 3130
3127 3131 if deltacomputer is None:
3128 3132 write_debug = None
3129 3133 if self.delta_config.debug_delta:
3130 3134 write_debug = transaction._report
3131 3135 deltacomputer = deltautil.deltacomputer(
3132 3136 self, write_debug=write_debug
3133 3137 )
3134 3138
3135 3139 if cachedelta is not None and len(cachedelta) == 2:
3136 3140 # If the cached delta has no information about how it should be
3137 3141 # reused, add the default reuse instruction according to the
3138 3142 # revlog's configuration.
3139 3143 if (
3140 3144 self.delta_config.general_delta
3141 3145 and self.delta_config.lazy_delta_base
3142 3146 ):
3143 3147 delta_base_reuse = DELTA_BASE_REUSE_TRY
3144 3148 else:
3145 3149 delta_base_reuse = DELTA_BASE_REUSE_NO
3146 3150 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3147 3151
3148 3152 revinfo = revlogutils.revisioninfo(
3149 3153 node,
3150 3154 p1,
3151 3155 p2,
3152 3156 btext,
3153 3157 textlen,
3154 3158 cachedelta,
3155 3159 flags,
3156 3160 )
3157 3161
3158 3162 deltainfo = deltacomputer.finddeltainfo(revinfo)
3159 3163
3160 3164 compression_mode = COMP_MODE_INLINE
3161 3165 if self._docket is not None:
3162 3166 default_comp = self._docket.default_compression_header
3163 3167 r = deltautil.delta_compression(default_comp, deltainfo)
3164 3168 compression_mode, deltainfo = r
3165 3169
3166 3170 sidedata_compression_mode = COMP_MODE_INLINE
3167 3171 if sidedata and self.feature_config.has_side_data:
3168 3172 sidedata_compression_mode = COMP_MODE_PLAIN
3169 3173 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3170 3174 sidedata_offset = self._docket.sidedata_end
3171 3175 h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way we
            # can easily detect empty sidedata, and it will be no different
            # from sidedata we add manually.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
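                    # The rank of a revision is the size of its ancestor
                    # set, itself included. Illustrative example: if pmax
                    # has rank 10 and pmin contributes 3 ancestors not
                    # already reachable from pmax, the merge gets rank
                    # 1 + 10 + 3 = 14.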

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
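            # (docket-less revlogs embed the version/flags header in the
            # first index entry instead of storing it in a separate file)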
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
3254 3258 """Returns the current offset in the (in-transaction) data file.
3255 3259 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
3256 3260 file to store that information: since sidedata can be rewritten to the
3257 3261 end of the data file within a transaction, you can have cases where, for
3258 3262 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3259 3263 to `n - 1`'s sidedata being written after `n`'s data.
3260 3264
3261 3265 TODO cache this in a docket file before getting out of experimental."""
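        # A concrete illustration (hypothetical numbers): if the docket's
        # data_end is 2048, the next write starts at 2048 even when
        # self.end(prev) is smaller, because rewritten sidedata may already
        # occupy that gap.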
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
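                        # (an mdiff patch hunk header is three big-endian
                        # 32-bit ints (start, end, newlen); a full
                        # replacement is the single hunk (0, oldlen, newlen),
                        # which is what mdiff.replacediffheader() builds)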
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)
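        # Illustrative arithmetic (hypothetical sizes): with 64-byte index
        # entries, stripping at rev 10 truncates a split index to
        # 10 * 64 = 640 bytes; an inline revlog is truncated to
        # data_end + 640, since each entry is followed by its revision data.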

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
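            # (entry[0] packs the data offset in its high bits and the 16
            # flag bits in its low bits; entry[5] and entry[6] are parent
            # revs, resolved to node ids via field 7 of their own entries)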

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]
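                    # (& binds tighter than |: the helpers' "to add" flags
                    # are masked by the "to remove" set before being OR'ed
                    # into the existing flags)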

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        # slice rather than index: indexing bytes yields an
                        # int on Python 3, which would never equal b'\0'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )
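                # (these four fields are what replace_sidedata_info() below
                # patches into the existing index entry: the offset and
                # length of the new sidedata, the updated offset/flags word,
                # and the compression mode of the blob)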

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)