revlog: move the `rawtext` method on the inner object...
marmoute
r51990:be56d322 default
@@ -1,3974 +1,3985 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engine options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "splitted" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # threshold above which the index is considered large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 393 # 3-tuple of (node, rev, text) for a raw revision.
394 394 self._revisioncache = None
395 395
396 396 @property
397 397 def index_file(self):
398 398 return self.__index_file
399 399
400 400 @index_file.setter
401 401 def index_file(self, new_index_file):
402 402 self.__index_file = new_index_file
403 403 if self.inline:
404 404 self._segmentfile.filename = new_index_file
405 405
406 406 def __len__(self):
407 407 return len(self.index)
408 408
409 409 # Derived from index values.
410 410
411 411 def start(self, rev):
412 412 """the offset of the data chunk for this revision"""
413 413 return int(self.index[rev][0] >> 16)
414 414
415 415 def length(self, rev):
416 416 """the length of the data chunk for this revision"""
417 417 return self.index[rev][1]
418 418
419 419 def end(self, rev):
420 420 """the end of the data chunk for this revision"""
421 421 return self.start(rev) + self.length(rev)
422 422
423 423 def deltaparent(self, rev):
424 424 """return deltaparent of the given revision"""
425 425 base = self.index[rev][3]
426 426 if base == rev:
427 427 return nullrev
428 428 elif self.delta_config.general_delta:
429 429 return base
430 430 else:
431 431 return rev - 1
432 432
433 433 def issnapshot(self, rev):
434 434 """tells whether rev is a snapshot"""
435 435 if not self.delta_config.sparse_revlog:
436 436 return self.deltaparent(rev) == nullrev
437 437 elif hasattr(self.index, 'issnapshot'):
438 438 # directly assign the method to cache the testing and access
439 439 self.issnapshot = self.index.issnapshot
440 440 return self.issnapshot(rev)
441 441 if rev == nullrev:
442 442 return True
443 443 entry = self.index[rev]
444 444 base = entry[3]
445 445 if base == rev:
446 446 return True
447 447 if base == nullrev:
448 448 return True
449 449 p1 = entry[5]
450 450 while self.length(p1) == 0:
451 451 b = self.deltaparent(p1)
452 452 if b == p1:
453 453 break
454 454 p1 = b
455 455 p2 = entry[6]
456 456 while self.length(p2) == 0:
457 457 b = self.deltaparent(p2)
458 458 if b == p2:
459 459 break
460 460 p2 = b
461 461 if base == p1 or base == p2:
462 462 return False
463 463 return self.issnapshot(base)
464 464
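The sparse-revlog branch above can be restated compactly. Below is a minimal sketch with hypothetical toy_* names; it deliberately ignores the empty-diff parent walking the real method performs:

def toy_issnapshot(bases, parents, rev):
    # bases[r]: stored delta base of r; parents[r]: pair of parent revs
    if rev == -1:  # nullrev
        return True
    base = bases[rev]
    if base == rev or base == -1:
        return True  # stored as a full snapshot
    if base in parents[rev]:
        return False  # ordinary delta against a parent
    # delta against a non-parent: a snapshot iff its base is one
    return toy_issnapshot(bases, parents, base)

# rev 2 deltas onto rev 0, which is not one of its parents:
assert toy_issnapshot([0, 0, 0], [(-1, -1), (0, -1), (1, -1)], 2)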
465 465 def _deltachain(self, rev, stoprev=None):
466 466 """Obtain the delta chain for a revision.
467 467
468 468 ``stoprev`` specifies a revision to stop at. If not specified, we
469 469 stop at the base of the chain.
470 470
471 471 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
472 472 revs in ascending order and ``stopped`` is a bool indicating whether
473 473 ``stoprev`` was hit.
474 474 """
475 475 generaldelta = self.delta_config.general_delta
476 476 # Try C implementation.
477 477 try:
478 478 return self.index.deltachain(rev, stoprev, generaldelta)
479 479 except AttributeError:
480 480 pass
481 481
482 482 chain = []
483 483
484 484 # Alias to prevent attribute lookup in tight loop.
485 485 index = self.index
486 486
487 487 iterrev = rev
488 488 e = index[iterrev]
489 489 while iterrev != e[3] and iterrev != stoprev:
490 490 chain.append(iterrev)
491 491 if generaldelta:
492 492 iterrev = e[3]
493 493 else:
494 494 iterrev -= 1
495 495 e = index[iterrev]
496 496
497 497 if iterrev == stoprev:
498 498 stopped = True
499 499 else:
500 500 chain.append(iterrev)
501 501 stopped = False
502 502
503 503 chain.reverse()
504 504 return chain, stopped
505 505
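For readers new to revlog internals, the pure-Python walk above reduces to the following sketch (toy names, not Mercurial API):

def toy_deltachain(bases, rev, generaldelta=True, stoprev=None):
    # bases[r] is the stored delta base of r; bases[r] == r ends the chain
    chain = []
    while bases[rev] != rev and rev != stoprev:
        chain.append(rev)
        rev = bases[rev] if generaldelta else rev - 1
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)
    chain.reverse()
    return chain, stopped

bases = [0, 0, 1, 2]  # rev 0 is a snapshot, revs 1-3 delta onto their base
assert toy_deltachain(bases, 3) == ([0, 1, 2, 3], False)
assert toy_deltachain(bases, 3, stoprev=1) == ([2, 3], True)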
506 506 @util.propertycache
507 507 def _compressor(self):
508 508 engine = util.compengines[self.feature_config.compression_engine]
509 509 return engine.revlogcompressor(
510 510 self.feature_config.compression_engine_options
511 511 )
512 512
513 513 @util.propertycache
514 514 def _decompressor(self):
515 515 """the default decompressor"""
516 516 if self._default_compression_header is None:
517 517 return None
518 518 t = self._default_compression_header
519 519 c = self._get_decompressor(t)
520 520 return c.decompress
521 521
522 522 def _get_decompressor(self, t):
523 523 try:
524 524 compressor = self._decompressors[t]
525 525 except KeyError:
526 526 try:
527 527 engine = util.compengines.forrevlogheader(t)
528 528 compressor = engine.revlogcompressor(
529 529 self.feature_config.compression_engine_options
530 530 )
531 531 self._decompressors[t] = compressor
532 532 except KeyError:
533 533 raise error.RevlogError(
534 534 _(b'unknown compression type %s') % binascii.hexlify(t)
535 535 )
536 536 return compressor
537 537
538 538 def compress(self, data):
539 539 """Generate a possibly-compressed representation of data."""
540 540 if not data:
541 541 return b'', data
542 542
543 543 compressed = self._compressor.compress(data)
544 544
545 545 if compressed:
546 546 # The revlog compressor added the header in the returned data.
547 547 return b'', compressed
548 548
549 549 if data[0:1] == b'\0':
550 550 return b'', data
551 551 return b'u', data
552 552
553 553 def decompress(self, data):
554 554 """Decompress a revlog chunk.
555 555
556 556 The chunk is expected to begin with a header identifying the
557 557 format type so it can be routed to an appropriate decompressor.
558 558 """
559 559 if not data:
560 560 return data
561 561
562 562 # Revlogs are read much more frequently than they are written and many
563 563 # chunks only take microseconds to decompress, so performance is
564 564 # important here.
565 565 #
566 566 # We can make a few assumptions about revlogs:
567 567 #
568 568 # 1) the majority of chunks will be compressed (as opposed to inline
569 569 # raw data).
570 570 # 2) decompressing *any* data will likely be at least 10x slower than
571 571 # returning raw inline data.
572 572 # 3) we want to prioritize common and officially supported compression
573 573 # engines
574 574 #
575 575 # It follows that we want to optimize for "decompress compressed data
576 576 # when encoded with common and officially supported compression engines"
577 577 # case over "raw data" and "data encoded by less common or non-official
578 578 # compression engines." That is why we have the inline lookup first
579 579 # followed by the compengines lookup.
580 580 #
581 581 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
582 582 # compressed chunks. And this matters for changelog and manifest reads.
583 583 t = data[0:1]
584 584
585 585 if t == b'x':
586 586 try:
587 587 return _zlibdecompress(data)
588 588 except zlib.error as e:
589 589 raise error.RevlogError(
590 590 _(b'revlog decompress error: %s')
591 591 % stringutil.forcebytestr(e)
592 592 )
593 593 # '\0' is more common than 'u' so it goes first.
594 594 elif t == b'\0':
595 595 return data
596 596 elif t == b'u':
597 597 return util.buffer(data, 1)
598 598
599 599 compressor = self._get_decompressor(t)
600 600
601 601 return compressor.decompress(data)
602 602
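The one-byte headers used by compress()/decompress() above are worth spelling out: zlib output already starts with 'x', literal data starting with NUL is stored as-is, and any other literal data gets a 'u' prefix. A self-contained illustration follows; note the real compress() returns the header separately as a 2-tuple, while this toy concatenates for brevity:

import zlib

def toy_compress(data: bytes) -> bytes:
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp  # zlib's leading 'x' byte routes decompression
    if data[:1] == b'\0':
        return data  # NUL header is already unambiguous
    return b'u' + data  # mark uncompressed literal data

def toy_decompress(chunk: bytes) -> bytes:
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if not chunk or t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression header %r' % t)

payload = b'hello world' * 50
assert toy_decompress(toy_compress(payload)) == payload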
603 603 @contextlib.contextmanager
604 604 def reading(self):
605 605 """Context manager that keeps data and sidedata files open for reading"""
606 606 if len(self.index) == 0:
607 607 yield # nothing to be read
608 608 else:
609 609 with self._segmentfile.reading():
610 610 with self._segmentfile_sidedata.reading():
611 611 yield
612 612
613 613 @property
614 614 def is_writing(self):
615 615 """True is a writing context is open"""
616 616 return self._writinghandles is not None
617 617
618 618 @contextlib.contextmanager
619 619 def writing(self, transaction, data_end=None, sidedata_end=None):
620 620 """Open the revlog files for writing
621 621
622 622 Adding content to a revlog should be done within such a context.
623 623 """
624 624 if self.is_writing:
625 625 yield
626 626 else:
627 627 ifh = dfh = sdfh = None
628 628 try:
629 629 r = len(self.index)
630 630 # opening the data file.
631 631 dsize = 0
632 632 if r:
633 633 dsize = self.end(r - 1)
634 634 dfh = None
635 635 if not self.inline:
636 636 try:
637 637 dfh = self.opener(self.data_file, mode=b"r+")
638 638 if data_end is None:
639 639 dfh.seek(0, os.SEEK_END)
640 640 else:
641 641 dfh.seek(data_end, os.SEEK_SET)
642 642 except FileNotFoundError:
643 643 dfh = self.opener(self.data_file, mode=b"w+")
644 644 transaction.add(self.data_file, dsize)
645 645 if self.sidedata_file is not None:
646 646 assert sidedata_end is not None
647 647 # revlog-v2 does not inline, help Pytype
648 648 assert dfh is not None
649 649 try:
650 650 sdfh = self.opener(self.sidedata_file, mode=b"r+")
651 651 dfh.seek(sidedata_end, os.SEEK_SET)
652 652 except FileNotFoundError:
653 653 sdfh = self.opener(self.sidedata_file, mode=b"w+")
654 654 transaction.add(self.sidedata_file, sidedata_end)
655 655
656 656 # opening the index file.
657 657 isize = r * self.index.entry_size
658 658 ifh = self.__index_write_fp()
659 659 if self.inline:
660 660 transaction.add(self.index_file, dsize + isize)
661 661 else:
662 662 transaction.add(self.index_file, isize)
663 663 # expose all file handles for writing.
664 664 self._writinghandles = (ifh, dfh, sdfh)
665 665 self._segmentfile.writing_handle = ifh if self.inline else dfh
666 666 self._segmentfile_sidedata.writing_handle = sdfh
667 667 yield
668 668 finally:
669 669 self._writinghandles = None
670 670 self._segmentfile.writing_handle = None
671 671 self._segmentfile_sidedata.writing_handle = None
672 672 if dfh is not None:
673 673 dfh.close()
674 674 if sdfh is not None:
675 675 sdfh.close()
676 676 # close the index file last, to avoid exposing references to
677 677 # potentially unflushed data content.
678 678 if ifh is not None:
679 679 ifh.close()
680 680
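One property of writing() above deserves emphasis: it is re-entrant, so nested calls reuse the already-open handles instead of reopening the files. A minimal self-contained model of that pattern:

import contextlib

class ReentrantWriter:
    def __init__(self):
        self._handle = None  # stand-in for (ifh, dfh, sdfh)

    @property
    def is_writing(self):
        return self._handle is not None

    @contextlib.contextmanager
    def writing(self):
        if self.is_writing:
            yield  # nested call: reuse the open handles
        else:
            self._handle = object()  # "open" the files
            try:
                yield
            finally:
                self._handle = None  # close on outermost exit only

w = ReentrantWriter()
with w.writing():
    with w.writing():
        assert w.is_writing
assert not w.is_writing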
681 681 def __index_write_fp(self, index_end=None):
682 682 """internal method to open the index file for writing
683 683
684 684 You should not use this directly and use `_writing` instead
685 685 """
686 686 try:
687 687 f = self.opener(
688 688 self.index_file,
689 689 mode=b"r+",
690 690 checkambig=self.data_config.check_ambig,
691 691 )
692 692 if index_end is None:
693 693 f.seek(0, os.SEEK_END)
694 694 else:
695 695 f.seek(index_end, os.SEEK_SET)
696 696 return f
697 697 except FileNotFoundError:
698 698 return self.opener(
699 699 self.index_file,
700 700 mode=b"w+",
701 701 checkambig=self.data_config.check_ambig,
702 702 )
703 703
704 704 def __index_new_fp(self):
705 705 """internal method to create a new index file for writing
706 706
707 707 You should not use this unless you are upgrading from an inline revlog
708 708 """
709 709 return self.opener(
710 710 self.index_file,
711 711 mode=b"w",
712 712 checkambig=self.data_config.check_ambig,
713 713 atomictemp=True,
714 714 )
715 715
716 716 def split_inline(self, tr, header, new_index_file_path=None):
717 717 """split the data of an inline revlog into an index and a data file"""
718 718 existing_handles = False
719 719 if self._writinghandles is not None:
720 720 existing_handles = True
721 721 fp = self._writinghandles[0]
722 722 fp.flush()
723 723 fp.close()
724 724 # We can't use the cached file handle after close(). So prevent
725 725 # its usage.
726 726 self._writinghandles = None
727 727 self._segmentfile.writing_handle = None
728 728 # No need to deal with sidedata writing handle as it is only
729 729 # relevant with revlog-v2 which is never inline, not reaching
730 730 # this code
731 731
732 732 new_dfh = self.opener(self.data_file, mode=b"w+")
733 733 new_dfh.truncate(0) # drop any potentially existing data
734 734 try:
735 735 with self.reading():
736 736 for r in range(len(self.index)):
737 737 new_dfh.write(self.get_segment_for_revs(r, r)[1])
738 738 new_dfh.flush()
739 739
740 740 if new_index_file_path is not None:
741 741 self.index_file = new_index_file_path
742 742 with self.__index_new_fp() as fp:
743 743 self.inline = False
744 744 for i in range(len(self.index)):
745 745 e = self.index.entry_binary(i)
746 746 if i == 0:
747 747 packed_header = self.index.pack_header(header)
748 748 e = packed_header + e
749 749 fp.write(e)
750 750
751 751 # If we don't use side-write, the temp file replaces the real
752 752 # index when we exit the context manager
753 753
754 754 self._segmentfile = randomaccessfile.randomaccessfile(
755 755 self.opener,
756 756 self.data_file,
757 757 self.data_config.chunk_cache_size,
758 758 )
759 759
760 760 if existing_handles:
761 761 # switched from inline to conventional; reopen the index
762 762 ifh = self.__index_write_fp()
763 763 self._writinghandles = (ifh, new_dfh, None)
764 764 self._segmentfile.writing_handle = new_dfh
765 765 new_dfh = None
766 766 # No need to deal with sidedata writing handle as it is only
767 767 # relevant with revlog-v2 which is never inline, not reaching
768 768 # this code
769 769 finally:
770 770 if new_dfh is not None:
771 771 new_dfh.close()
772 772 return self.index_file
773 773
774 774 def get_segment_for_revs(self, startrev, endrev):
775 775 """Obtain a segment of raw data corresponding to a range of revisions.
776 776
777 777 Accepts the start and end revisions. Reads are performed through
778 778 the internal segment file; its seek position will not be
779 779 preserved.
780 780
781 781 Requests for data may be satisfied by a cache.
782 782
783 783 Returns a 2-tuple of (offset, data) for the requested range of
784 784 revisions. Offset is the integer offset from the beginning of the
785 785 revlog and data is a str or buffer of the raw byte data.
786 786
787 787 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
788 788 to determine where each revision's data begins and ends.
789 789
790 790 API: we should consider making this a private part of the InnerRevlog
791 791 at some point.
792 792 """
793 793 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
794 794 # (functions are expensive).
795 795 index = self.index
796 796 istart = index[startrev]
797 797 start = int(istart[0] >> 16)
798 798 if startrev == endrev:
799 799 end = start + istart[1]
800 800 else:
801 801 iend = index[endrev]
802 802 end = int(iend[0] >> 16) + iend[1]
803 803
804 804 if self.inline:
805 805 start += (startrev + 1) * self.index.entry_size
806 806 end += (endrev + 1) * self.index.entry_size
807 807 length = end - start
808 808
809 809 return start, self._segmentfile.read_chunk(start, length)
810 810
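A worked example of the offset arithmetic above, assuming revlog-v1 conventions (64-byte index entries; entry[0] packs `offset << 16 | flags`):

entry0 = (4096 << 16) | 0  # data offset 4096, no flags
start = entry0 >> 16
assert start == 4096

# In an inline revlog, index entries and data interleave in one file, so
# rev r's data actually begins after r + 1 index entries:
ENTRY_SIZE = 64
rev = 2
assert start + (rev + 1) * ENTRY_SIZE == 4096 + 192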
811 811 def _chunk(self, rev):
812 812 """Obtain a single decompressed chunk for a revision.
813 813
814 814 Accepts an integer revision. The chunk is read through the internal
815 815 segment file, whose seek position will not be preserved, and is
816 816 decompressed according to the revision's compression mode.
817 817
818 818 Returns a str holding uncompressed data for the requested revision.
819 819 """
820 820 compression_mode = self.index[rev][10]
821 821 data = self.get_segment_for_revs(rev, rev)[1]
822 822 if compression_mode == COMP_MODE_PLAIN:
823 823 return data
824 824 elif compression_mode == COMP_MODE_DEFAULT:
825 825 return self._decompressor(data)
826 826 elif compression_mode == COMP_MODE_INLINE:
827 827 return self.decompress(data)
828 828 else:
829 829 msg = b'unknown compression mode %d'
830 830 msg %= compression_mode
831 831 raise error.RevlogError(msg)
832 832
833 833 def _chunks(self, revs, targetsize=None):
834 834 """Obtain decompressed chunks for the specified revisions.
835 835
836 836 Accepts an iterable of numeric revisions that are assumed to be in
837 837 ascending order. Reads are performed through the internal segment
838 838 file, whose seek position will not be preserved. Requests may be
839 839 sliced into smaller reads when sparse reading is enabled.
840 840
841 841 This function is similar to calling ``self._chunk()`` multiple times,
842 842 but is faster.
843 843
844 844 Returns a list with decompressed data for each requested revision.
845 845 """
846 846 if not revs:
847 847 return []
848 848 start = self.start
849 849 length = self.length
850 850 inline = self.inline
851 851 iosize = self.index.entry_size
852 852 buffer = util.buffer
853 853
854 854 l = []
855 855 ladd = l.append
856 856
857 857 if not self.data_config.with_sparse_read:
858 858 slicedchunks = (revs,)
859 859 else:
860 860 slicedchunks = deltautil.slicechunk(
861 861 self,
862 862 revs,
863 863 targetsize=targetsize,
864 864 )
865 865
866 866 for revschunk in slicedchunks:
867 867 firstrev = revschunk[0]
868 868 # Skip trailing revisions with empty diff
869 869 for lastrev in revschunk[::-1]:
870 870 if length(lastrev) != 0:
871 871 break
872 872
873 873 try:
874 874 offset, data = self.get_segment_for_revs(firstrev, lastrev)
875 875 except OverflowError:
876 876 # issue4215 - we can't cache a run of chunks greater than
877 877 # 2G on Windows
878 878 return [self._chunk(rev) for rev in revschunk]
879 879
880 880 decomp = self.decompress
881 881 # self._decompressor might be None, but will not be used in that case
882 882 def_decomp = self._decompressor
883 883 for rev in revschunk:
884 884 chunkstart = start(rev)
885 885 if inline:
886 886 chunkstart += (rev + 1) * iosize
887 887 chunklength = length(rev)
888 888 comp_mode = self.index[rev][10]
889 889 c = buffer(data, chunkstart - offset, chunklength)
890 890 if comp_mode == COMP_MODE_PLAIN:
891 891 ladd(c)
892 892 elif comp_mode == COMP_MODE_INLINE:
893 893 ladd(decomp(c))
894 894 elif comp_mode == COMP_MODE_DEFAULT:
895 895 ladd(def_decomp(c))
896 896 else:
897 897 msg = b'unknown compression mode %d'
898 898 msg %= comp_mode
899 899 raise error.RevlogError(msg)
900 900
901 901 return l
902 902
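The key trick in _chunks() above is reading one contiguous segment and then taking zero-copy views into it, one per revision. A tiny stand-alone model:

segment = b'AAABBCCCC'  # pretend this covers revs 0..2 on disk
offset = 0              # file offset where the segment starts
starts, lengths = [0, 3, 5], [3, 2, 4]
chunks = [
    memoryview(segment)[starts[r] - offset:starts[r] - offset + lengths[r]]
    for r in range(3)
]
assert [bytes(c) for c in chunks] == [b'AAA', b'BB', b'CCCC']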
903 def raw_text(self, node, rev):
904 """return the possibly unvalidated rawtext for a revision
905
906 returns (rev, rawtext, validated)
907 """
908
909 # revision in the cache (could be useful to apply delta)
910 cachedrev = None
911 # An intermediate text to apply deltas to
912 basetext = None
913
914 # Check if we have the entry in cache
915 # The cache entry looks like (node, rev, rawtext)
916 if self._revisioncache:
917 cachedrev = self._revisioncache[1]
918
919 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
920 if stopped:
921 basetext = self._revisioncache[2]
922
923 # drop cache to save memory, the caller is expected to
924 # update self._inner._revisioncache after validating the text
925 self._revisioncache = None
926
927 targetsize = None
928 rawsize = self.index[rev][2]
929 if 0 <= rawsize:
930 targetsize = 4 * rawsize
931
932 bins = self._chunks(chain, targetsize=targetsize)
933 if basetext is None:
934 basetext = bytes(bins[0])
935 bins = bins[1:]
936
937 rawtext = mdiff.patches(basetext, bins)
938 del basetext # let us have a chance to free memory early
939 return (rev, rawtext, False)
940
903 941
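The new raw_text() method ends by folding the base text and the remaining deltas into the final raw text via mdiff.patches(). As a hedged stand-in (mdiff's real patch format is binary; here deltas are plain (start, end, replacement) triples applied in order):

def toy_patches(base: bytes, deltas) -> bytes:
    text = base
    for start, end, replacement in deltas:
        text = text[:start] + replacement + text[end:]
    return text

base = b'the quick brown fox'
deltas = [(4, 9, b'slow'), (0, 3, b'THE')]
assert toy_patches(base, deltas) == b'THE slow brown fox'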
904 942 class revlog:
905 943 """
906 944 the underlying revision storage object
907 945
908 946 A revlog consists of two parts, an index and the revision data.
909 947
910 948 The index is a file with a fixed record size containing
911 949 information on each revision, including its nodeid (hash), the
912 950 nodeids of its parents, the position and offset of its data within
913 951 the data file, and the revision it's based on. Finally, each entry
914 952 contains a linkrev entry that can serve as a pointer to external
915 953 data.
916 954
917 955 The revision data itself is a linear collection of data chunks.
918 956 Each chunk represents a revision and is usually represented as a
919 957 delta against the previous chunk. To bound lookup time, runs of
920 958 deltas are limited to about 2 times the length of the original
921 959 version data. This makes retrieval of a version proportional to
922 960 its size, or O(1) relative to the number of revisions.
923 961
924 962 Both pieces of the revlog are written to in an append-only
925 963 fashion, which means we never need to rewrite a file to insert or
926 964 remove data, and can use some simple techniques to avoid the need
927 965 for locking while reading.
928 966
929 967 If checkambig, indexfile is opened with checkambig=True at
930 968 writing, to avoid file stat ambiguity.
931 969
932 970 If mmaplargeindex is True, and an mmapindexthreshold is set, the
933 971 index will be mmapped rather than read if it is larger than the
934 972 configured threshold.
935 973
936 974 If censorable is True, the revlog can have censored revisions.
937 975
938 976 If `upperboundcomp` is not None, this is the expected maximal gain from
939 977 compression for the data content.
940 978
941 979 `concurrencychecker` is an optional function that receives 3 arguments: a
942 980 file handle, a filename, and an expected position. It should check whether
943 981 the current position in the file handle is valid, and log/warn/fail (by
944 982 raising).
945 983
946 984 See mercurial/revlogutils/constants.py for details about the content of an
947 985 index entry.
948 986 """
949 987
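For orientation while reading the docstring above, the classic v1 index entry can be described with a struct layout. The format string below is an assumption for illustration; the authoritative definition lives in mercurial/revlogutils/constants.py:

import struct

INDEX_ENTRY_V1 = struct.Struct('>Qiiiiii20s12x')
# >Q   48-bit data offset packed with 16 bits of flags
# i    compressed length
# i    uncompressed length
# i    delta base revision
# i    link revision
# i    first parent revision
# i    second parent revision
# 20s  SHA-1 node id
# 12x  padding to a fixed 64-byte record
assert INDEX_ENTRY_V1.size == 64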
950 988 _flagserrorclass = error.RevlogError
951 989
952 990 @staticmethod
953 991 def is_inline_index(header_bytes):
954 992 """Determine if a revlog is inline from the initial bytes of the index"""
955 993 header = INDEX_HEADER.unpack(header_bytes)[0]
956 994
957 995 _format_flags = header & ~0xFFFF
958 996 _format_version = header & 0xFFFF
959 997
960 998 features = FEATURES_BY_VERSION[_format_version]
961 999 return features[b'inline'](_format_flags)
962 1000
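is_inline_index() above splits the 4-byte header into a 16-bit version and 16 bits of flags. A concrete round-trip, using values from revlogutils.constants (REVLOGV1 == 1, FLAG_INLINE_DATA == 1 << 16, FLAG_GENERALDELTA == 1 << 17):

import struct

header_bytes = struct.pack('>I', (1 << 16) | (1 << 17) | 1)
header = struct.unpack('>I', header_bytes)[0]
format_flags = header & ~0xFFFF
format_version = header & 0xFFFF
assert format_version == 1       # REVLOGV1
assert format_flags & (1 << 16)  # inline data
assert format_flags & (1 << 17)  # general delta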
963 1001 def __init__(
964 1002 self,
965 1003 opener,
966 1004 target,
967 1005 radix,
968 1006 postfix=None, # only exists for `tmpcensored` now
969 1007 checkambig=False,
970 1008 mmaplargeindex=False,
971 1009 censorable=False,
972 1010 upperboundcomp=None,
973 1011 persistentnodemap=False,
974 1012 concurrencychecker=None,
975 1013 trypending=False,
976 1014 try_split=False,
977 1015 canonical_parent_order=True,
978 1016 ):
979 1017 """
980 1018 create a revlog object
981 1019
982 1020 opener is a function that abstracts the file opening operation
983 1021 and can be used to implement COW semantics or the like.
984 1022
985 1023 `target`: a (KIND, ID) tuple that identifies the content stored in
986 1024 this revlog. It helps the rest of the code understand what the revlog
987 1025 is about without having to resort to heuristics and index filename
988 1026 analysis. Note that this must reliably be set by normal code, but
989 1027 test, debug, or performance measurement code might not set it to an
990 1028 accurate value.
991 1029 """
992 1030
993 1031 self.radix = radix
994 1032
995 1033 self._docket_file = None
996 1034 self._indexfile = None
997 1035 self._datafile = None
998 1036 self._sidedatafile = None
999 1037 self._nodemap_file = None
1000 1038 self.postfix = postfix
1001 1039 self._trypending = trypending
1002 1040 self._try_split = try_split
1003 1041 self.opener = opener
1004 1042 if persistentnodemap:
1005 1043 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1006 1044
1007 1045 assert target[0] in ALL_KINDS
1008 1046 assert len(target) == 2
1009 1047 self.target = target
1010 1048 if b'feature-config' in self.opener.options:
1011 1049 self.feature_config = self.opener.options[b'feature-config'].copy()
1012 1050 else:
1013 1051 self.feature_config = FeatureConfig()
1014 1052 self.feature_config.censorable = censorable
1015 1053 self.feature_config.canonical_parent_order = canonical_parent_order
1016 1054 if b'data-config' in self.opener.options:
1017 1055 self.data_config = self.opener.options[b'data-config'].copy()
1018 1056 else:
1019 1057 self.data_config = DataConfig()
1020 1058 self.data_config.check_ambig = checkambig
1021 1059 self.data_config.mmap_large_index = mmaplargeindex
1022 1060 if b'delta-config' in self.opener.options:
1023 1061 self.delta_config = self.opener.options[b'delta-config'].copy()
1024 1062 else:
1025 1063 self.delta_config = DeltaConfig()
1026 1064 self.delta_config.upper_bound_comp = upperboundcomp
1027 1065
1028 1066 # Maps rev to chain base rev.
1029 1067 self._chainbasecache = util.lrucachedict(100)
1030 1068
1031 1069 self.index = None
1032 1070 self._docket = None
1033 1071 self._nodemap_docket = None
1034 1072 # Mapping of partial identifiers to full nodes.
1035 1073 self._pcache = {}
1036 1074
1037 1075 # other optional features
1038 1076
1039 1077 # Make copy of flag processors so each revlog instance can support
1040 1078 # custom flags.
1041 1079 self._flagprocessors = dict(flagutil.flagprocessors)
1042 1080 # prevent nesting of addgroup
1043 1081 self._adding_group = None
1044 1082
1045 1083 chunk_cache = self._loadindex()
1046 1084 self._load_inner(chunk_cache)
1047 1085 self._concurrencychecker = concurrencychecker
1048 1086
1049 1087 @property
1050 1088 def _generaldelta(self):
1051 1089 """temporary compatibility proxy"""
1052 1090 util.nouideprecwarn(
1053 1091 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1054 1092 )
1055 1093 return self.delta_config.general_delta
1056 1094
1057 1095 @property
1058 1096 def _checkambig(self):
1059 1097 """temporary compatibility proxy"""
1060 1098 util.nouideprecwarn(
1061 1099 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1062 1100 )
1063 1101 return self.data_config.check_ambig
1064 1102
1065 1103 @property
1066 1104 def _mmaplargeindex(self):
1067 1105 """temporary compatibility proxy"""
1068 1106 util.nouideprecwarn(
1069 1107 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1070 1108 )
1071 1109 return self.data_config.mmap_large_index
1072 1110
1073 1111 @property
1074 1112 def _censorable(self):
1075 1113 """temporary compatibility proxy"""
1076 1114 util.nouideprecwarn(
1077 1115 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1078 1116 )
1079 1117 return self.feature_config.censorable
1080 1118
1081 1119 @property
1082 1120 def _chunkcachesize(self):
1083 1121 """temporary compatibility proxy"""
1084 1122 util.nouideprecwarn(
1085 1123 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1086 1124 )
1087 1125 return self.data_config.chunk_cache_size
1088 1126
1089 1127 @property
1090 1128 def _maxchainlen(self):
1091 1129 """temporary compatibility proxy"""
1092 1130 util.nouideprecwarn(
1093 1131 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1094 1132 )
1095 1133 return self.delta_config.max_chain_len
1096 1134
1097 1135 @property
1098 1136 def _deltabothparents(self):
1099 1137 """temporary compatibility proxy"""
1100 1138 util.nouideprecwarn(
1101 1139 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1102 1140 )
1103 1141 return self.delta_config.delta_both_parents
1104 1142
1105 1143 @property
1106 1144 def _candidate_group_chunk_size(self):
1107 1145 """temporary compatibility proxy"""
1108 1146 util.nouideprecwarn(
1109 1147 b"use revlog.delta_config.candidate_group_chunk_size",
1110 1148 b"6.6",
1111 1149 stacklevel=2,
1112 1150 )
1113 1151 return self.delta_config.candidate_group_chunk_size
1114 1152
1115 1153 @property
1116 1154 def _debug_delta(self):
1117 1155 """temporary compatibility proxy"""
1118 1156 util.nouideprecwarn(
1119 1157 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1120 1158 )
1121 1159 return self.delta_config.debug_delta
1122 1160
1123 1161 @property
1124 1162 def _compengine(self):
1125 1163 """temporary compatibility proxy"""
1126 1164 util.nouideprecwarn(
1127 1165 b"use revlog.feature_config.compression_engine",
1128 1166 b"6.6",
1129 1167 stacklevel=2,
1130 1168 )
1131 1169 return self.feature_config.compression_engine
1132 1170
1133 1171 @property
1134 1172 def upperboundcomp(self):
1135 1173 """temporary compatibility proxy"""
1136 1174 util.nouideprecwarn(
1137 1175 b"use revlog.delta_config.upper_bound_comp",
1138 1176 b"6.6",
1139 1177 stacklevel=2,
1140 1178 )
1141 1179 return self.delta_config.upper_bound_comp
1142 1180
1143 1181 @property
1144 1182 def _compengineopts(self):
1145 1183 """temporary compatibility proxy"""
1146 1184 util.nouideprecwarn(
1147 1185 b"use revlog.feature_config.compression_engine_options",
1148 1186 b"6.6",
1149 1187 stacklevel=2,
1150 1188 )
1151 1189 return self.feature_config.compression_engine_options
1152 1190
1153 1191 @property
1154 1192 def _maxdeltachainspan(self):
1155 1193 """temporary compatibility proxy"""
1156 1194 util.nouideprecwarn(
1157 1195 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1158 1196 )
1159 1197 return self.delta_config.max_deltachain_span
1160 1198
1161 1199 @property
1162 1200 def _withsparseread(self):
1163 1201 """temporary compatibility proxy"""
1164 1202 util.nouideprecwarn(
1165 1203 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1166 1204 )
1167 1205 return self.data_config.with_sparse_read
1168 1206
1169 1207 @property
1170 1208 def _sparserevlog(self):
1171 1209 """temporary compatibility proxy"""
1172 1210 util.nouideprecwarn(
1173 1211 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1174 1212 )
1175 1213 return self.delta_config.sparse_revlog
1176 1214
1177 1215 @property
1178 1216 def hassidedata(self):
1179 1217 """temporary compatibility proxy"""
1180 1218 util.nouideprecwarn(
1181 1219 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1182 1220 )
1183 1221 return self.feature_config.has_side_data
1184 1222
1185 1223 @property
1186 1224 def _srdensitythreshold(self):
1187 1225 """temporary compatibility proxy"""
1188 1226 util.nouideprecwarn(
1189 1227 b"use revlog.data_config.sr_density_threshold",
1190 1228 b"6.6",
1191 1229 stacklevel=2,
1192 1230 )
1193 1231 return self.data_config.sr_density_threshold
1194 1232
1195 1233 @property
1196 1234 def _srmingapsize(self):
1197 1235 """temporary compatibility proxy"""
1198 1236 util.nouideprecwarn(
1199 1237 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1200 1238 )
1201 1239 return self.data_config.sr_min_gap_size
1202 1240
1203 1241 @property
1204 1242 def _compute_rank(self):
1205 1243 """temporary compatibility proxy"""
1206 1244 util.nouideprecwarn(
1207 1245 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1208 1246 )
1209 1247 return self.feature_config.compute_rank
1210 1248
1211 1249 @property
1212 1250 def canonical_parent_order(self):
1213 1251 """temporary compatibility proxy"""
1214 1252 util.nouideprecwarn(
1215 1253 b"use revlog.feature_config.canonical_parent_order",
1216 1254 b"6.6",
1217 1255 stacklevel=2,
1218 1256 )
1219 1257 return self.feature_config.canonical_parent_order
1220 1258
1221 1259 @property
1222 1260 def _lazydelta(self):
1223 1261 """temporary compatibility proxy"""
1224 1262 util.nouideprecwarn(
1225 1263 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1226 1264 )
1227 1265 return self.delta_config.lazy_delta
1228 1266
1229 1267 @property
1230 1268 def _lazydeltabase(self):
1231 1269 """temporary compatibility proxy"""
1232 1270 util.nouideprecwarn(
1233 1271 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1234 1272 )
1235 1273 return self.delta_config.lazy_delta_base
1236 1274
1237 1275 def _init_opts(self):
1238 1276 """process options (from above/config) to setup associated default revlog mode
1239 1277
1240 1278 These values might be affected when actually reading on disk information.
1241 1279
1242 1280 The relevant values are returned for use in _loadindex().
1243 1281
1244 1282 * newversionflags:
1245 1283 version header to use if we need to create a new revlog
1246 1284
1247 1285 * mmapindexthreshold:
1248 1286 minimal index size at which to start using mmap
1249 1287
1250 1288 * force_nodemap:
1251 1289 force the usage of a "development" version of the nodemap code
1252 1290 """
1253 1291 opts = self.opener.options
1254 1292
1255 1293 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1256 1294 new_header = CHANGELOGV2
1257 1295 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1258 1296 self.feature_config.compute_rank = compute_rank
1259 1297 elif b'revlogv2' in opts:
1260 1298 new_header = REVLOGV2
1261 1299 elif b'revlogv1' in opts:
1262 1300 new_header = REVLOGV1 | FLAG_INLINE_DATA
1263 1301 if b'generaldelta' in opts:
1264 1302 new_header |= FLAG_GENERALDELTA
1265 1303 elif b'revlogv0' in self.opener.options:
1266 1304 new_header = REVLOGV0
1267 1305 else:
1268 1306 new_header = REVLOG_DEFAULT_VERSION
1269 1307
1270 1308 mmapindexthreshold = None
1271 1309 if self.data_config.mmap_large_index:
1272 1310 mmapindexthreshold = self.data_config.mmap_index_threshold
1273 1311 if self.feature_config.enable_ellipsis:
1274 1312 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1275 1313
1276 1314 # revlog v0 doesn't have flag processors
1277 1315 for flag, processor in opts.get(b'flagprocessors', {}).items():
1278 1316 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1279 1317
1280 1318 chunk_cache_size = self.data_config.chunk_cache_size
1281 1319 if chunk_cache_size <= 0:
1282 1320 raise error.RevlogError(
1283 1321 _(b'revlog chunk cache size %r is not greater than 0')
1284 1322 % chunk_cache_size
1285 1323 )
1286 1324 elif chunk_cache_size & (chunk_cache_size - 1):
1287 1325 raise error.RevlogError(
1288 1326 _(b'revlog chunk cache size %r is not a power of 2')
1289 1327 % chunk_cache_size
1290 1328 )
1291 1329 force_nodemap = opts.get(b'devel-force-nodemap', False)
1292 1330 return new_header, mmapindexthreshold, force_nodemap
1293 1331
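The cache-size validation above relies on the classic bit trick: n & (n - 1) clears the lowest set bit, so the result is zero exactly when n is a power of two.

def is_power_of_two(n: int) -> bool:
    return n > 0 and (n & (n - 1)) == 0

assert is_power_of_two(65536)
assert not is_power_of_two(65535)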
1294 1332 def _get_data(self, filepath, mmap_threshold, size=None):
1295 1333 """return a file content with or without mmap
1296 1334
1297 1335 If the file is missing return the empty string"""
1298 1336 try:
1299 1337 with self.opener(filepath) as fp:
1300 1338 if mmap_threshold is not None:
1301 1339 file_size = self.opener.fstat(fp).st_size
1302 1340 if file_size >= mmap_threshold:
1303 1341 if size is not None:
1304 1342 # avoid potential mmap crash
1305 1343 size = min(file_size, size)
1306 1344 # TODO: should .close() to release resources without
1307 1345 # relying on Python GC
1308 1346 if size is None:
1309 1347 return util.buffer(util.mmapread(fp))
1310 1348 else:
1311 1349 return util.buffer(util.mmapread(fp, size))
1312 1350 if size is None:
1313 1351 return fp.read()
1314 1352 else:
1315 1353 return fp.read(size)
1316 1354 except FileNotFoundError:
1317 1355 return b''
1318 1356
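A hedged sketch of the mmap-versus-read decision in _get_data() above, using plain os/mmap instead of Mercurial's opener and util helpers:

import mmap
import os

def toy_get_data(path, mmap_threshold=None):
    try:
        with open(path, 'rb') as fp:
            if mmap_threshold is not None:
                if os.fstat(fp.fileno()).st_size >= mmap_threshold:
                    # the mapping stays valid after fp is closed
                    return memoryview(
                        mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
                    )
            return fp.read()
    except FileNotFoundError:
        return b''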
1319 1357 def get_streams(self, max_linkrev, force_inline=False):
1320 1358 """return a list of streams that represent this revlog
1321 1359
1322 1360 This is used by stream-clone to do byte-for-byte copies of a repository.
1323 1361
1324 1362 This streams data for all revisions that refer to a changelog revision up
1325 1363 to `max_linkrev`.
1326 1364
1327 1365 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1328 1366
1329 1367 It returns a list of three-tuples:
1330 1368
1331 1369 [
1332 1370 (filename, bytes_stream, stream_size),
1333 1371 …
1334 1372 ]
1335 1373 """
1336 1374 n = len(self)
1337 1375 index = self.index
1338 1376 while n > 0:
1339 1377 linkrev = index[n - 1][4]
1340 1378 if linkrev < max_linkrev:
1341 1379 break
1342 1380 # note: this loop will rarely go through multiple iterations, since
1343 1381 # it only traverses commits created during the current streaming
1344 1382 # pull operation.
1345 1383 #
1346 1384 # If this becomes a problem, using a binary search should cap the
1347 1385 # runtime of this.
1348 1386 n = n - 1
1349 1387 if n == 0:
1350 1388 # no data to send
1351 1389 return []
1352 1390 index_size = n * index.entry_size
1353 1391 data_size = self.end(n - 1)
1354 1392
1355 1393 # XXX we might have been split (or stripped) since the object was
1356 1394 # initialized. We need to close this race too, perhaps by having a way
1357 1395 # to pre-open the files we feed to the revlog and never closing them
1358 1396 # before we are done streaming.
1359 1397
1360 1398 if self._inline:
1361 1399
1362 1400 def get_stream():
1363 1401 with self.opener(self._indexfile, mode=b"r") as fp:
1364 1402 yield None
1365 1403 size = index_size + data_size
1366 1404 if size <= 65536:
1367 1405 yield fp.read(size)
1368 1406 else:
1369 1407 yield from util.filechunkiter(fp, limit=size)
1370 1408
1371 1409 inline_stream = get_stream()
1372 1410 next(inline_stream)
1373 1411 return [
1374 1412 (self._indexfile, inline_stream, index_size + data_size),
1375 1413 ]
1376 1414 elif force_inline:
1377 1415
1378 1416 def get_stream():
1379 1417 with self.reading():
1380 1418 yield None
1381 1419
1382 1420 for rev in range(n):
1383 1421 idx = self.index.entry_binary(rev)
1384 1422 if rev == 0 and self._docket is None:
1385 1423 # re-inject the inline flag
1386 1424 header = self._format_flags
1387 1425 header |= self._format_version
1388 1426 header |= FLAG_INLINE_DATA
1389 1427 header = self.index.pack_header(header)
1390 1428 idx = header + idx
1391 1429 yield idx
1392 1430 yield self._inner.get_segment_for_revs(rev, rev)[1]
1393 1431
1394 1432 inline_stream = get_stream()
1395 1433 next(inline_stream)
1396 1434 return [
1397 1435 (self._indexfile, inline_stream, index_size + data_size),
1398 1436 ]
1399 1437 else:
1400 1438
1401 1439 def get_index_stream():
1402 1440 with self.opener(self._indexfile, mode=b"r") as fp:
1403 1441 yield None
1404 1442 if index_size <= 65536:
1405 1443 yield fp.read(index_size)
1406 1444 else:
1407 1445 yield from util.filechunkiter(fp, limit=index_size)
1408 1446
1409 1447 def get_data_stream():
1410 1448 with self._datafp() as fp:
1411 1449 yield None
1412 1450 if data_size <= 65536:
1413 1451 yield fp.read(data_size)
1414 1452 else:
1415 1453 yield from util.filechunkiter(fp, limit=data_size)
1416 1454
1417 1455 index_stream = get_index_stream()
1418 1456 next(index_stream)
1419 1457 data_stream = get_data_stream()
1420 1458 next(data_stream)
1421 1459 return [
1422 1460 (self._datafile, data_stream, data_size),
1423 1461 (self._indexfile, index_stream, index_size),
1424 1462 ]
1425 1463
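All the get_stream*() helpers above share one pattern: the generator opens its file, yields None once, and the caller advances it immediately with next() so that open errors surface before streaming starts. A self-contained version of that pattern:

def primed_reader(path, size, chunk=65536):
    with open(path, 'rb') as fp:
        yield None  # file is open; caller discards this sentinel
        remaining = size
        while remaining > 0:
            data = fp.read(min(chunk, remaining))
            if not data:
                break
            yield data
            remaining -= len(data)

# stream = primed_reader('some-file', 123)
# next(stream)  # opens the file eagerly; errors raise here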
1426 1464 def _loadindex(self, docket=None):
1427 1465
1428 1466 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1429 1467
1430 1468 if self.postfix is not None:
1431 1469 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1432 1470 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1433 1471 entry_point = b'%s.i.a' % self.radix
1434 1472 elif self._try_split and self.opener.exists(self._split_index_file):
1435 1473 entry_point = self._split_index_file
1436 1474 else:
1437 1475 entry_point = b'%s.i' % self.radix
1438 1476
1439 1477 if docket is not None:
1440 1478 self._docket = docket
1441 1479 self._docket_file = entry_point
1442 1480 else:
1443 1481 self._initempty = True
1444 1482 entry_data = self._get_data(entry_point, mmapindexthreshold)
1445 1483 if len(entry_data) > 0:
1446 1484 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1447 1485 self._initempty = False
1448 1486 else:
1449 1487 header = new_header
1450 1488
1451 1489 self._format_flags = header & ~0xFFFF
1452 1490 self._format_version = header & 0xFFFF
1453 1491
1454 1492 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1455 1493 if supported_flags is None:
1456 1494 msg = _(b'unknown version (%d) in revlog %s')
1457 1495 msg %= (self._format_version, self.display_id)
1458 1496 raise error.RevlogError(msg)
1459 1497 elif self._format_flags & ~supported_flags:
1460 1498 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1461 1499 display_flag = self._format_flags >> 16
1462 1500 msg %= (display_flag, self._format_version, self.display_id)
1463 1501 raise error.RevlogError(msg)
1464 1502
1465 1503 features = FEATURES_BY_VERSION[self._format_version]
1466 1504 self._inline = features[b'inline'](self._format_flags)
1467 1505 self.delta_config.general_delta = features[b'generaldelta'](
1468 1506 self._format_flags
1469 1507 )
1470 1508 self.feature_config.has_side_data = features[b'sidedata']
1471 1509
1472 1510 if not features[b'docket']:
1473 1511 self._indexfile = entry_point
1474 1512 index_data = entry_data
1475 1513 else:
1476 1514 self._docket_file = entry_point
1477 1515 if self._initempty:
1478 1516 self._docket = docketutil.default_docket(self, header)
1479 1517 else:
1480 1518 self._docket = docketutil.parse_docket(
1481 1519 self, entry_data, use_pending=self._trypending
1482 1520 )
1483 1521
1484 1522 if self._docket is not None:
1485 1523 self._indexfile = self._docket.index_filepath()
1486 1524 index_data = b''
1487 1525 index_size = self._docket.index_end
1488 1526 if index_size > 0:
1489 1527 index_data = self._get_data(
1490 1528 self._indexfile, mmapindexthreshold, size=index_size
1491 1529 )
1492 1530 if len(index_data) < index_size:
1493 1531 msg = _(b'too few index data for %s: got %d, expected %d')
1494 1532 msg %= (self.display_id, len(index_data), index_size)
1495 1533 raise error.RevlogError(msg)
1496 1534
1497 1535 self._inline = False
1498 1536 # generaldelta implied by version 2 revlogs.
1499 1537 self.delta_config.general_delta = True
1500 1538 # the logic for persistent nodemap will be dealt with within the
1501 1539 # main docket, so disable it for now.
1502 1540 self._nodemap_file = None
1503 1541
1504 1542 if self._docket is not None:
1505 1543 self._datafile = self._docket.data_filepath()
1506 1544 self._sidedatafile = self._docket.sidedata_filepath()
1507 1545 elif self.postfix is None:
1508 1546 self._datafile = b'%s.d' % self.radix
1509 1547 else:
1510 1548 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1511 1549
1512 1550 self.nodeconstants = sha1nodeconstants
1513 1551 self.nullid = self.nodeconstants.nullid
1514 1552
1515 1553 # sparse-revlog can't be on without general-delta (issue6056)
1516 1554 if not self.delta_config.general_delta:
1517 1555 self.delta_config.sparse_revlog = False
1518 1556
1519 1557 self._storedeltachains = True
1520 1558
1521 1559 devel_nodemap = (
1522 1560 self._nodemap_file
1523 1561 and force_nodemap
1524 1562 and parse_index_v1_nodemap is not None
1525 1563 )
1526 1564
1527 1565 use_rust_index = False
1528 1566 if rustrevlog is not None:
1529 1567 if self._nodemap_file is not None:
1530 1568 use_rust_index = True
1531 1569 else:
1532 1570 use_rust_index = self.opener.options.get(b'rust.index')
1533 1571
1534 1572 self._parse_index = parse_index_v1
1535 1573 if self._format_version == REVLOGV0:
1536 1574 self._parse_index = revlogv0.parse_index_v0
1537 1575 elif self._format_version == REVLOGV2:
1538 1576 self._parse_index = parse_index_v2
1539 1577 elif self._format_version == CHANGELOGV2:
1540 1578 self._parse_index = parse_index_cl_v2
1541 1579 elif devel_nodemap:
1542 1580 self._parse_index = parse_index_v1_nodemap
1543 1581 elif use_rust_index:
1544 1582 self._parse_index = parse_index_v1_mixed
1545 1583 try:
1546 1584 d = self._parse_index(index_data, self._inline)
1547 1585 index, chunkcache = d
1548 1586 use_nodemap = (
1549 1587 not self._inline
1550 1588 and self._nodemap_file is not None
1551 1589 and hasattr(index, 'update_nodemap_data')
1552 1590 )
1553 1591 if use_nodemap:
1554 1592 nodemap_data = nodemaputil.persisted_data(self)
1555 1593 if nodemap_data is not None:
1556 1594 docket = nodemap_data[0]
1557 1595 if (
1558 1596 len(d[0]) > docket.tip_rev
1559 1597 and d[0][docket.tip_rev][7] == docket.tip_node
1560 1598 ):
1561 1599 # no changelog tampering
1562 1600 self._nodemap_docket = docket
1563 1601 index.update_nodemap_data(*nodemap_data)
1564 1602 except (ValueError, IndexError):
1565 1603 raise error.RevlogError(
1566 1604 _(b"index %s is corrupted") % self.display_id
1567 1605 )
1568 1606 self.index = index
1569 1607 # revnum -> (chain-length, sum-delta-length)
1570 1608 self._chaininfocache = util.lrucachedict(500)
1571 1609
1572 1610 return chunkcache
1573 1611
1574 1612 def _load_inner(self, chunk_cache):
1575 1613 if self._docket is None:
1576 1614 default_compression_header = None
1577 1615 else:
1578 1616 default_compression_header = self._docket.default_compression_header
1579 1617
1580 1618 self._inner = _InnerRevlog(
1581 1619 opener=self.opener,
1582 1620 index=self.index,
1583 1621 index_file=self._indexfile,
1584 1622 data_file=self._datafile,
1585 1623 sidedata_file=self._sidedatafile,
1586 1624 inline=self._inline,
1587 1625 data_config=self.data_config,
1588 1626 delta_config=self.delta_config,
1589 1627 feature_config=self.feature_config,
1590 1628 chunk_cache=chunk_cache,
1591 1629 default_compression_header=default_compression_header,
1592 1630 )
1593 1631
1594 1632 def get_revlog(self):
1595 1633 """simple function to mirror API of other not-really-revlog API"""
1596 1634 return self
1597 1635
1598 1636 @util.propertycache
1599 1637 def revlog_kind(self):
1600 1638 return self.target[0]
1601 1639
1602 1640 @util.propertycache
1603 1641 def display_id(self):
1604 1642 """The public facing "ID" of the revlog that we use in message"""
1605 1643 if self.revlog_kind == KIND_FILELOG:
1606 1644 # Reference the file without the "data/" prefix, so it is familiar
1607 1645 # to the user.
1608 1646 return self.target[1]
1609 1647 else:
1610 1648 return self.radix
1611 1649
1612 1650 def _datafp(self, mode=b'r'):
1613 1651 """file object for the revlog's data file"""
1614 1652 return self.opener(self._datafile, mode=mode)
1615 1653
1616 1654 def tiprev(self):
1617 1655 return len(self.index) - 1
1618 1656
1619 1657 def tip(self):
1620 1658 return self.node(self.tiprev())
1621 1659
1622 1660 def __contains__(self, rev):
1623 1661 return 0 <= rev < len(self)
1624 1662
1625 1663 def __len__(self):
1626 1664 return len(self.index)
1627 1665
1628 1666 def __iter__(self):
1629 1667 return iter(range(len(self)))
1630 1668
1631 1669 def revs(self, start=0, stop=None):
1632 1670 """iterate over all rev in this revlog (from start to stop)"""
1633 1671 return storageutil.iterrevs(len(self), start=start, stop=stop)
1634 1672
1635 1673 def hasnode(self, node):
1636 1674 try:
1637 1675 self.rev(node)
1638 1676 return True
1639 1677 except KeyError:
1640 1678 return False
1641 1679
1642 1680 def _candelta(self, baserev, rev):
1643 1681 """whether two revisions (baserev, rev) can be delta-ed or not"""
1644 1682 # Disable delta if either rev requires a content-changing flag
1645 1683 # processor (ex. LFS). This is because such a flag processor can alter
1646 1684 # the rawtext content that the delta will be based on, and two clients
1647 1685 # could have the same revlog node with different flags (i.e. different
1648 1686 # rawtext contents) and the delta could be incompatible.
1649 1687 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1650 1688 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1651 1689 ):
1652 1690 return False
1653 1691 return True
1654 1692
1655 1693 def update_caches(self, transaction):
1656 1694 """update on disk cache
1657 1695
1658 1696 If a transaction is passed, the update may be delayed to transaction
1659 1697 commit."""
1660 1698 if self._nodemap_file is not None:
1661 1699 if transaction is None:
1662 1700 nodemaputil.update_persistent_nodemap(self)
1663 1701 else:
1664 1702 nodemaputil.setup_persistent_nodemap(transaction, self)
1665 1703
1666 1704 def clearcaches(self):
1667 1705 """Clear in-memory caches"""
1668 1706 self._inner._revisioncache = None
1669 1707 self._chainbasecache.clear()
1670 1708 self._inner._segmentfile.clear_cache()
1671 1709 self._inner._segmentfile_sidedata.clear_cache()
1672 1710 self._pcache = {}
1673 1711 self._nodemap_docket = None
1674 1712 self.index.clearcaches()
1675 1713 # The python code is the one responsible for validating the docket, we
1676 1714 # end up having to refresh it here.
1677 1715 use_nodemap = (
1678 1716 not self._inline
1679 1717 and self._nodemap_file is not None
1680 1718 and hasattr(self.index, 'update_nodemap_data')
1681 1719 )
1682 1720 if use_nodemap:
1683 1721 nodemap_data = nodemaputil.persisted_data(self)
1684 1722 if nodemap_data is not None:
1685 1723 self._nodemap_docket = nodemap_data[0]
1686 1724 self.index.update_nodemap_data(*nodemap_data)
1687 1725
1688 1726 def rev(self, node):
1689 1727 """return the revision number associated with a <nodeid>"""
1690 1728 try:
1691 1729 return self.index.rev(node)
1692 1730 except TypeError:
1693 1731 raise
1694 1732 except error.RevlogError:
1695 1733 # parsers.c radix tree lookup failed
1696 1734 if (
1697 1735 node == self.nodeconstants.wdirid
1698 1736 or node in self.nodeconstants.wdirfilenodeids
1699 1737 ):
1700 1738 raise error.WdirUnsupported
1701 1739 raise error.LookupError(node, self.display_id, _(b'no node'))
1702 1740
1703 1741 # Accessors for index entries.
1704 1742
1705 1743 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1706 1744 # are flags.
1707 1745 def start(self, rev):
1708 1746 return int(self.index[rev][0] >> 16)
1709 1747
1710 1748 def sidedata_cut_off(self, rev):
1711 1749 sd_cut_off = self.index[rev][8]
1712 1750 if sd_cut_off != 0:
1713 1751 return sd_cut_off
1714 1752 # This is some annoying dance, because entries without sidedata
1715 1753 # currently use 0 as their offset (instead of previous-offset +
1716 1754 # previous-size).
1717 1755 #
1718 1756 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
1719 1757 # In the meantime, we need this.
1720 1758 while 0 <= rev:
1721 1759 e = self.index[rev]
1722 1760 if e[9] != 0:
1723 1761 return e[8] + e[9]
1724 1762 rev -= 1
1725 1763 return 0
1726 1764
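# Editor's sketch (hypothetical helper, not part of revlog): the backward
# walk above, stated on its own. `entries` stands in for self.index, where
# positions 8 and 9 of an entry hold (sidedata_offset, sidedata_size).
def sidedata_cut_off_sketch(entries, rev):
    """Return the end offset of the last non-empty sidedata at or
    before `rev`, or 0 if every entry so far has empty sidedata."""
    while rev >= 0:
        offset, size = entries[rev][8], entries[rev][9]
        if size != 0:
            return offset + size
        rev -= 1
    return 0
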
1727 1765 def flags(self, rev):
1728 1766 return self.index[rev][0] & 0xFFFF
1729 1767
1730 1768 def length(self, rev):
1731 1769 return self.index[rev][1]
1732 1770
1733 1771 def sidedata_length(self, rev):
1734 1772 if not self.feature_config.has_side_data:
1735 1773 return 0
1736 1774 return self.index[rev][9]
1737 1775
1738 1776 def rawsize(self, rev):
1739 1777 """return the length of the uncompressed text for a given revision"""
1740 1778 l = self.index[rev][2]
1741 1779 if l >= 0:
1742 1780 return l
1743 1781
1744 1782 t = self.rawdata(rev)
1745 1783 return len(t)
1746 1784
1747 1785 def size(self, rev):
1748 1786 """length of non-raw text (processed by a "read" flag processor)"""
1749 1787 # fast path: if no "read" flag processor could change the content,
1750 1788 # size is rawsize. note: ELLIPSIS is known to not change the content.
1751 1789 flags = self.flags(rev)
1752 1790 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1753 1791 return self.rawsize(rev)
1754 1792
1755 1793 return len(self.revision(rev))
1756 1794
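# Editor's sketch: size() and rawsize() only diverge when a content-changing
# "read" flag processor applies to one of the revision's flags. A
# hypothetical scan for such revisions on any revlog instance `rl`:
def find_processed_revs(rl):
    """Yield revs whose processed length differs from the stored length."""
    for rev in rl:
        if rl.size(rev) != rl.rawsize(rev):
            yield rev
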
1757 1795 def fast_rank(self, rev):
1758 1796 """Return the rank of a revision if already known, or None otherwise.
1759 1797
1760 1798 The rank of a revision is the size of the sub-graph it defines as a
1761 1799 head. Equivalently, the rank of a revision `r` is the size of the set
1762 1800 `ancestors(r)`, `r` included.
1763 1801
1764 1802 This method returns the rank retrieved from the revlog in constant
1765 1803 time. It makes no attempt at computing unknown values for versions of
1766 1804 the revlog which do not persist the rank.
1767 1805 """
1768 1806 rank = self.index[rev][ENTRY_RANK]
1769 1807 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1770 1808 return None
1771 1809 if rev == nullrev:
1772 1810 return 0 # convention
1773 1811 return rank
1774 1812
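# Editor's sketch: the value fast_rank() reads from the index could be
# recomputed by a full traversal; a naive reference, assuming a
# parentrevs(rev) -> (p1, p2) callable and nullrev == -1:
def naive_rank(parentrevs, rev):
    """len(ancestors(rev)), rev included; O(ancestors), reference only."""
    seen = {rev}
    stack = [rev]
    while stack:
        for p in parentrevs(stack.pop()):
            if p != -1 and p not in seen:
                seen.add(p)
                stack.append(p)
    return len(seen)
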
1775 1813 def chainbase(self, rev):
1776 1814 base = self._chainbasecache.get(rev)
1777 1815 if base is not None:
1778 1816 return base
1779 1817
1780 1818 index = self.index
1781 1819 iterrev = rev
1782 1820 base = index[iterrev][3]
1783 1821 while base != iterrev:
1784 1822 iterrev = base
1785 1823 base = index[iterrev][3]
1786 1824
1787 1825 self._chainbasecache[rev] = base
1788 1826 return base
1789 1827
1790 1828 def linkrev(self, rev):
1791 1829 return self.index[rev][4]
1792 1830
1793 1831 def parentrevs(self, rev):
1794 1832 try:
1795 1833 entry = self.index[rev]
1796 1834 except IndexError:
1797 1835 if rev == wdirrev:
1798 1836 raise error.WdirUnsupported
1799 1837 raise
1800 1838
1801 1839 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1802 1840 return entry[6], entry[5]
1803 1841 else:
1804 1842 return entry[5], entry[6]
1805 1843
1806 1844 # fast parentrevs(rev) where rev isn't filtered
1807 1845 _uncheckedparentrevs = parentrevs
1808 1846
1809 1847 def node(self, rev):
1810 1848 try:
1811 1849 return self.index[rev][7]
1812 1850 except IndexError:
1813 1851 if rev == wdirrev:
1814 1852 raise error.WdirUnsupported
1815 1853 raise
1816 1854
1817 1855 # Derived from index values.
1818 1856
1819 1857 def end(self, rev):
1820 1858 return self.start(rev) + self.length(rev)
1821 1859
1822 1860 def parents(self, node):
1823 1861 i = self.index
1824 1862 d = i[self.rev(node)]
1825 1863 # inline node() to avoid function call overhead
1826 1864 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1827 1865 return i[d[6]][7], i[d[5]][7]
1828 1866 else:
1829 1867 return i[d[5]][7], i[d[6]][7]
1830 1868
1831 1869 def chainlen(self, rev):
1832 1870 return self._chaininfo(rev)[0]
1833 1871
1834 1872 def _chaininfo(self, rev):
1835 1873 chaininfocache = self._chaininfocache
1836 1874 if rev in chaininfocache:
1837 1875 return chaininfocache[rev]
1838 1876 index = self.index
1839 1877 generaldelta = self.delta_config.general_delta
1840 1878 iterrev = rev
1841 1879 e = index[iterrev]
1842 1880 clen = 0
1843 1881 compresseddeltalen = 0
1844 1882 while iterrev != e[3]:
1845 1883 clen += 1
1846 1884 compresseddeltalen += e[1]
1847 1885 if generaldelta:
1848 1886 iterrev = e[3]
1849 1887 else:
1850 1888 iterrev -= 1
1851 1889 if iterrev in chaininfocache:
1852 1890 t = chaininfocache[iterrev]
1853 1891 clen += t[0]
1854 1892 compresseddeltalen += t[1]
1855 1893 break
1856 1894 e = index[iterrev]
1857 1895 else:
1858 1896 # Add text length of base since decompressing that also takes
1859 1897 # work. For cache hits the length is already included.
1860 1898 compresseddeltalen += e[1]
1861 1899 r = (clen, compresseddeltalen)
1862 1900 chaininfocache[rev] = r
1863 1901 return r
1864 1902
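# Editor's sketch: an uncached restatement of the walk above, using the
# index layout referenced there (e[1] = compressed length, e[3] = delta
# base); general-delta chains follow e[3], legacy chains step back by one.
def naive_chaininfo(index, rev, generaldelta):
    clen, complen = 0, 0
    e = index[rev]
    while rev != e[3]:
        clen += 1
        complen += e[1]
        rev = e[3] if generaldelta else rev - 1
        e = index[rev]
    # add the base text's length, since decompressing it also takes work
    return (clen, complen + e[1])
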
1865 1903 def _deltachain(self, rev, stoprev=None):
1866 1904 return self._inner._deltachain(rev, stoprev=stoprev)
1867 1905
1868 1906 def ancestors(self, revs, stoprev=0, inclusive=False):
1869 1907 """Generate the ancestors of 'revs' in reverse revision order.
1870 1908 Does not generate revs lower than stoprev.
1871 1909
1872 1910 See the documentation for ancestor.lazyancestors for more details."""
1873 1911
1874 1912 # first, make sure start revisions aren't filtered
1875 1913 revs = list(revs)
1876 1914 checkrev = self.node
1877 1915 for r in revs:
1878 1916 checkrev(r)
1879 1917 # and we're sure ancestors aren't filtered as well
1880 1918
1881 1919 if rustancestor is not None and self.index.rust_ext_compat:
1882 1920 lazyancestors = rustancestor.LazyAncestors
1883 1921 arg = self.index
1884 1922 else:
1885 1923 lazyancestors = ancestor.lazyancestors
1886 1924 arg = self._uncheckedparentrevs
1887 1925 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1888 1926
1889 1927 def descendants(self, revs):
1890 1928 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1891 1929
1892 1930 def findcommonmissing(self, common=None, heads=None):
1893 1931 """Return a tuple of the ancestors of common and the ancestors of heads
1894 1932 that are not ancestors of common. In revset terminology, we return the
1895 1933 tuple:
1896 1934
1897 1935 ::common, (::heads) - (::common)
1898 1936
1899 1937 The list is sorted by revision number, meaning it is
1900 1938 topologically sorted.
1901 1939
1902 1940 'heads' and 'common' are both lists of node IDs. If heads is
1903 1941 not supplied, uses all of the revlog's heads. If common is not
1904 1942 supplied, uses nullid."""
1905 1943 if common is None:
1906 1944 common = [self.nullid]
1907 1945 if heads is None:
1908 1946 heads = self.heads()
1909 1947
1910 1948 common = [self.rev(n) for n in common]
1911 1949 heads = [self.rev(n) for n in heads]
1912 1950
1913 1951 # we want the ancestors, but inclusive
1914 1952 class lazyset:
1915 1953 def __init__(self, lazyvalues):
1916 1954 self.addedvalues = set()
1917 1955 self.lazyvalues = lazyvalues
1918 1956
1919 1957 def __contains__(self, value):
1920 1958 return value in self.addedvalues or value in self.lazyvalues
1921 1959
1922 1960 def __iter__(self):
1923 1961 added = self.addedvalues
1924 1962 for r in added:
1925 1963 yield r
1926 1964 for r in self.lazyvalues:
1927 1965 if r not in added:
1928 1966 yield r
1929 1967
1930 1968 def add(self, value):
1931 1969 self.addedvalues.add(value)
1932 1970
1933 1971 def update(self, values):
1934 1972 self.addedvalues.update(values)
1935 1973
1936 1974 has = lazyset(self.ancestors(common))
1937 1975 has.add(nullrev)
1938 1976 has.update(common)
1939 1977
1940 1978 # take all ancestors from heads that aren't in has
1941 1979 missing = set()
1942 1980 visit = collections.deque(r for r in heads if r not in has)
1943 1981 while visit:
1944 1982 r = visit.popleft()
1945 1983 if r in missing:
1946 1984 continue
1947 1985 else:
1948 1986 missing.add(r)
1949 1987 for p in self.parentrevs(r):
1950 1988 if p not in has:
1951 1989 visit.append(p)
1952 1990 missing = list(missing)
1953 1991 missing.sort()
1954 1992 return has, [self.node(miss) for miss in missing]
1955 1993
1956 1994 def incrementalmissingrevs(self, common=None):
1957 1995 """Return an object that can be used to incrementally compute the
1958 1996 revision numbers of the ancestors of arbitrary sets that are not
1959 1997 ancestors of common. This is an ancestor.incrementalmissingancestors
1960 1998 object.
1961 1999
1962 2000 'common' is a list of revision numbers. If common is not supplied, uses
1963 2001 nullrev.
1964 2002 """
1965 2003 if common is None:
1966 2004 common = [nullrev]
1967 2005
1968 2006 if rustancestor is not None and self.index.rust_ext_compat:
1969 2007 return rustancestor.MissingAncestors(self.index, common)
1970 2008 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1971 2009
1972 2010 def findmissingrevs(self, common=None, heads=None):
1973 2011 """Return the revision numbers of the ancestors of heads that
1974 2012 are not ancestors of common.
1975 2013
1976 2014 More specifically, return a list of revision numbers corresponding to
1977 2015 nodes N such that every N satisfies the following constraints:
1978 2016
1979 2017 1. N is an ancestor of some node in 'heads'
1980 2018 2. N is not an ancestor of any node in 'common'
1981 2019
1982 2020 The list is sorted by revision number, meaning it is
1983 2021 topologically sorted.
1984 2022
1985 2023 'heads' and 'common' are both lists of revision numbers. If heads is
1986 2024 not supplied, uses all of the revlog's heads. If common is not
1987 2025 supplied, uses nullid."""
1988 2026 if common is None:
1989 2027 common = [nullrev]
1990 2028 if heads is None:
1991 2029 heads = self.headrevs()
1992 2030
1993 2031 inc = self.incrementalmissingrevs(common=common)
1994 2032 return inc.missingancestors(heads)
1995 2033
1996 2034 def findmissing(self, common=None, heads=None):
1997 2035 """Return the ancestors of heads that are not ancestors of common.
1998 2036
1999 2037 More specifically, return a list of nodes N such that every N
2000 2038 satisfies the following constraints:
2001 2039
2002 2040 1. N is an ancestor of some node in 'heads'
2003 2041 2. N is not an ancestor of any node in 'common'
2004 2042
2005 2043 The list is sorted by revision number, meaning it is
2006 2044 topologically sorted.
2007 2045
2008 2046 'heads' and 'common' are both lists of node IDs. If heads is
2009 2047 not supplied, uses all of the revlog's heads. If common is not
2010 2048 supplied, uses nullid."""
2011 2049 if common is None:
2012 2050 common = [self.nullid]
2013 2051 if heads is None:
2014 2052 heads = self.heads()
2015 2053
2016 2054 common = [self.rev(n) for n in common]
2017 2055 heads = [self.rev(n) for n in heads]
2018 2056
2019 2057 inc = self.incrementalmissingrevs(common=common)
2020 2058 return [self.node(r) for r in inc.missingancestors(heads)]
2021 2059
2022 2060 def nodesbetween(self, roots=None, heads=None):
2023 2061 """Return a topological path from 'roots' to 'heads'.
2024 2062
2025 2063 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2026 2064 topologically sorted list of all nodes N that satisfy both of
2027 2065 these constraints:
2028 2066
2029 2067 1. N is a descendant of some node in 'roots'
2030 2068 2. N is an ancestor of some node in 'heads'
2031 2069
2032 2070 Every node is considered to be both a descendant and an ancestor
2033 2071 of itself, so every reachable node in 'roots' and 'heads' will be
2034 2072 included in 'nodes'.
2035 2073
2036 2074 'outroots' is the list of reachable nodes in 'roots', i.e., the
2037 2075 subset of 'roots' that is returned in 'nodes'. Likewise,
2038 2076 'outheads' is the subset of 'heads' that is also in 'nodes'.
2039 2077
2040 2078 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2041 2079 unspecified, uses nullid as the only root. If 'heads' is
2042 2080 unspecified, uses list of all of the revlog's heads."""
2043 2081 nonodes = ([], [], [])
2044 2082 if roots is not None:
2045 2083 roots = list(roots)
2046 2084 if not roots:
2047 2085 return nonodes
2048 2086 lowestrev = min([self.rev(n) for n in roots])
2049 2087 else:
2050 2088 roots = [self.nullid] # Everybody's a descendant of nullid
2051 2089 lowestrev = nullrev
2052 2090 if (lowestrev == nullrev) and (heads is None):
2053 2091 # We want _all_ the nodes!
2054 2092 return (
2055 2093 [self.node(r) for r in self],
2056 2094 [self.nullid],
2057 2095 list(self.heads()),
2058 2096 )
2059 2097 if heads is None:
2060 2098 # All nodes are ancestors, so the latest ancestor is the last
2061 2099 # node.
2062 2100 highestrev = len(self) - 1
2063 2101 # Set ancestors to None to signal that every node is an ancestor.
2064 2102 ancestors = None
2065 2103 # Set heads to an empty dictionary for later discovery of heads
2066 2104 heads = {}
2067 2105 else:
2068 2106 heads = list(heads)
2069 2107 if not heads:
2070 2108 return nonodes
2071 2109 ancestors = set()
2072 2110 # Turn heads into a dictionary so we can remove 'fake' heads.
2073 2111 # Also, later we will be using it to filter out the heads we can't
2074 2112 # find from roots.
2075 2113 heads = dict.fromkeys(heads, False)
2076 2114 # Start at the top and keep marking parents until we're done.
2077 2115 nodestotag = set(heads)
2078 2116 # Remember where the top was so we can use it as a limit later.
2079 2117 highestrev = max([self.rev(n) for n in nodestotag])
2080 2118 while nodestotag:
2081 2119 # grab a node to tag
2082 2120 n = nodestotag.pop()
2083 2121 # Never tag nullid
2084 2122 if n == self.nullid:
2085 2123 continue
2086 2124 # A node's revision number represents its place in a
2087 2125 # topologically sorted list of nodes.
2088 2126 r = self.rev(n)
2089 2127 if r >= lowestrev:
2090 2128 if n not in ancestors:
2091 2129 # If we are possibly a descendant of one of the roots
2092 2130 # and we haven't already been marked as an ancestor
2093 2131 ancestors.add(n) # Mark as ancestor
2094 2132 # Add non-nullid parents to list of nodes to tag.
2095 2133 nodestotag.update(
2096 2134 [p for p in self.parents(n) if p != self.nullid]
2097 2135 )
2098 2136 elif n in heads: # We've seen it before, is it a fake head?
2099 2137 # So it is, real heads should not be the ancestors of
2100 2138 # any other heads.
2101 2139 heads.pop(n)
2102 2140 if not ancestors:
2103 2141 return nonodes
2104 2142 # Now that we have our set of ancestors, we want to remove any
2105 2143 # roots that are not ancestors.
2106 2144
2107 2145 # If one of the roots was nullid, everything is included anyway.
2108 2146 if lowestrev > nullrev:
2109 2147 # But, since we weren't, let's recompute the lowest rev to not
2110 2148 # include roots that aren't ancestors.
2111 2149
2112 2150 # Filter out roots that aren't ancestors of heads
2113 2151 roots = [root for root in roots if root in ancestors]
2114 2152 # Recompute the lowest revision
2115 2153 if roots:
2116 2154 lowestrev = min([self.rev(root) for root in roots])
2117 2155 else:
2118 2156 # No more roots? Return empty list
2119 2157 return nonodes
2120 2158 else:
2121 2159 # We are descending from nullid, and don't need to care about
2122 2160 # any other roots.
2123 2161 lowestrev = nullrev
2124 2162 roots = [self.nullid]
2125 2163 # Transform our roots list into a set.
2126 2164 descendants = set(roots)
2127 2165 # Also, keep the original roots so we can filter out roots that aren't
2128 2166 # 'real' roots (i.e. are descended from other roots).
2129 2167 roots = descendants.copy()
2130 2168 # Our topologically sorted list of output nodes.
2131 2169 orderedout = []
2132 2170 # Don't start at nullid since we don't want nullid in our output list,
2133 2171 # and if nullid shows up in descendants, empty parents will look like
2134 2172 # they're descendants.
2135 2173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2136 2174 n = self.node(r)
2137 2175 isdescendant = False
2138 2176 if lowestrev == nullrev: # Everybody is a descendant of nullid
2139 2177 isdescendant = True
2140 2178 elif n in descendants:
2141 2179 # n is already a descendant
2142 2180 isdescendant = True
2143 2181 # This check only needs to be done here because all the roots
2144 2182 # will start being marked as descendants before the loop.
2145 2183 if n in roots:
2146 2184 # If n was a root, check if it's a 'real' root.
2147 2185 p = tuple(self.parents(n))
2148 2186 # If any of its parents are descendants, it's not a root.
2149 2187 if (p[0] in descendants) or (p[1] in descendants):
2150 2188 roots.remove(n)
2151 2189 else:
2152 2190 p = tuple(self.parents(n))
2153 2191 # A node is a descendant if either of its parents is a
2154 2192 # descendant. (We seeded the descendants set with the roots
2155 2193 # up there, remember?)
2156 2194 if (p[0] in descendants) or (p[1] in descendants):
2157 2195 descendants.add(n)
2158 2196 isdescendant = True
2159 2197 if isdescendant and ((ancestors is None) or (n in ancestors)):
2160 2198 # Only include nodes that are both descendants and ancestors.
2161 2199 orderedout.append(n)
2162 2200 if (ancestors is not None) and (n in heads):
2163 2201 # We're trying to figure out which heads are reachable
2164 2202 # from roots.
2165 2203 # Mark this head as having been reached
2166 2204 heads[n] = True
2167 2205 elif ancestors is None:
2168 2206 # Otherwise, we're trying to discover the heads.
2169 2207 # Assume this is a head because if it isn't, the next step
2170 2208 # will eventually remove it.
2171 2209 heads[n] = True
2172 2210 # But, obviously its parents aren't.
2173 2211 for p in self.parents(n):
2174 2212 heads.pop(p, None)
2175 2213 heads = [head for head, flag in heads.items() if flag]
2176 2214 roots = list(roots)
2177 2215 assert orderedout
2178 2216 assert roots
2179 2217 assert heads
2180 2218 return (orderedout, roots, heads)
2181 2219
2182 2220 def headrevs(self, revs=None):
2183 2221 if revs is None:
2184 2222 try:
2185 2223 return self.index.headrevs()
2186 2224 except AttributeError:
2187 2225 return self._headrevs()
2188 2226 if rustdagop is not None and self.index.rust_ext_compat:
2189 2227 return rustdagop.headrevs(self.index, revs)
2190 2228 return dagop.headrevs(revs, self._uncheckedparentrevs)
2191 2229
2192 2230 def computephases(self, roots):
2193 2231 return self.index.computephasesmapsets(roots)
2194 2232
2195 2233 def _headrevs(self):
2196 2234 count = len(self)
2197 2235 if not count:
2198 2236 return [nullrev]
2199 2237 # we won't iterate over filtered revs, so nobody is a head at start
2200 2238 ishead = [0] * (count + 1)
2201 2239 index = self.index
2202 2240 for r in self:
2203 2241 ishead[r] = 1 # I may be a head
2204 2242 e = index[r]
2205 2243 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2206 2244 return [r for r, val in enumerate(ishead) if val]
2207 2245
2208 2246 def heads(self, start=None, stop=None):
2209 2247 """return the list of all nodes that have no children
2210 2248
2211 2249 if start is specified, only heads that are descendants of
2212 2250 start will be returned
2213 2251 if stop is specified, it will consider all the revs from stop
2214 2252 as if they had no children
2215 2253 """
2216 2254 if start is None and stop is None:
2217 2255 if not len(self):
2218 2256 return [self.nullid]
2219 2257 return [self.node(r) for r in self.headrevs()]
2220 2258
2221 2259 if start is None:
2222 2260 start = nullrev
2223 2261 else:
2224 2262 start = self.rev(start)
2225 2263
2226 2264 stoprevs = {self.rev(n) for n in stop or []}
2227 2265
2228 2266 revs = dagop.headrevssubset(
2229 2267 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2230 2268 )
2231 2269
2232 2270 return [self.node(rev) for rev in revs]
2233 2271
2234 2272 def children(self, node):
2235 2273 """find the children of a given node"""
2236 2274 c = []
2237 2275 p = self.rev(node)
2238 2276 for r in self.revs(start=p + 1):
2239 2277 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2240 2278 if prevs:
2241 2279 for pr in prevs:
2242 2280 if pr == p:
2243 2281 c.append(self.node(r))
2244 2282 elif p == nullrev:
2245 2283 c.append(self.node(r))
2246 2284 return c
2247 2285
2248 2286 def commonancestorsheads(self, a, b):
2249 2287 """calculate all the heads of the common ancestors of nodes a and b"""
2250 2288 a, b = self.rev(a), self.rev(b)
2251 2289 ancs = self._commonancestorsheads(a, b)
2252 2290 return pycompat.maplist(self.node, ancs)
2253 2291
2254 2292 def _commonancestorsheads(self, *revs):
2255 2293 """calculate all the heads of the common ancestors of revs"""
2256 2294 try:
2257 2295 ancs = self.index.commonancestorsheads(*revs)
2258 2296 except (AttributeError, OverflowError): # C implementation failed
2259 2297 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2260 2298 return ancs
2261 2299
2262 2300 def isancestor(self, a, b):
2263 2301 """return True if node a is an ancestor of node b
2264 2302
2265 2303 A revision is considered an ancestor of itself."""
2266 2304 a, b = self.rev(a), self.rev(b)
2267 2305 return self.isancestorrev(a, b)
2268 2306
2269 2307 def isancestorrev(self, a, b):
2270 2308 """return True if revision a is an ancestor of revision b
2271 2309
2272 2310 A revision is considered an ancestor of itself.
2273 2311
2274 2312 The implementation of this is trivial but the use of
2275 2313 reachableroots is not."""
2276 2314 if a == nullrev:
2277 2315 return True
2278 2316 elif a == b:
2279 2317 return True
2280 2318 elif a > b:
2281 2319 return False
2282 2320 return bool(self.reachableroots(a, [b], [a], includepath=False))
2283 2321
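# Editor's sketch: per the docstring below, with includepath=False
# reachableroots() returns heads(::(<roots> and <roots>::<heads>)), so a
# non-empty result above means `a` lies in `::b`. A hypothetical helper
# contrasting the two modes:
def reachable_demo(rl, a, b):
    ancestors_only = rl.reachableroots(a, [b], [a], includepath=False)
    with_path = rl.reachableroots(a, [b], [a], includepath=True)
    # ancestors_only is truthy iff a is an ancestor of b; with_path also
    # lists every revision on the a::b paths.
    return ancestors_only, with_path
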
2284 2322 def reachableroots(self, minroot, heads, roots, includepath=False):
2285 2323 """return (heads(::(<roots> and <roots>::<heads>)))
2286 2324
2287 2325 If includepath is True, return (<roots>::<heads>)."""
2288 2326 try:
2289 2327 return self.index.reachableroots2(
2290 2328 minroot, heads, roots, includepath
2291 2329 )
2292 2330 except AttributeError:
2293 2331 return dagop._reachablerootspure(
2294 2332 self.parentrevs, minroot, roots, heads, includepath
2295 2333 )
2296 2334
2297 2335 def ancestor(self, a, b):
2298 2336 """calculate the "best" common ancestor of nodes a and b"""
2299 2337
2300 2338 a, b = self.rev(a), self.rev(b)
2301 2339 try:
2302 2340 ancs = self.index.ancestors(a, b)
2303 2341 except (AttributeError, OverflowError):
2304 2342 ancs = ancestor.ancestors(self.parentrevs, a, b)
2305 2343 if ancs:
2306 2344 # choose a consistent winner when there's a tie
2307 2345 return min(map(self.node, ancs))
2308 2346 return self.nullid
2309 2347
2310 2348 def _match(self, id):
2311 2349 if isinstance(id, int):
2312 2350 # rev
2313 2351 return self.node(id)
2314 2352 if len(id) == self.nodeconstants.nodelen:
2315 2353 # possibly a binary node
2316 2354 # odds of a binary node being all hex in ASCII are 1 in 10**25
2317 2355 try:
2318 2356 node = id
2319 2357 self.rev(node) # quick search the index
2320 2358 return node
2321 2359 except error.LookupError:
2322 2360 pass # may be partial hex id
2323 2361 try:
2324 2362 # str(rev)
2325 2363 rev = int(id)
2326 2364 if b"%d" % rev != id:
2327 2365 raise ValueError
2328 2366 if rev < 0:
2329 2367 rev = len(self) + rev
2330 2368 if rev < 0 or rev >= len(self):
2331 2369 raise ValueError
2332 2370 return self.node(rev)
2333 2371 except (ValueError, OverflowError):
2334 2372 pass
2335 2373 if len(id) == 2 * self.nodeconstants.nodelen:
2336 2374 try:
2337 2375 # a full hex nodeid?
2338 2376 node = bin(id)
2339 2377 self.rev(node)
2340 2378 return node
2341 2379 except (binascii.Error, error.LookupError):
2342 2380 pass
2343 2381
2344 2382 def _partialmatch(self, id):
2345 2383 # we don't care about wdirfilenodeids as they should always be full hashes
2346 2384 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2347 2385 ambiguous = False
2348 2386 try:
2349 2387 partial = self.index.partialmatch(id)
2350 2388 if partial and self.hasnode(partial):
2351 2389 if maybewdir:
2352 2390 # single 'ff...' match in radix tree, ambiguous with wdir
2353 2391 ambiguous = True
2354 2392 else:
2355 2393 return partial
2356 2394 elif maybewdir:
2357 2395 # no 'ff...' match in radix tree, wdir identified
2358 2396 raise error.WdirUnsupported
2359 2397 else:
2360 2398 return None
2361 2399 except error.RevlogError:
2362 2400 # parsers.c radix tree lookup gave multiple matches
2363 2401 # fast path: for unfiltered changelog, radix tree is accurate
2364 2402 if not getattr(self, 'filteredrevs', None):
2365 2403 ambiguous = True
2366 2404 # fall through to slow path that filters hidden revisions
2367 2405 except (AttributeError, ValueError):
2368 2406 # we are pure python, or key is not hex
2369 2407 pass
2370 2408 if ambiguous:
2371 2409 raise error.AmbiguousPrefixLookupError(
2372 2410 id, self.display_id, _(b'ambiguous identifier')
2373 2411 )
2374 2412
2375 2413 if id in self._pcache:
2376 2414 return self._pcache[id]
2377 2415
2378 2416 if len(id) <= 40:
2379 2417 # hex(node)[:...]
2380 2418 l = len(id) // 2 * 2 # grab an even number of digits
2381 2419 try:
2382 2420 # we're dropping the last digit, so let's check that it's hex,
2383 2421 # to avoid the expensive computation below if it's not
2384 2422 if len(id) % 2 > 0:
2385 2423 if id[-1] not in hexdigits:
2386 2424 return None
2387 2425 prefix = bin(id[:l])
2388 2426 except binascii.Error:
2389 2427 pass
2390 2428 else:
2391 2429 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2392 2430 nl = [
2393 2431 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2394 2432 ]
2395 2433 if self.nodeconstants.nullhex.startswith(id):
2396 2434 nl.append(self.nullid)
2397 2435 if len(nl) > 0:
2398 2436 if len(nl) == 1 and not maybewdir:
2399 2437 self._pcache[id] = nl[0]
2400 2438 return nl[0]
2401 2439 raise error.AmbiguousPrefixLookupError(
2402 2440 id, self.display_id, _(b'ambiguous identifier')
2403 2441 )
2404 2442 if maybewdir:
2405 2443 raise error.WdirUnsupported
2406 2444 return None
2407 2445
2408 2446 def lookup(self, id):
2409 2447 """locate a node based on:
2410 2448 - revision number or str(revision number)
2411 2449 - nodeid or subset of hex nodeid
2412 2450 """
2413 2451 n = self._match(id)
2414 2452 if n is not None:
2415 2453 return n
2416 2454 n = self._partialmatch(id)
2417 2455 if n:
2418 2456 return n
2419 2457
2420 2458 raise error.LookupError(id, self.display_id, _(b'no match found'))
2421 2459
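# Editor's sketch: the identifier forms lookup() accepts, per _match()
# and _partialmatch() above (hypothetical usage against a revlog `rl`):
def lookup_examples(rl, node):
    by_rev = rl.lookup(0)  # revision number
    by_revstr = rl.lookup(b'0')  # str(revision number), as bytes
    by_node = rl.lookup(node)  # full binary nodeid
    by_hex = rl.lookup(hex(node))  # full hex nodeid
    by_prefix = rl.lookup(hex(node)[:6])  # hex prefix, if unambiguous
    return by_rev, by_revstr, by_node, by_hex, by_prefix
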
2422 2460 def shortest(self, node, minlength=1):
2423 2461 """Find the shortest unambiguous prefix that matches node."""
2424 2462
2425 2463 def isvalid(prefix):
2426 2464 try:
2427 2465 matchednode = self._partialmatch(prefix)
2428 2466 except error.AmbiguousPrefixLookupError:
2429 2467 return False
2430 2468 except error.WdirUnsupported:
2431 2469 # single 'ff...' match
2432 2470 return True
2433 2471 if matchednode is None:
2434 2472 raise error.LookupError(node, self.display_id, _(b'no node'))
2435 2473 return True
2436 2474
2437 2475 def maybewdir(prefix):
2438 2476 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2439 2477
2440 2478 hexnode = hex(node)
2441 2479
2442 2480 def disambiguate(hexnode, minlength):
2443 2481 """Disambiguate against wdirid."""
2444 2482 for length in range(minlength, len(hexnode) + 1):
2445 2483 prefix = hexnode[:length]
2446 2484 if not maybewdir(prefix):
2447 2485 return prefix
2448 2486
2449 2487 if not getattr(self, 'filteredrevs', None):
2450 2488 try:
2451 2489 length = max(self.index.shortest(node), minlength)
2452 2490 return disambiguate(hexnode, length)
2453 2491 except error.RevlogError:
2454 2492 if node != self.nodeconstants.wdirid:
2455 2493 raise error.LookupError(
2456 2494 node, self.display_id, _(b'no node')
2457 2495 )
2458 2496 except AttributeError:
2459 2497 # Fall through to pure code
2460 2498 pass
2461 2499
2462 2500 if node == self.nodeconstants.wdirid:
2463 2501 for length in range(minlength, len(hexnode) + 1):
2464 2502 prefix = hexnode[:length]
2465 2503 if isvalid(prefix):
2466 2504 return prefix
2467 2505
2468 2506 for length in range(minlength, len(hexnode) + 1):
2469 2507 prefix = hexnode[:length]
2470 2508 if isvalid(prefix):
2471 2509 return disambiguate(hexnode, length)
2472 2510
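# Editor's sketch: a hypothetical bulk use of shortest(), mapping every
# node to its shortest unambiguous hex prefix (roughly what the
# shortest() template function exposes to users):
def short_ids(rl, minlength=1):
    return {rl.node(r): rl.shortest(rl.node(r), minlength) for r in rl}
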
2473 2511 def cmp(self, node, text):
2474 2512 """compare text with a given file revision
2475 2513
2476 2514 returns True if text is different than what is stored.
2477 2515 """
2478 2516 p1, p2 = self.parents(node)
2479 2517 return storageutil.hashrevisionsha1(text, p1, p2) != node
2480 2518
2481 2519 def deltaparent(self, rev):
2482 2520 """return deltaparent of the given revision"""
2483 2521 base = self.index[rev][3]
2484 2522 if base == rev:
2485 2523 return nullrev
2486 2524 elif self.delta_config.general_delta:
2487 2525 return base
2488 2526 else:
2489 2527 return rev - 1
2490 2528
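# Editor's sketch: deltaparent() is enough to walk a delta chain back to
# its snapshot base by hand (hypothetical helper; _deltachain() does this
# for real, with caching):
def chain_via_deltaparent(rl, rev):
    chain = [rev]
    while rl.deltaparent(chain[-1]) != nullrev:
        chain.append(rl.deltaparent(chain[-1]))
    chain.reverse()
    return chain  # base-first, ending at `rev`
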
2491 2529 def issnapshot(self, rev):
2492 2530 """tells whether rev is a snapshot"""
2493 2531 ret = self._inner.issnapshot(rev)
2494 2532 self.issnapshot = self._inner.issnapshot
2495 2533 return ret
2496 2534
2497 2535 def snapshotdepth(self, rev):
2498 2536 """number of snapshot in the chain before this one"""
2499 2537 if not self.issnapshot(rev):
2500 2538 raise error.ProgrammingError(b'revision %d not a snapshot')
2501 2539 return len(self._inner._deltachain(rev)[0]) - 1
2502 2540
2503 2541 def revdiff(self, rev1, rev2):
2504 2542 """return or calculate a delta between two revisions
2505 2543
2506 2544 The delta calculated is in binary form and is intended to be written to
2507 2545 revlog data directly. So this function needs raw revision data.
2508 2546 """
2509 2547 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2510 2548 return bytes(self._inner._chunk(rev2))
2511 2549
2512 2550 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2513 2551
2514 2552 def revision(self, nodeorrev):
2515 2553 """return an uncompressed revision of a given node or revision
2516 2554 number.
2517 2555 """
2518 2556 return self._revisiondata(nodeorrev)
2519 2557
2520 2558 def sidedata(self, nodeorrev):
2521 2559 """a map of extra data related to the changeset but not part of the hash
2522 2560
2523 2561 This function currently returns a dictionary. However, a more advanced
2524 2562 mapping object will likely be used in the future for more
2525 2563 efficient/lazy code.
2526 2564 """
2527 2565 # deal with <nodeorrev> argument type
2528 2566 if isinstance(nodeorrev, int):
2529 2567 rev = nodeorrev
2530 2568 else:
2531 2569 rev = self.rev(nodeorrev)
2532 2570 return self._sidedata(rev)
2533 2571
2572 def _rawtext(self, node, rev):
2573 """return the possibly unvalidated rawtext for a revision
2574
2575 returns (rev, rawtext, validated)
2576 """
2577 # Check if we have the entry in cache
2578 # The cache entry looks like (node, rev, rawtext)
2579 if self._inner._revisioncache:
2580 if self._inner._revisioncache[0] == node:
2581 return (rev, self._inner._revisioncache[2], True)
2582
2583 if rev is None:
2584 rev = self.rev(node)
2585
2586 return self._inner.raw_text(node, rev)
2587
2534 2588 def _revisiondata(self, nodeorrev, raw=False):
2535 2589 # deal with <nodeorrev> argument type
2536 2590 if isinstance(nodeorrev, int):
2537 2591 rev = nodeorrev
2538 2592 node = self.node(rev)
2539 2593 else:
2540 2594 node = nodeorrev
2541 2595 rev = None
2542 2596
2543 2597 # fast path the special `nullid` rev
2544 2598 if node == self.nullid:
2545 2599 return b""
2546 2600
2547 2601 # ``rawtext`` is the text as stored inside the revlog. Might be the
2548 2602 # revision or might need to be processed to retrieve the revision.
2549 2603 rev, rawtext, validated = self._rawtext(node, rev)
2550 2604
2551 2605 if raw and validated:
2552 2606 # if we don't want to process the raw text and that raw
2553 2607 # text is cached, we can exit early.
2554 2608 return rawtext
2555 2609 if rev is None:
2556 2610 rev = self.rev(node)
2557 2611 # the revlog's flag for this revision
2558 2612 # (usually alter its state or content)
2559 2613 flags = self.flags(rev)
2560 2614
2561 2615 if validated and flags == REVIDX_DEFAULT_FLAGS:
2562 2616 # no extra flags set, no flag processor runs, text = rawtext
2563 2617 return rawtext
2564 2618
2565 2619 if raw:
2566 2620 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2567 2621 text = rawtext
2568 2622 else:
2569 2623 r = flagutil.processflagsread(self, rawtext, flags)
2570 2624 text, validatehash = r
2571 2625 if validatehash:
2572 2626 self.checkhash(text, node, rev=rev)
2573 2627 if not validated:
2574 2628 self._inner._revisioncache = (node, rev, rawtext)
2575 2629
2576 2630 return text
2577 2631
2578 def _rawtext(self, node, rev):
2579 """return the possibly unvalidated rawtext for a revision
2580
2581 returns (rev, rawtext, validated)
2582 """
2583
2584 # revision in the cache (could be useful to apply delta)
2585 cachedrev = None
2586 # An intermediate text to apply deltas to
2587 basetext = None
2588
2589 # Check if we have the entry in cache
2590 # The cache entry looks like (node, rev, rawtext)
2591 if self._inner._revisioncache:
2592 if self._inner._revisioncache[0] == node:
2593 return (rev, self._inner._revisioncache[2], True)
2594 cachedrev = self._inner._revisioncache[1]
2595
2596 if rev is None:
2597 rev = self.rev(node)
2598
2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2600 if stopped:
2601 basetext = self._inner._revisioncache[2]
2602
2603 # drop cache to save memory, the caller is expected to
2604 # update self._inner._revisioncache after validating the text
2605 self._inner._revisioncache = None
2606
2607 targetsize = None
2608 rawsize = self.index[rev][2]
2609 if 0 <= rawsize:
2610 targetsize = 4 * rawsize
2611
2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2613 if basetext is None:
2614 basetext = bytes(bins[0])
2615 bins = bins[1:]
2616
2617 rawtext = mdiff.patches(basetext, bins)
2618 del basetext # let us have a chance to free memory early
2619 return (rev, rawtext, False)
2620
2621 2632 def _sidedata(self, rev):
2622 2633 """Return the sidedata for a given revision number."""
2623 2634 index_entry = self.index[rev]
2624 2635 sidedata_offset = index_entry[8]
2625 2636 sidedata_size = index_entry[9]
2626 2637
2627 2638 if self._inline:
2628 2639 sidedata_offset += self.index.entry_size * (1 + rev)
2629 2640 if sidedata_size == 0:
2630 2641 return {}
2631 2642
2632 2643 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2633 2644 filename = self._sidedatafile
2634 2645 end = self._docket.sidedata_end
2635 2646 offset = sidedata_offset
2636 2647 length = sidedata_size
2637 2648 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2638 2649 raise error.RevlogError(m)
2639 2650
2640 2651 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2641 2652 sidedata_offset, sidedata_size
2642 2653 )
2643 2654
2644 2655 comp = self.index[rev][11]
2645 2656 if comp == COMP_MODE_PLAIN:
2646 2657 segment = comp_segment
2647 2658 elif comp == COMP_MODE_DEFAULT:
2648 2659 segment = self._inner._decompressor(comp_segment)
2649 2660 elif comp == COMP_MODE_INLINE:
2650 2661 segment = self._inner.decompress(comp_segment)
2651 2662 else:
2652 2663 msg = b'unknown compression mode %d'
2653 2664 msg %= comp
2654 2665 raise error.RevlogError(msg)
2655 2666
2656 2667 sidedata = sidedatautil.deserialize_sidedata(segment)
2657 2668 return sidedata
2658 2669
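# Editor's sketch: the three compression modes handled above, restated as
# a dispatch table (hypothetical helper over the same inner object):
def decode_sidedata_segment(inner, comp, segment):
    handlers = {
        COMP_MODE_PLAIN: lambda s: s,  # stored verbatim
        COMP_MODE_DEFAULT: inner._decompressor,  # docket-level default
        COMP_MODE_INLINE: inner.decompress,  # self-describing header
    }
    if comp not in handlers:
        raise error.RevlogError(b'unknown compression mode %d' % comp)
    return handlers[comp](segment)
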
2659 2670 def rawdata(self, nodeorrev):
2660 2671 """return an uncompressed raw data of a given node or revision number."""
2661 2672 return self._revisiondata(nodeorrev, raw=True)
2662 2673
2663 2674 def hash(self, text, p1, p2):
2664 2675 """Compute a node hash.
2665 2676
2666 2677 Available as a function so that subclasses can replace the hash
2667 2678 as needed.
2668 2679 """
2669 2680 return storageutil.hashrevisionsha1(text, p1, p2)
2670 2681
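# Editor's sketch of the classic Mercurial node computation performed by
# storageutil.hashrevisionsha1 (parents are 20-byte binary nodeids;
# sorting makes the hash independent of parent order):
import hashlib

def node_sha1(text, p1, p2):
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()
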
2671 2682 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2672 2683 """Check node hash integrity.
2673 2684
2674 2685 Available as a function so that subclasses can extend hash mismatch
2675 2686 behaviors as needed.
2676 2687 """
2677 2688 try:
2678 2689 if p1 is None and p2 is None:
2679 2690 p1, p2 = self.parents(node)
2680 2691 if node != self.hash(text, p1, p2):
2681 2692 # Clear the revision cache on hash failure. The revision cache
2682 2693 # only stores the raw revision and clearing the cache does have
2683 2694 # the side-effect that we won't have a cache hit when the raw
2684 2695 # revision data is accessed. But this case should be rare and
2685 2696 # it is extra work to teach the cache about the hash
2686 2697 # verification state.
2687 2698 if (
2688 2699 self._inner._revisioncache
2689 2700 and self._inner._revisioncache[0] == node
2690 2701 ):
2691 2702 self._inner._revisioncache = None
2692 2703
2693 2704 revornode = rev
2694 2705 if revornode is None:
2695 2706 revornode = templatefilters.short(hex(node))
2696 2707 raise error.RevlogError(
2697 2708 _(b"integrity check failed on %s:%s")
2698 2709 % (self.display_id, pycompat.bytestr(revornode))
2699 2710 )
2700 2711 except error.RevlogError:
2701 2712 if self.feature_config.censorable and storageutil.iscensoredtext(
2702 2713 text
2703 2714 ):
2704 2715 raise error.CensoredNodeError(self.display_id, node, text)
2705 2716 raise
2706 2717
2707 2718 @property
2708 2719 def _split_index_file(self):
2709 2720 """the path where to expect the index of an ongoing splitting operation
2710 2721
2711 2722 The file will only exist if a splitting operation is in progress, but
2712 2723 it is always expected at the same location."""
2713 2724 parts = self.radix.split(b'/')
2714 2725 if len(parts) > 1:
2715 2726 # adds a '-s' suffix to the `data/` or `meta/` base
2716 2727 head = parts[0] + b'-s'
2717 2728 mids = parts[1:-1]
2718 2729 tail = parts[-1] + b'.i'
2719 2730 pieces = [head] + mids + [tail]
2720 2731 return b'/'.join(pieces)
2721 2732 else:
2722 2733 # the revlog is stored at the root of the store (changelog or
2723 2734 # manifest), no risk of collision.
2724 2735 return self.radix + b'.i.s'
2725 2736
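# Editor's sketch: a standalone restatement of the property above, with
# concrete results for the two layouts:
def split_index_path(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'

# split_index_path(b'data/foo') == b'data-s/foo.i'
# split_index_path(b'00manifest') == b'00manifest.i.s'
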
2726 2737 def _enforceinlinesize(self, tr, side_write=True):
2727 2738 """Check if the revlog is too big for inline and convert if so.
2728 2739
2729 2740 This should be called after revisions are added to the revlog. If the
2730 2741 revlog has grown too large to be an inline revlog, it will convert it
2731 2742 to use multiple index and data files.
2732 2743 """
2733 2744 tiprev = len(self) - 1
2734 2745 total_size = self.start(tiprev) + self.length(tiprev)
2735 2746 if not self._inline or total_size < _maxinline:
2736 2747 return
2737 2748
2738 2749 if self._docket is not None:
2739 2750 msg = b"inline revlog should not have a docket"
2740 2751 raise error.ProgrammingError(msg)
2741 2752
2742 2753 troffset = tr.findoffset(self._indexfile)
2743 2754 if troffset is None:
2744 2755 raise error.RevlogError(
2745 2756 _(b"%s not found in the transaction") % self._indexfile
2746 2757 )
2747 2758 if troffset:
2748 2759 tr.addbackup(self._indexfile, for_offset=True)
2749 2760 tr.add(self._datafile, 0)
2750 2761
2751 2762 new_index_file_path = None
2752 2763 if side_write:
2753 2764 old_index_file_path = self._indexfile
2754 2765 new_index_file_path = self._split_index_file
2755 2766 opener = self.opener
2756 2767 weak_self = weakref.ref(self)
2757 2768
2758 2769 # the "split" index replace the real index when the transaction is
2759 2770 # finalized
2760 2771 def finalize_callback(tr):
2761 2772 opener.rename(
2762 2773 new_index_file_path,
2763 2774 old_index_file_path,
2764 2775 checkambig=True,
2765 2776 )
2766 2777 maybe_self = weak_self()
2767 2778 if maybe_self is not None:
2768 2779 maybe_self._indexfile = old_index_file_path
2769 2780 maybe_self._inner.index_file = maybe_self._indexfile
2770 2781
2771 2782 def abort_callback(tr):
2772 2783 maybe_self = weak_self()
2773 2784 if maybe_self is not None:
2774 2785 maybe_self._indexfile = old_index_file_path
2775 2786 maybe_self._inner.inline = True
2776 2787 maybe_self._inner.index_file = old_index_file_path
2777 2788
2778 2789 tr.registertmp(new_index_file_path)
2779 2790 if self.target[1] is not None:
2780 2791 callback_id = b'000-revlog-split-%d-%s' % self.target
2781 2792 else:
2782 2793 callback_id = b'000-revlog-split-%d' % self.target[0]
2783 2794 tr.addfinalize(callback_id, finalize_callback)
2784 2795 tr.addabort(callback_id, abort_callback)
2785 2796
2786 2797 self._format_flags &= ~FLAG_INLINE_DATA
2787 2798 self._inner.split_inline(
2788 2799 tr,
2789 2800 self._format_flags | self._format_version,
2790 2801 new_index_file_path=new_index_file_path,
2791 2802 )
2792 2803
2793 2804 self._inline = False
2794 2805 if new_index_file_path is not None:
2795 2806 self._indexfile = new_index_file_path
2796 2807
2797 2808 nodemaputil.setup_persistent_nodemap(tr, self)
2798 2809
2799 2810 def _nodeduplicatecallback(self, transaction, node):
2800 2811 """called when trying to add a node already stored."""
2801 2812
2802 2813 @contextlib.contextmanager
2803 2814 def reading(self):
2804 2815 with self._inner.reading():
2805 2816 yield
2806 2817
2807 2818 @contextlib.contextmanager
2808 2819 def _writing(self, transaction):
2809 2820 if self._trypending:
2810 2821 msg = b'trying to write in a `trypending` revlog: %s'
2811 2822 msg %= self.display_id
2812 2823 raise error.ProgrammingError(msg)
2813 2824 if self._inner.is_writing:
2814 2825 yield
2815 2826 else:
2816 2827 data_end = None
2817 2828 sidedata_end = None
2818 2829 if self._docket is not None:
2819 2830 data_end = self._docket.data_end
2820 2831 sidedata_end = self._docket.sidedata_end
2821 2832 with self._inner.writing(
2822 2833 transaction,
2823 2834 data_end=data_end,
2824 2835 sidedata_end=sidedata_end,
2825 2836 ):
2826 2837 yield
2827 2838 if self._docket is not None:
2828 2839 self._write_docket(transaction)
2829 2840
2830 2841 def _write_docket(self, transaction):
2831 2842 """write the current docket on disk
2832 2843
2833 2844 Exists as a method to help the changelog implement transaction logic.
2834 2845
2835 2846 We could also imagine using the same transaction logic for all revlogs
2836 2847 since dockets are cheap."""
2837 2848 self._docket.write(transaction)
2838 2849
2839 2850 def addrevision(
2840 2851 self,
2841 2852 text,
2842 2853 transaction,
2843 2854 link,
2844 2855 p1,
2845 2856 p2,
2846 2857 cachedelta=None,
2847 2858 node=None,
2848 2859 flags=REVIDX_DEFAULT_FLAGS,
2849 2860 deltacomputer=None,
2850 2861 sidedata=None,
2851 2862 ):
2852 2863 """add a revision to the log
2853 2864
2854 2865 text - the revision data to add
2855 2866 transaction - the transaction object used for rollback
2856 2867 link - the linkrev data to add
2857 2868 p1, p2 - the parent nodeids of the revision
2858 2869 cachedelta - an optional precomputed delta
2859 2870 node - nodeid of revision; typically node is not specified, and it is
2860 2871 computed by default as hash(text, p1, p2); however, subclasses might
2861 2872 use a different hashing method (and override checkhash() in such a case)
2862 2873 flags - the known flags to set on the revision
2863 2874 deltacomputer - an optional deltacomputer instance shared between
2864 2875 multiple calls
2865 2876 """
2866 2877 if link == nullrev:
2867 2878 raise error.RevlogError(
2868 2879 _(b"attempted to add linkrev -1 to %s") % self.display_id
2869 2880 )
2870 2881
2871 2882 if sidedata is None:
2872 2883 sidedata = {}
2873 2884 elif sidedata and not self.feature_config.has_side_data:
2874 2885 raise error.ProgrammingError(
2875 2886 _(b"trying to add sidedata to a revlog who don't support them")
2876 2887 )
2877 2888
2878 2889 if flags:
2879 2890 node = node or self.hash(text, p1, p2)
2880 2891
2881 2892 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2882 2893
2883 2894 # If the flag processor modifies the revision data, ignore any provided
2884 2895 # cachedelta.
2885 2896 if rawtext != text:
2886 2897 cachedelta = None
2887 2898
2888 2899 if len(rawtext) > _maxentrysize:
2889 2900 raise error.RevlogError(
2890 2901 _(
2891 2902 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2892 2903 )
2893 2904 % (self.display_id, len(rawtext))
2894 2905 )
2895 2906
2896 2907 node = node or self.hash(rawtext, p1, p2)
2897 2908 rev = self.index.get_rev(node)
2898 2909 if rev is not None:
2899 2910 return rev
2900 2911
2901 2912 if validatehash:
2902 2913 self.checkhash(rawtext, node, p1=p1, p2=p2)
2903 2914
2904 2915 return self.addrawrevision(
2905 2916 rawtext,
2906 2917 transaction,
2907 2918 link,
2908 2919 p1,
2909 2920 p2,
2910 2921 node,
2911 2922 flags,
2912 2923 cachedelta=cachedelta,
2913 2924 deltacomputer=deltacomputer,
2914 2925 sidedata=sidedata,
2915 2926 )
2916 2927
2917 2928 def addrawrevision(
2918 2929 self,
2919 2930 rawtext,
2920 2931 transaction,
2921 2932 link,
2922 2933 p1,
2923 2934 p2,
2924 2935 node,
2925 2936 flags,
2926 2937 cachedelta=None,
2927 2938 deltacomputer=None,
2928 2939 sidedata=None,
2929 2940 ):
2930 2941 """add a raw revision with known flags, node and parents
2931 2942 useful when reusing a revision not stored in this revlog (ex: received
2932 2943 over wire, or read from an external bundle).
2933 2944 """
2934 2945 with self._writing(transaction):
2935 2946 return self._addrevision(
2936 2947 node,
2937 2948 rawtext,
2938 2949 transaction,
2939 2950 link,
2940 2951 p1,
2941 2952 p2,
2942 2953 flags,
2943 2954 cachedelta,
2944 2955 deltacomputer=deltacomputer,
2945 2956 sidedata=sidedata,
2946 2957 )
2947 2958
2948 2959 def compress(self, data):
2949 2960 return self._inner.compress(data)
2950 2961
2951 2962 def decompress(self, data):
2952 2963 return self._inner.decompress(data)
2953 2964
2954 2965 def _addrevision(
2955 2966 self,
2956 2967 node,
2957 2968 rawtext,
2958 2969 transaction,
2959 2970 link,
2960 2971 p1,
2961 2972 p2,
2962 2973 flags,
2963 2974 cachedelta,
2964 2975 alwayscache=False,
2965 2976 deltacomputer=None,
2966 2977 sidedata=None,
2967 2978 ):
2968 2979 """internal function to add revisions to the log
2969 2980
2970 2981 see addrevision for argument descriptions.
2971 2982
2972 2983 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2973 2984
2974 2985 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2975 2986 be used.
2976 2987
2977 2988 invariants:
2978 2989 - rawtext is optional (can be None); if not set, cachedelta must be set.
2979 2990 if both are set, they must correspond to each other.
2980 2991 """
2981 2992 if node == self.nullid:
2982 2993 raise error.RevlogError(
2983 2994 _(b"%s: attempt to add null revision") % self.display_id
2984 2995 )
2985 2996 if (
2986 2997 node == self.nodeconstants.wdirid
2987 2998 or node in self.nodeconstants.wdirfilenodeids
2988 2999 ):
2989 3000 raise error.RevlogError(
2990 3001 _(b"%s: attempt to add wdir revision") % self.display_id
2991 3002 )
2992 3003 if self._inner._writinghandles is None:
2993 3004 msg = b'adding revision outside `revlog._writing` context'
2994 3005 raise error.ProgrammingError(msg)
2995 3006
2996 3007 btext = [rawtext]
2997 3008
2998 3009 curr = len(self)
2999 3010 prev = curr - 1
3000 3011
3001 3012 offset = self._get_data_offset(prev)
3002 3013
3003 3014 if self._concurrencychecker:
3004 3015 ifh, dfh, sdfh = self._inner._writinghandles
3005 3016 # XXX no checking for the sidedata file
3006 3017 if self._inline:
3007 3018 # offset is "as if" it were in the .d file, so we need to add on
3008 3019 # the size of the entry metadata.
3009 3020 self._concurrencychecker(
3010 3021 ifh, self._indexfile, offset + curr * self.index.entry_size
3011 3022 )
3012 3023 else:
3013 3024 # Entries in the .i are a consistent size.
3014 3025 self._concurrencychecker(
3015 3026 ifh, self._indexfile, curr * self.index.entry_size
3016 3027 )
3017 3028 self._concurrencychecker(dfh, self._datafile, offset)
3018 3029
3019 3030 p1r, p2r = self.rev(p1), self.rev(p2)
3020 3031
3021 3032 # full versions are inserted when the needed deltas
3022 3033 # become comparable to the uncompressed text
3023 3034 if rawtext is None:
3024 3035 # need the rawtext size before it was changed by flag processors,
3025 3036 # i.e. the non-raw size. Use revlog explicitly to avoid filelog's extra
3026 3037 # logic that might remove metadata size.
3027 3038 textlen = mdiff.patchedsize(
3028 3039 revlog.size(self, cachedelta[0]), cachedelta[1]
3029 3040 )
3030 3041 else:
3031 3042 textlen = len(rawtext)
3032 3043
3033 3044 if deltacomputer is None:
3034 3045 write_debug = None
3035 3046 if self.delta_config.debug_delta:
3036 3047 write_debug = transaction._report
3037 3048 deltacomputer = deltautil.deltacomputer(
3038 3049 self, write_debug=write_debug
3039 3050 )
3040 3051
3041 3052 if cachedelta is not None and len(cachedelta) == 2:
3042 3053 # If the cached delta has no information about how it should be
3043 3054 # reused, add the default reuse instruction according to the
3044 3055 # revlog's configuration.
3045 3056 if (
3046 3057 self.delta_config.general_delta
3047 3058 and self.delta_config.lazy_delta_base
3048 3059 ):
3049 3060 delta_base_reuse = DELTA_BASE_REUSE_TRY
3050 3061 else:
3051 3062 delta_base_reuse = DELTA_BASE_REUSE_NO
3052 3063 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3053 3064
3054 3065 revinfo = revlogutils.revisioninfo(
3055 3066 node,
3056 3067 p1,
3057 3068 p2,
3058 3069 btext,
3059 3070 textlen,
3060 3071 cachedelta,
3061 3072 flags,
3062 3073 )
3063 3074
3064 3075 deltainfo = deltacomputer.finddeltainfo(revinfo)
3065 3076
3066 3077 compression_mode = COMP_MODE_INLINE
3067 3078 if self._docket is not None:
3068 3079 default_comp = self._docket.default_compression_header
3069 3080 r = deltautil.delta_compression(default_comp, deltainfo)
3070 3081 compression_mode, deltainfo = r
3071 3082
3072 3083 sidedata_compression_mode = COMP_MODE_INLINE
3073 3084 if sidedata and self.feature_config.has_side_data:
3074 3085 sidedata_compression_mode = COMP_MODE_PLAIN
3075 3086 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3076 3087 sidedata_offset = self._docket.sidedata_end
3077 3088 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3078 3089 if (
3079 3090 h != b'u'
3080 3091 and comp_sidedata[0:1] != b'\0'
3081 3092 and len(comp_sidedata) < len(serialized_sidedata)
3082 3093 ):
3083 3094 assert not h
3084 3095 if (
3085 3096 comp_sidedata[0:1]
3086 3097 == self._docket.default_compression_header
3087 3098 ):
3088 3099 sidedata_compression_mode = COMP_MODE_DEFAULT
3089 3100 serialized_sidedata = comp_sidedata
3090 3101 else:
3091 3102 sidedata_compression_mode = COMP_MODE_INLINE
3092 3103 serialized_sidedata = comp_sidedata
3093 3104 else:
3094 3105 serialized_sidedata = b""
3095 3106 # Don't store the offset if the sidedata is empty; that way
3096 3107 # we can easily detect empty sidedata, and it will be no different
3097 3108 # from the sidedata we manually add.
3098 3109 sidedata_offset = 0
3099 3110
        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
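        #
        # In other words, every append below follows the same defensive
        # pattern (illustrative sketch only):
        #
        #     fh.seek(0, os.SEEK_END)  # or an explicit docket offset
        #     fh.write(payload)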
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
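            # Appending may have pushed an inline revlog past the inline
            # size threshold; when that happens the revlog is migrated to a
            # split index + data layout.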
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
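                # Each item of `deltas` is expected to be an 8-tuple:
                # (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
                # as unpacked just below.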
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
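        # For inline revlogs, index entries and revision data are interleaved
        # in a single file, so the truncation point is the size of the
        # surviving data plus `rev` fixed-size index entries.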
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._inner._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
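
    # Hypothetical caller sketch (`rl` and `ui` are assumed names, not part
    # of this module): a verification pass could surface stray bytes with
    #
    #     dd, di = rl.checksize()
    #     if dd or di:
    #         ui.warn(b'%d data / %d index bytes of trailing garbage\n'
    #                 % (dd, di))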

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
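
    # The constants above are the accepted values for the ``deltareuse``
    # argument of ``clone()``; see its docstring below for their semantics.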

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When it is None, the destination revlog's current
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
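                    # `new_flags` is a pair of (flags to add, flags to
                    # remove) bitmasks produced by the sidedata helpers; see
                    # the matching comment in `rewrite_sidedata`.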
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
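            #
            # Hedged illustration: a renamed file stores
            #     rawtext = b'\1\ncopy: <path>\ncopyrev: <hex>\n\1\n' + text
            # so LM is the length of that b'\1\n'-delimited header and
            # len(read()) == L2 - LM.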

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)