revlog: move `sidedata` in the inner object...
marmoute
r51991:49d75cc1 default
@@ -1,3985 +1,3992 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
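A flag processor is always a (read, write, raw) triple like ellipsisprocessor above: read and write return (text, validatehash), while the raw variant only reports whether the stored text can be hashed as-is. A minimal sketch of a custom triple, with hypothetical names purely for illustration (real registration goes through the b'flagprocessors' opener option consumed by _init_opts further down):

    # hypothetical processor triple, mirroring ellipsisprocessor above
    def examplereadprocessor(rl, text):
        # raw storage text -> consumable text; bool = hash still valid
        return text, False

    def examplewriteprocessor(rl, text):
        # consumable text -> raw storage text; bool = hash still valid
        return text, False

    def examplerawprocessor(rl, text):
        # True would mean the raw text is hashable without transformation
        return False

    exampleprocessor = (
        examplereadprocessor,
        examplewriteprocessor,
        examplerawprocessor,
    )
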
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
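Note that FeatureConfig overrides copy() so the mutable options dict is duplicated as well; with the base _Config.copy() alone, two revlog instances would share (and cross-mutate) a single dict. A quick sketch, using an illustrative option key:

    base = FeatureConfig()
    clone = base.copy()
    clone.compression_engine_options[b'zstd.level'] = 3  # illustrative key
    assert b'zstd.level' not in base.compression_engine_options
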
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # above this size, the index is considered large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 393 # 3-tuple of (node, rev, text) for a raw revision.
394 394 self._revisioncache = None
395 395
396 396 @property
397 397 def index_file(self):
398 398 return self.__index_file
399 399
400 400 @index_file.setter
401 401 def index_file(self, new_index_file):
402 402 self.__index_file = new_index_file
403 403 if self.inline:
404 404 self._segmentfile.filename = new_index_file
405 405
406 406 def __len__(self):
407 407 return len(self.index)
408 408
409 409 # Derived from index values.
410 410
411 411 def start(self, rev):
412 412 """the offset of the data chunk for this revision"""
413 413 return int(self.index[rev][0] >> 16)
414 414
415 415 def length(self, rev):
416 416 """the length of the data chunk for this revision"""
417 417 return self.index[rev][1]
418 418
419 419 def end(self, rev):
420 420 """the end of the data chunk for this revision"""
421 421 return self.start(rev) + self.length(rev)
422 422
423 423 def deltaparent(self, rev):
424 424 """return deltaparent of the given revision"""
425 425 base = self.index[rev][3]
426 426 if base == rev:
427 427 return nullrev
428 428 elif self.delta_config.general_delta:
429 429 return base
430 430 else:
431 431 return rev - 1
432 432
433 433 def issnapshot(self, rev):
434 434 """tells whether rev is a snapshot"""
435 435 if not self.delta_config.sparse_revlog:
436 436 return self.deltaparent(rev) == nullrev
437 437 elif hasattr(self.index, 'issnapshot'):
438 438 # directly assign the method to cache the testing and access
439 439 self.issnapshot = self.index.issnapshot
440 440 return self.issnapshot(rev)
441 441 if rev == nullrev:
442 442 return True
443 443 entry = self.index[rev]
444 444 base = entry[3]
445 445 if base == rev:
446 446 return True
447 447 if base == nullrev:
448 448 return True
449 449 p1 = entry[5]
450 450 while self.length(p1) == 0:
451 451 b = self.deltaparent(p1)
452 452 if b == p1:
453 453 break
454 454 p1 = b
455 455 p2 = entry[6]
456 456 while self.length(p2) == 0:
457 457 b = self.deltaparent(p2)
458 458 if b == p2:
459 459 break
460 460 p2 = b
461 461 if base == p1 or base == p2:
462 462 return False
463 463 return self.issnapshot(base)
464 464
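In sparse-revlog mode the pure-Python fallback above classifies a revision by its delta base: a base of nullrev (or the legacy base == rev convention) means a full snapshot, a base equal to a parent (after skipping empty-diff ancestors) means a plain delta, and anything else is an intermediate snapshot candidate, decided by recursing on the base. A toy restatement of that decision, with illustrative values:

    nullrev = -1

    def classify(base, rev, parents):
        if base == rev or base == nullrev:
            return 'full snapshot'
        if base in parents:
            return 'delta, not a snapshot'
        return 'intermediate snapshot if classify(base) says so'

    assert classify(base=-1, rev=5, parents=(3, -1)) == 'full snapshot'
    assert classify(base=3, rev=5, parents=(3, -1)) == 'delta, not a snapshot'
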
465 465 def _deltachain(self, rev, stoprev=None):
466 466 """Obtain the delta chain for a revision.
467 467
468 468 ``stoprev`` specifies a revision to stop at. If not specified, we
469 469 stop at the base of the chain.
470 470
471 471 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
472 472 revs in ascending order and ``stopped`` is a bool indicating whether
473 473 ``stoprev`` was hit.
474 474 """
475 475 generaldelta = self.delta_config.general_delta
476 476 # Try C implementation.
477 477 try:
478 478 return self.index.deltachain(rev, stoprev, generaldelta)
479 479 except AttributeError:
480 480 pass
481 481
482 482 chain = []
483 483
484 484 # Alias to prevent attribute lookup in tight loop.
485 485 index = self.index
486 486
487 487 iterrev = rev
488 488 e = index[iterrev]
489 489 while iterrev != e[3] and iterrev != stoprev:
490 490 chain.append(iterrev)
491 491 if generaldelta:
492 492 iterrev = e[3]
493 493 else:
494 494 iterrev -= 1
495 495 e = index[iterrev]
496 496
497 497 if iterrev == stoprev:
498 498 stopped = True
499 499 else:
500 500 chain.append(iterrev)
501 501 stopped = False
502 502
503 503 chain.reverse()
504 504 return chain, stopped
505 505
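raw_text() below is the main consumer of these chains: the first chunk of a chain is a full text and each later chunk is a delta folded on top of it. A condensed sketch of that consumption, assuming this module's mdiff import and an _InnerRevlog `inner` used inside an open reading() context:

    chain, stopped = inner._deltachain(rev)
    bins = inner._chunks(chain)  # defined further down
    text = mdiff.patches(bytes(bins[0]), bins[1:])
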
506 506 @util.propertycache
507 507 def _compressor(self):
508 508 engine = util.compengines[self.feature_config.compression_engine]
509 509 return engine.revlogcompressor(
510 510 self.feature_config.compression_engine_options
511 511 )
512 512
513 513 @util.propertycache
514 514 def _decompressor(self):
515 515 """the default decompressor"""
516 516 if self._default_compression_header is None:
517 517 return None
518 518 t = self._default_compression_header
519 519 c = self._get_decompressor(t)
520 520 return c.decompress
521 521
522 522 def _get_decompressor(self, t):
523 523 try:
524 524 compressor = self._decompressors[t]
525 525 except KeyError:
526 526 try:
527 527 engine = util.compengines.forrevlogheader(t)
528 528 compressor = engine.revlogcompressor(
529 529 self.feature_config.compression_engine_options
530 530 )
531 531 self._decompressors[t] = compressor
532 532 except KeyError:
533 533 raise error.RevlogError(
534 534 _(b'unknown compression type %s') % binascii.hexlify(t)
535 535 )
536 536 return compressor
537 537
538 538 def compress(self, data):
539 539 """Generate a possibly-compressed representation of data."""
540 540 if not data:
541 541 return b'', data
542 542
543 543 compressed = self._compressor.compress(data)
544 544
545 545 if compressed:
546 546 # The revlog compressor added the header in the returned data.
547 547 return b'', compressed
548 548
549 549 if data[0:1] == b'\0':
550 550 return b'', data
551 551 return b'u', data
552 552
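The return convention above is what decompress() dispatches on: an empty first element means the engine embedded its own header byte (b'x' for zlib), b'u' marks data stored uncompressed, and data whose first byte is NUL needs no marker at all. A sketch of the round trip, assuming an _InnerRevlog `inner` with the default zlib engine:

    header, packed = inner.compress(b'some revision text' * 64)
    stored = header + packed  # highly repetitive input, so zlib wins: b'x...'
    assert inner.decompress(stored) == b'some revision text' * 64
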
553 553 def decompress(self, data):
554 554 """Decompress a revlog chunk.
555 555
556 556 The chunk is expected to begin with a header identifying the
557 557 format type so it can be routed to an appropriate decompressor.
558 558 """
559 559 if not data:
560 560 return data
561 561
562 562 # Revlogs are read much more frequently than they are written and many
563 563 # chunks only take microseconds to decompress, so performance is
564 564 # important here.
565 565 #
566 566 # We can make a few assumptions about revlogs:
567 567 #
568 568 # 1) the majority of chunks will be compressed (as opposed to inline
569 569 # raw data).
570 570 # 2) decompressing *any* data will likely be at least 10x slower than
571 571 # returning raw inline data.
572 572 # 3) we want to prioritize common and officially supported compression
573 573 # engines
574 574 #
575 575 # It follows that we want to optimize for "decompress compressed data
576 576 # when encoded with common and officially supported compression engines"
577 577 # case over "raw data" and "data encoded by less common or non-official
578 578 # compression engines." That is why we have the inline lookup first
579 579 # followed by the compengines lookup.
580 580 #
581 581 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
582 582 # compressed chunks. And this matters for changelog and manifest reads.
583 583 t = data[0:1]
584 584
585 585 if t == b'x':
586 586 try:
587 587 return _zlibdecompress(data)
588 588 except zlib.error as e:
589 589 raise error.RevlogError(
590 590 _(b'revlog decompress error: %s')
591 591 % stringutil.forcebytestr(e)
592 592 )
593 593 # '\0' is more common than 'u' so it goes first.
594 594 elif t == b'\0':
595 595 return data
596 596 elif t == b'u':
597 597 return util.buffer(data, 1)
598 598
599 599 compressor = self._get_decompressor(t)
600 600
601 601 return compressor.decompress(data)
602 602
603 603 @contextlib.contextmanager
604 604 def reading(self):
605 605 """Context manager that keeps data and sidedata files open for reading"""
606 606 if len(self.index) == 0:
607 607 yield # nothing to be read
608 608 else:
609 609 with self._segmentfile.reading():
610 610 with self._segmentfile_sidedata.reading():
611 611 yield
612 612
613 613 @property
614 614 def is_writing(self):
615 615 """True is a writing context is open"""
616 616 return self._writinghandles is not None
617 617
618 618 @contextlib.contextmanager
619 619 def writing(self, transaction, data_end=None, sidedata_end=None):
620 620 """Open the revlog files for writing
621 621
622 622 Add content to a revlog should be done within such context.
623 623 """
624 624 if self.is_writing:
625 625 yield
626 626 else:
627 627 ifh = dfh = sdfh = None
628 628 try:
629 629 r = len(self.index)
630 630 # opening the data file.
631 631 dsize = 0
632 632 if r:
633 633 dsize = self.end(r - 1)
634 634 dfh = None
635 635 if not self.inline:
636 636 try:
637 637 dfh = self.opener(self.data_file, mode=b"r+")
638 638 if data_end is None:
639 639 dfh.seek(0, os.SEEK_END)
640 640 else:
641 641 dfh.seek(data_end, os.SEEK_SET)
642 642 except FileNotFoundError:
643 643 dfh = self.opener(self.data_file, mode=b"w+")
644 644 transaction.add(self.data_file, dsize)
645 645 if self.sidedata_file is not None:
646 646 assert sidedata_end is not None
647 647 # revlog-v2 does not inline, help Pytype
648 648 assert dfh is not None
649 649 try:
650 650 sdfh = self.opener(self.sidedata_file, mode=b"r+")
651 651 dfh.seek(sidedata_end, os.SEEK_SET)
652 652 except FileNotFoundError:
653 653 sdfh = self.opener(self.sidedata_file, mode=b"w+")
654 654 transaction.add(self.sidedata_file, sidedata_end)
655 655
656 656 # opening the index file.
657 657 isize = r * self.index.entry_size
658 658 ifh = self.__index_write_fp()
659 659 if self.inline:
660 660 transaction.add(self.index_file, dsize + isize)
661 661 else:
662 662 transaction.add(self.index_file, isize)
663 663 # exposing all file handles for writing.
664 664 self._writinghandles = (ifh, dfh, sdfh)
665 665 self._segmentfile.writing_handle = ifh if self.inline else dfh
666 666 self._segmentfile_sidedata.writing_handle = sdfh
667 667 yield
668 668 finally:
669 669 self._writinghandles = None
670 670 self._segmentfile.writing_handle = None
671 671 self._segmentfile_sidedata.writing_handle = None
672 672 if dfh is not None:
673 673 dfh.close()
674 674 if sdfh is not None:
675 675 sdfh.close()
676 676 # closing the index file last to avoid exposing references to
677 677 # potentially unflushed data content.
678 678 if ifh is not None:
679 679 ifh.close()
680 680
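A hedged usage sketch: appending revisions is expected to happen inside this context so the index, data, and (for revlog-v2) sidedata handles are open and the files are registered with the transaction for rollback. Here `tr` is assumed to be an open transaction and the revlog to have no sidedata file:

    with inner.writing(tr):
        assert inner.is_writing
        # ... append index entries and data segments here ...
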
681 681 def __index_write_fp(self, index_end=None):
682 682 """internal method to open the index file for writing
683 683
684 684 You should not use this directly; use `writing` instead
685 685 """
686 686 try:
687 687 f = self.opener(
688 688 self.index_file,
689 689 mode=b"r+",
690 690 checkambig=self.data_config.check_ambig,
691 691 )
692 692 if index_end is None:
693 693 f.seek(0, os.SEEK_END)
694 694 else:
695 695 f.seek(index_end, os.SEEK_SET)
696 696 return f
697 697 except FileNotFoundError:
698 698 return self.opener(
699 699 self.index_file,
700 700 mode=b"w+",
701 701 checkambig=self.data_config.check_ambig,
702 702 )
703 703
704 704 def __index_new_fp(self):
705 705 """internal method to create a new index file for writing
706 706
707 707 You should not use this unless you are upgrading from an inline revlog
708 708 """
709 709 return self.opener(
710 710 self.index_file,
711 711 mode=b"w",
712 712 checkambig=self.data_config.check_ambig,
713 713 atomictemp=True,
714 714 )
715 715
716 716 def split_inline(self, tr, header, new_index_file_path=None):
717 717 """split the data of an inline revlog into an index and a data file"""
718 718 existing_handles = False
719 719 if self._writinghandles is not None:
720 720 existing_handles = True
721 721 fp = self._writinghandles[0]
722 722 fp.flush()
723 723 fp.close()
724 724 # We can't use the cached file handle after close(). So prevent
725 725 # its usage.
726 726 self._writinghandles = None
727 727 self._segmentfile.writing_handle = None
728 728 # No need to deal with the sidedata writing handle as it is only
729 729 # relevant for revlog-v2, which is never inline, so we never reach
730 730 # this code
731 731
732 732 new_dfh = self.opener(self.data_file, mode=b"w+")
733 733 new_dfh.truncate(0) # drop any potentially existing data
734 734 try:
735 735 with self.reading():
736 736 for r in range(len(self.index)):
737 737 new_dfh.write(self.get_segment_for_revs(r, r)[1])
738 738 new_dfh.flush()
739 739
740 740 if new_index_file_path is not None:
741 741 self.index_file = new_index_file_path
742 742 with self.__index_new_fp() as fp:
743 743 self.inline = False
744 744 for i in range(len(self.index)):
745 745 e = self.index.entry_binary(i)
746 746 if i == 0:
747 747 packed_header = self.index.pack_header(header)
748 748 e = packed_header + e
749 749 fp.write(e)
750 750
751 751 # If we don't use side-write, the temp file replaces the real
752 752 # index when we exit the context manager
753 753
754 754 self._segmentfile = randomaccessfile.randomaccessfile(
755 755 self.opener,
756 756 self.data_file,
757 757 self.data_config.chunk_cache_size,
758 758 )
759 759
760 760 if existing_handles:
761 761 # switched from inline to conventional; reopen the index
762 762 ifh = self.__index_write_fp()
763 763 self._writinghandles = (ifh, new_dfh, None)
764 764 self._segmentfile.writing_handle = new_dfh
765 765 new_dfh = None
766 766 # No need to deal with the sidedata writing handle as it is only
767 767 # relevant for revlog-v2, which is never inline, so we never reach
768 768 # this code
769 769 finally:
770 770 if new_dfh is not None:
771 771 new_dfh.close()
772 772 return self.index_file
773 773
774 774 def get_segment_for_revs(self, startrev, endrev):
775 775 """Obtain a segment of raw data corresponding to a range of revisions.
776 776
777 777 Accepts the start and end revisions. Reading goes through the
778 778 internally managed segment file, whose seek position is not
779 779 preserved.
780 780
781 781 Requests for data may be satisfied by a cache.
782 782
783 783 Returns a 2-tuple of (offset, data) for the requested range of
784 784 revisions. Offset is the integer offset from the beginning of the
785 785 revlog and data is a str or buffer of the raw byte data.
786 786
787 787 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
788 788 to determine where each revision's data begins and ends.
789 789
790 790 API: we should consider making this a private part of the InnerRevlog
791 791 at some point.
792 792 """
793 793 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
794 794 # (functions are expensive).
795 795 index = self.index
796 796 istart = index[startrev]
797 797 start = int(istart[0] >> 16)
798 798 if startrev == endrev:
799 799 end = start + istart[1]
800 800 else:
801 801 iend = index[endrev]
802 802 end = int(iend[0] >> 16) + iend[1]
803 803
804 804 if self.inline:
805 805 start += (startrev + 1) * self.index.entry_size
806 806 end += (endrev + 1) * self.index.entry_size
807 807 length = end - start
808 808
809 809 return start, self._segmentfile.read_chunk(start, length)
810 810
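As the docstring says, callers slice the returned segment per revision themselves; _chunks() below does exactly this. A condensed sketch, assuming this module's util import and an ascending run `revs` read inside an open reading() context:

    offset, data = inner.get_segment_for_revs(revs[0], revs[-1])
    for rev in revs:
        chunkstart = inner.start(rev)
        if inner.inline:
            # inline revlogs interleave index entries with the data
            chunkstart += (rev + 1) * inner.index.entry_size
        chunk = util.buffer(data, chunkstart - offset, inner.length(rev))
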
811 811 def _chunk(self, rev):
812 812 """Obtain a single decompressed chunk for a revision.
813 813
814 814 Accepts an integer revision. Reading goes through the internally
815 815 managed segment file, whose seek position is not preserved.
817 817
818 818 Returns a str holding uncompressed data for the requested revision.
819 819 """
820 820 compression_mode = self.index[rev][10]
821 821 data = self.get_segment_for_revs(rev, rev)[1]
822 822 if compression_mode == COMP_MODE_PLAIN:
823 823 return data
824 824 elif compression_mode == COMP_MODE_DEFAULT:
825 825 return self._decompressor(data)
826 826 elif compression_mode == COMP_MODE_INLINE:
827 827 return self.decompress(data)
828 828 else:
829 829 msg = b'unknown compression mode %d'
830 830 msg %= compression_mode
831 831 raise error.RevlogError(msg)
832 832
833 833 def _chunks(self, revs, targetsize=None):
834 834 """Obtain decompressed chunks for the specified revisions.
835 835
836 836 Accepts an iterable of numeric revisions that are assumed to be in
837 837 ascending order. Reading goes through the internally managed segment
838 838 file, whose seek position is not preserved.
840 840
841 841 This function is similar to calling ``self._chunk()`` multiple times,
842 842 but is faster.
843 843
844 844 Returns a list with decompressed data for each requested revision.
845 845 """
846 846 if not revs:
847 847 return []
848 848 start = self.start
849 849 length = self.length
850 850 inline = self.inline
851 851 iosize = self.index.entry_size
852 852 buffer = util.buffer
853 853
854 854 l = []
855 855 ladd = l.append
856 856
857 857 if not self.data_config.with_sparse_read:
858 858 slicedchunks = (revs,)
859 859 else:
860 860 slicedchunks = deltautil.slicechunk(
861 861 self,
862 862 revs,
863 863 targetsize=targetsize,
864 864 )
865 865
866 866 for revschunk in slicedchunks:
867 867 firstrev = revschunk[0]
868 868 # Skip trailing revisions with empty diff
869 869 for lastrev in revschunk[::-1]:
870 870 if length(lastrev) != 0:
871 871 break
872 872
873 873 try:
874 874 offset, data = self.get_segment_for_revs(firstrev, lastrev)
875 875 except OverflowError:
876 876 # issue4215 - we can't cache a run of chunks greater than
877 877 # 2G on Windows
878 878 return [self._chunk(rev) for rev in revschunk]
879 879
880 880 decomp = self.decompress
881 881 # self._decompressor might be None, but will not be used in that case
882 882 def_decomp = self._decompressor
883 883 for rev in revschunk:
884 884 chunkstart = start(rev)
885 885 if inline:
886 886 chunkstart += (rev + 1) * iosize
887 887 chunklength = length(rev)
888 888 comp_mode = self.index[rev][10]
889 889 c = buffer(data, chunkstart - offset, chunklength)
890 890 if comp_mode == COMP_MODE_PLAIN:
891 891 ladd(c)
892 892 elif comp_mode == COMP_MODE_INLINE:
893 893 ladd(decomp(c))
894 894 elif comp_mode == COMP_MODE_DEFAULT:
895 895 ladd(def_decomp(c))
896 896 else:
897 897 msg = b'unknown compression mode %d'
898 898 msg %= comp_mode
899 899 raise error.RevlogError(msg)
900 900
901 901 return l
902 902
903 903 def raw_text(self, node, rev):
904 904 """return the possibly unvalidated rawtext for a revision
905 905
906 906 returns (rev, rawtext, validated)
907 907 """
908 908
909 909 # revision in the cache (could be useful to apply delta)
910 910 cachedrev = None
911 911 # An intermediate text to apply deltas to
912 912 basetext = None
913 913
914 914 # Check if we have the entry in cache
915 915 # The cache entry looks like (node, rev, rawtext)
916 916 if self._revisioncache:
917 917 cachedrev = self._revisioncache[1]
918 918
919 919 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
920 920 if stopped:
921 921 basetext = self._revisioncache[2]
922 922
923 923 # drop cache to save memory, the caller is expected to
924 924 # update self._inner._revisioncache after validating the text
925 925 self._revisioncache = None
926 926
927 927 targetsize = None
928 928 rawsize = self.index[rev][2]
929 929 if 0 <= rawsize:
930 930 targetsize = 4 * rawsize
931 931
932 932 bins = self._chunks(chain, targetsize=targetsize)
933 933 if basetext is None:
934 934 basetext = bytes(bins[0])
935 935 bins = bins[1:]
936 936
937 937 rawtext = mdiff.patches(basetext, bins)
938 938 del basetext # let us have a chance to free memory early
939 939 return (rev, rawtext, False)
940 940
941 def sidedata(self, rev, sidedata_end):
942 """Return the sidedata for a given revision number."""
943 index_entry = self.index[rev]
944 sidedata_offset = index_entry[8]
945 sidedata_size = index_entry[9]
946
947 if self.inline:
948 sidedata_offset += self.index.entry_size * (1 + rev)
949 if sidedata_size == 0:
950 return {}
951
952 if sidedata_end < sidedata_offset + sidedata_size:
953 filename = self.sidedata_file
954 end = sidedata_end
955 offset = sidedata_offset
956 length = sidedata_size
957 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
958 raise error.RevlogError(m)
959
960 comp_segment = self._segmentfile_sidedata.read_chunk(
961 sidedata_offset, sidedata_size
962 )
963
964 comp = self.index[rev][11]
965 if comp == COMP_MODE_PLAIN:
966 segment = comp_segment
967 elif comp == COMP_MODE_DEFAULT:
968 segment = self._decompressor(comp_segment)
969 elif comp == COMP_MODE_INLINE:
970 segment = self.decompress(comp_segment)
971 else:
972 msg = b'unknown compression mode %d'
973 msg %= comp
974 raise error.RevlogError(msg)
975
976 sidedata = sidedatautil.deserialize_sidedata(segment)
977 return sidedata
978
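This method is the piece this changeset moves into _InnerRevlog. A sketch of the expected call from the outer revlog object, which resolves the current end of the sidedata file (tracked by the revlog-v2 docket) and delegates; the names here are illustrative:

    with rl._inner.reading():
        # rl._docket.sidedata_end assumed to hold the valid data boundary
        sidedata = rl._inner.sidedata(rev, rl._docket.sidedata_end)
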
941 979
942 980 class revlog:
943 981 """
944 982 the underlying revision storage object
945 983
946 984 A revlog consists of two parts, an index and the revision data.
947 985
948 986 The index is a file with a fixed record size containing
949 987 information on each revision, including its nodeid (hash), the
950 988 nodeids of its parents, the position and offset of its data within
951 989 the data file, and the revision it's based on. Finally, each entry
952 990 contains a linkrev entry that can serve as a pointer to external
953 991 data.
954 992
955 993 The revision data itself is a linear collection of data chunks.
956 994 Each chunk represents a revision and is usually represented as a
957 995 delta against the previous chunk. To bound lookup time, runs of
958 996 deltas are limited to about 2 times the length of the original
959 997 version data. This makes retrieval of a version proportional to
960 998 its size, or O(1) relative to the number of revisions.
961 999
962 1000 Both pieces of the revlog are written to in an append-only
963 1001 fashion, which means we never need to rewrite a file to insert or
964 1002 remove data, and can use some simple techniques to avoid the need
965 1003 for locking while reading.
966 1004
967 1005 If checkambig, indexfile is opened with checkambig=True at
968 1006 writing, to avoid file stat ambiguity.
969 1007
970 1008 If mmaplargeindex is True, and an mmapindexthreshold is set, the
971 1009 index will be mmapped rather than read if it is larger than the
972 1010 configured threshold.
973 1011
974 1012 If censorable is True, the revlog can have censored revisions.
975 1013
976 1014 If `upperboundcomp` is not None, this is the expected maximal gain from
977 1015 compression for the data content.
978 1016
979 1017 `concurrencychecker` is an optional function that receives 3 arguments: a
980 1018 file handle, a filename, and an expected position. It should check whether
981 1019 the current position in the file handle is valid, and log/warn/fail (by
982 1020 raising).
983 1021
984 1022 See mercurial/revlogutils/constants.py for details about the content of an
985 1023 index entry.
986 1024 """
987 1025
988 1026 _flagserrorclass = error.RevlogError
989 1027
990 1028 @staticmethod
991 1029 def is_inline_index(header_bytes):
992 1030 """Determine if a revlog is inline from the initial bytes of the index"""
993 1031 header = INDEX_HEADER.unpack(header_bytes)[0]
994 1032
995 1033 _format_flags = header & ~0xFFFF
996 1034 _format_version = header & 0xFFFF
997 1035
998 1036 features = FEATURES_BY_VERSION[_format_version]
999 1037 return features[b'inline'](_format_flags)
1000 1038
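The bit layout assumed here: the low 16 bits of the header carry the format version and the high bits carry feature flags such as FLAG_INLINE_DATA. A worked example using the constants imported at the top of this module:

    header = REVLOGV1 | FLAG_INLINE_DATA
    assert header & 0xFFFF == REVLOGV1            # _format_version
    assert (header & ~0xFFFF) & FLAG_INLINE_DATA  # _format_flags
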
1001 1039 def __init__(
1002 1040 self,
1003 1041 opener,
1004 1042 target,
1005 1043 radix,
1006 1044 postfix=None, # only exist for `tmpcensored` now
1007 1045 checkambig=False,
1008 1046 mmaplargeindex=False,
1009 1047 censorable=False,
1010 1048 upperboundcomp=None,
1011 1049 persistentnodemap=False,
1012 1050 concurrencychecker=None,
1013 1051 trypending=False,
1014 1052 try_split=False,
1015 1053 canonical_parent_order=True,
1016 1054 ):
1017 1055 """
1018 1056 create a revlog object
1019 1057
1020 1058 opener is a function that abstracts the file opening operation
1021 1059 and can be used to implement COW semantics or the like.
1022 1060
1023 1061 `target`: a (KIND, ID) tuple that identifies the content stored in
1024 1062 this revlog. It helps the rest of the code understand what the revlog
1025 1063 is about without having to resort to heuristics and index filename
1026 1064 analysis. Note that this must be reliably set by normal code, but
1027 1065 test, debug, or performance measurement code might not set this to an
1028 1066 accurate value.
1029 1067 """
1030 1068
1031 1069 self.radix = radix
1032 1070
1033 1071 self._docket_file = None
1034 1072 self._indexfile = None
1035 1073 self._datafile = None
1036 1074 self._sidedatafile = None
1037 1075 self._nodemap_file = None
1038 1076 self.postfix = postfix
1039 1077 self._trypending = trypending
1040 1078 self._try_split = try_split
1041 1079 self.opener = opener
1042 1080 if persistentnodemap:
1043 1081 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1044 1082
1045 1083 assert target[0] in ALL_KINDS
1046 1084 assert len(target) == 2
1047 1085 self.target = target
1048 1086 if b'feature-config' in self.opener.options:
1049 1087 self.feature_config = self.opener.options[b'feature-config'].copy()
1050 1088 else:
1051 1089 self.feature_config = FeatureConfig()
1052 1090 self.feature_config.censorable = censorable
1053 1091 self.feature_config.canonical_parent_order = canonical_parent_order
1054 1092 if b'data-config' in self.opener.options:
1055 1093 self.data_config = self.opener.options[b'data-config'].copy()
1056 1094 else:
1057 1095 self.data_config = DataConfig()
1058 1096 self.data_config.check_ambig = checkambig
1059 1097 self.data_config.mmap_large_index = mmaplargeindex
1060 1098 if b'delta-config' in self.opener.options:
1061 1099 self.delta_config = self.opener.options[b'delta-config'].copy()
1062 1100 else:
1063 1101 self.delta_config = DeltaConfig()
1064 1102 self.delta_config.upper_bound_comp = upperboundcomp
1065 1103
1066 1104 # Maps rev to chain base rev.
1067 1105 self._chainbasecache = util.lrucachedict(100)
1068 1106
1069 1107 self.index = None
1070 1108 self._docket = None
1071 1109 self._nodemap_docket = None
1072 1110 # Mapping of partial identifiers to full nodes.
1073 1111 self._pcache = {}
1074 1112
1075 1113 # other optional features
1076 1114
1077 1115 # Make copy of flag processors so each revlog instance can support
1078 1116 # custom flags.
1079 1117 self._flagprocessors = dict(flagutil.flagprocessors)
1080 1118 # prevent nesting of addgroup
1081 1119 self._adding_group = None
1082 1120
1083 1121 chunk_cache = self._loadindex()
1084 1122 self._load_inner(chunk_cache)
1085 1123 self._concurrencychecker = concurrencychecker
1086 1124
1087 1125 @property
1088 1126 def _generaldelta(self):
1089 1127 """temporary compatibility proxy"""
1090 1128 util.nouideprecwarn(
1091 1129 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1092 1130 )
1093 1131 return self.delta_config.general_delta
1094 1132
1095 1133 @property
1096 1134 def _checkambig(self):
1097 1135 """temporary compatibility proxy"""
1098 1136 util.nouideprecwarn(
1099 1137 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1100 1138 )
1101 1139 return self.data_config.check_ambig
1102 1140
1103 1141 @property
1104 1142 def _mmaplargeindex(self):
1105 1143 """temporary compatibility proxy"""
1106 1144 util.nouideprecwarn(
1107 1145 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1108 1146 )
1109 1147 return self.data_config.mmap_large_index
1110 1148
1111 1149 @property
1112 1150 def _censorable(self):
1113 1151 """temporary compatibility proxy"""
1114 1152 util.nouideprecwarn(
1115 1153 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1116 1154 )
1117 1155 return self.feature_config.censorable
1118 1156
1119 1157 @property
1120 1158 def _chunkcachesize(self):
1121 1159 """temporary compatibility proxy"""
1122 1160 util.nouideprecwarn(
1123 1161 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1124 1162 )
1125 1163 return self.data_config.chunk_cache_size
1126 1164
1127 1165 @property
1128 1166 def _maxchainlen(self):
1129 1167 """temporary compatibility proxy"""
1130 1168 util.nouideprecwarn(
1131 1169 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1132 1170 )
1133 1171 return self.delta_config.max_chain_len
1134 1172
1135 1173 @property
1136 1174 def _deltabothparents(self):
1137 1175 """temporary compatibility proxy"""
1138 1176 util.nouideprecwarn(
1139 1177 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1140 1178 )
1141 1179 return self.delta_config.delta_both_parents
1142 1180
1143 1181 @property
1144 1182 def _candidate_group_chunk_size(self):
1145 1183 """temporary compatibility proxy"""
1146 1184 util.nouideprecwarn(
1147 1185 b"use revlog.delta_config.candidate_group_chunk_size",
1148 1186 b"6.6",
1149 1187 stacklevel=2,
1150 1188 )
1151 1189 return self.delta_config.candidate_group_chunk_size
1152 1190
1153 1191 @property
1154 1192 def _debug_delta(self):
1155 1193 """temporary compatibility proxy"""
1156 1194 util.nouideprecwarn(
1157 1195 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1158 1196 )
1159 1197 return self.delta_config.debug_delta
1160 1198
1161 1199 @property
1162 1200 def _compengine(self):
1163 1201 """temporary compatibility proxy"""
1164 1202 util.nouideprecwarn(
1165 1203 b"use revlog.feature_config.compression_engine",
1166 1204 b"6.6",
1167 1205 stacklevel=2,
1168 1206 )
1169 1207 return self.feature_config.compression_engine
1170 1208
1171 1209 @property
1172 1210 def upperboundcomp(self):
1173 1211 """temporary compatibility proxy"""
1174 1212 util.nouideprecwarn(
1175 1213 b"use revlog.delta_config.upper_bound_comp",
1176 1214 b"6.6",
1177 1215 stacklevel=2,
1178 1216 )
1179 1217 return self.delta_config.upper_bound_comp
1180 1218
1181 1219 @property
1182 1220 def _compengineopts(self):
1183 1221 """temporary compatibility proxy"""
1184 1222 util.nouideprecwarn(
1185 1223 b"use revlog.feature_config.compression_engine_options",
1186 1224 b"6.6",
1187 1225 stacklevel=2,
1188 1226 )
1189 1227 return self.feature_config.compression_engine_options
1190 1228
1191 1229 @property
1192 1230 def _maxdeltachainspan(self):
1193 1231 """temporary compatibility proxy"""
1194 1232 util.nouideprecwarn(
1195 1233 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1196 1234 )
1197 1235 return self.delta_config.max_deltachain_span
1198 1236
1199 1237 @property
1200 1238 def _withsparseread(self):
1201 1239 """temporary compatibility proxy"""
1202 1240 util.nouideprecwarn(
1203 1241 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1204 1242 )
1205 1243 return self.data_config.with_sparse_read
1206 1244
1207 1245 @property
1208 1246 def _sparserevlog(self):
1209 1247 """temporary compatibility proxy"""
1210 1248 util.nouideprecwarn(
1211 1249 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1212 1250 )
1213 1251 return self.delta_config.sparse_revlog
1214 1252
1215 1253 @property
1216 1254 def hassidedata(self):
1217 1255 """temporary compatibility proxy"""
1218 1256 util.nouideprecwarn(
1219 1257 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1220 1258 )
1221 1259 return self.feature_config.has_side_data
1222 1260
1223 1261 @property
1224 1262 def _srdensitythreshold(self):
1225 1263 """temporary compatibility proxy"""
1226 1264 util.nouideprecwarn(
1227 1265 b"use revlog.data_config.sr_density_threshold",
1228 1266 b"6.6",
1229 1267 stacklevel=2,
1230 1268 )
1231 1269 return self.data_config.sr_density_threshold
1232 1270
1233 1271 @property
1234 1272 def _srmingapsize(self):
1235 1273 """temporary compatibility proxy"""
1236 1274 util.nouideprecwarn(
1237 1275 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1238 1276 )
1239 1277 return self.data_config.sr_min_gap_size
1240 1278
1241 1279 @property
1242 1280 def _compute_rank(self):
1243 1281 """temporary compatibility proxy"""
1244 1282 util.nouideprecwarn(
1245 1283 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1246 1284 )
1247 1285 return self.feature_config.compute_rank
1248 1286
1249 1287 @property
1250 1288 def canonical_parent_order(self):
1251 1289 """temporary compatibility proxy"""
1252 1290 util.nouideprecwarn(
1253 1291 b"use revlog.feature_config.canonical_parent_order",
1254 1292 b"6.6",
1255 1293 stacklevel=2,
1256 1294 )
1257 1295 return self.feature_config.canonical_parent_order
1258 1296
1259 1297 @property
1260 1298 def _lazydelta(self):
1261 1299 """temporary compatibility proxy"""
1262 1300 util.nouideprecwarn(
1263 1301 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1264 1302 )
1265 1303 return self.delta_config.lazy_delta
1266 1304
1267 1305 @property
1268 1306 def _lazydeltabase(self):
1269 1307 """temporary compatibility proxy"""
1270 1308 util.nouideprecwarn(
1271 1309 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1272 1310 )
1273 1311 return self.delta_config.lazy_delta_base
1274 1312
1275 1313 def _init_opts(self):
1276 1314 """process options (from above/config) to setup associated default revlog mode
1277 1315
1278 1316 These values might be affected when actually reading on disk information.
1279 1317
1280 1318 The relevant values are returned for use in _loadindex().
1281 1319
1282 1320 * newversionflags:
1283 1321 version header to use if we need to create a new revlog
1284 1322
1285 1323 * mmapindexthreshold:
1286 1324 minimal index size at which we start to use mmap
1287 1325
1288 1326 * force_nodemap:
1289 1327 force the usage of a "development" version of the nodemap code
1290 1328 """
1291 1329 opts = self.opener.options
1292 1330
1293 1331 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1294 1332 new_header = CHANGELOGV2
1295 1333 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1296 1334 self.feature_config.compute_rank = compute_rank
1297 1335 elif b'revlogv2' in opts:
1298 1336 new_header = REVLOGV2
1299 1337 elif b'revlogv1' in opts:
1300 1338 new_header = REVLOGV1 | FLAG_INLINE_DATA
1301 1339 if b'generaldelta' in opts:
1302 1340 new_header |= FLAG_GENERALDELTA
1303 1341 elif b'revlogv0' in self.opener.options:
1304 1342 new_header = REVLOGV0
1305 1343 else:
1306 1344 new_header = REVLOG_DEFAULT_VERSION
1307 1345
1308 1346 mmapindexthreshold = None
1309 1347 if self.data_config.mmap_large_index:
1310 1348 mmapindexthreshold = self.data_config.mmap_index_threshold
1311 1349 if self.feature_config.enable_ellipsis:
1312 1350 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1313 1351
1314 1352 # revlog v0 doesn't have flag processors
1315 1353 for flag, processor in opts.get(b'flagprocessors', {}).items():
1316 1354 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1317 1355
1318 1356 chunk_cache_size = self.data_config.chunk_cache_size
1319 1357 if chunk_cache_size <= 0:
1320 1358 raise error.RevlogError(
1321 1359 _(b'revlog chunk cache size %r is not greater than 0')
1322 1360 % chunk_cache_size
1323 1361 )
1324 1362 elif chunk_cache_size & (chunk_cache_size - 1):
1325 1363 raise error.RevlogError(
1326 1364 _(b'revlog chunk cache size %r is not a power of 2')
1327 1365 % chunk_cache_size
1328 1366 )
1329 1367 force_nodemap = opts.get(b'devel-force-nodemap', False)
1330 1368 return new_header, mmapindexthreshold, force_nodemap
1331 1369
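The power-of-two validation above relies on the classic bit trick: a power of two has a single set bit, so x & (x - 1) is zero exactly for powers of two (zero itself is rejected by the earlier check). For example:

    assert 65536 & (65536 - 1) == 0  # power of two: accepted
    assert 65000 & (65000 - 1) != 0  # not a power of two: rejected
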
1332 1370 def _get_data(self, filepath, mmap_threshold, size=None):
1333 1371 """return a file content with or without mmap
1334 1372
1335 1373 If the file is missing, return the empty string"""
1336 1374 try:
1337 1375 with self.opener(filepath) as fp:
1338 1376 if mmap_threshold is not None:
1339 1377 file_size = self.opener.fstat(fp).st_size
1340 1378 if file_size >= mmap_threshold:
1341 1379 if size is not None:
1342 1380 # avoid potential mmap crash
1343 1381 size = min(file_size, size)
1344 1382 # TODO: should .close() to release resources without
1345 1383 # relying on Python GC
1346 1384 if size is None:
1347 1385 return util.buffer(util.mmapread(fp))
1348 1386 else:
1349 1387 return util.buffer(util.mmapread(fp, size))
1350 1388 if size is None:
1351 1389 return fp.read()
1352 1390 else:
1353 1391 return fp.read(size)
1354 1392 except FileNotFoundError:
1355 1393 return b''
1356 1394
1357 1395 def get_streams(self, max_linkrev, force_inline=False):
1358 1396 """return a list of streams that represent this revlog
1359 1397
1360 1398 This is used by stream-clone to do bytes-to-bytes copies of a repository.
1361 1399
1362 1400 This streams data for all revisions that refer to a changelog revision up
1363 1401 to `max_linkrev`.
1364 1402
1365 1403 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1366 1404
1367 1405 It returns a list of three-tuples:
1368 1406
1369 1407 [
1370 1408 (filename, bytes_stream, stream_size),
1371 1409 …
1372 1410 ]
1373 1411 """
1374 1412 n = len(self)
1375 1413 index = self.index
1376 1414 while n > 0:
1377 1415 linkrev = index[n - 1][4]
1378 1416 if linkrev < max_linkrev:
1379 1417 break
1380 1418 # note: this loop will rarely go through multiple iterations, since
1381 1419 # it only traverses commits created during the current streaming
1382 1420 # pull operation.
1383 1421 #
1384 1422 # If this becomes a problem, using a binary search should cap the
1385 1423 # runtime of this.
1386 1424 n = n - 1
1387 1425 if n == 0:
1388 1426 # no data to send
1389 1427 return []
1390 1428 index_size = n * index.entry_size
1391 1429 data_size = self.end(n - 1)
1392 1430
1393 1431 # XXX we might have been split (or stripped) since the object was
1394 1432 # initialized. We need to close this race too, e.g. by having a way to
1395 1433 # pre-open the files we feed to the revlog and never closing them before
1396 1434 # we are done streaming.
1397 1435
1398 1436 if self._inline:
1399 1437
1400 1438 def get_stream():
1401 1439 with self.opener(self._indexfile, mode=b"r") as fp:
1402 1440 yield None
1403 1441 size = index_size + data_size
1404 1442 if size <= 65536:
1405 1443 yield fp.read(size)
1406 1444 else:
1407 1445 yield from util.filechunkiter(fp, limit=size)
1408 1446
1409 1447 inline_stream = get_stream()
1410 1448 next(inline_stream)
1411 1449 return [
1412 1450 (self._indexfile, inline_stream, index_size + data_size),
1413 1451 ]
1414 1452 elif force_inline:
1415 1453
1416 1454 def get_stream():
1417 1455 with self.reading():
1418 1456 yield None
1419 1457
1420 1458 for rev in range(n):
1421 1459 idx = self.index.entry_binary(rev)
1422 1460 if rev == 0 and self._docket is None:
1423 1461 # re-inject the inline flag
1424 1462 header = self._format_flags
1425 1463 header |= self._format_version
1426 1464 header |= FLAG_INLINE_DATA
1427 1465 header = self.index.pack_header(header)
1428 1466 idx = header + idx
1429 1467 yield idx
1430 1468 yield self._inner.get_segment_for_revs(rev, rev)[1]
1431 1469
1432 1470 inline_stream = get_stream()
1433 1471 next(inline_stream)
1434 1472 return [
1435 1473 (self._indexfile, inline_stream, index_size + data_size),
1436 1474 ]
1437 1475 else:
1438 1476
1439 1477 def get_index_stream():
1440 1478 with self.opener(self._indexfile, mode=b"r") as fp:
1441 1479 yield None
1442 1480 if index_size <= 65536:
1443 1481 yield fp.read(index_size)
1444 1482 else:
1445 1483 yield from util.filechunkiter(fp, limit=index_size)
1446 1484
1447 1485 def get_data_stream():
1448 1486 with self._datafp() as fp:
1449 1487 yield None
1450 1488 if data_size <= 65536:
1451 1489 yield fp.read(data_size)
1452 1490 else:
1453 1491 yield from util.filechunkiter(fp, limit=data_size)
1454 1492
1455 1493 index_stream = get_index_stream()
1456 1494 next(index_stream)
1457 1495 data_stream = get_data_stream()
1458 1496 next(data_stream)
1459 1497 return [
1460 1498 (self._datafile, data_stream, data_size),
1461 1499 (self._indexfile, index_stream, index_size),
1462 1500 ]
1463 1501
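A hedged sketch of consuming the result for a bytes-to-bytes copy, as stream clone does; `vfs` is a hypothetical destination opener (the streams returned above are already primed, so they yield only data chunks):

    for name, stream, size in rl.get_streams(max_linkrev):
        with vfs(name, b'wb') as fp:
            for chunk in stream:
                fp.write(chunk)
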
1464 1502 def _loadindex(self, docket=None):
1465 1503
1466 1504 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1467 1505
1468 1506 if self.postfix is not None:
1469 1507 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1470 1508 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1471 1509 entry_point = b'%s.i.a' % self.radix
1472 1510 elif self._try_split and self.opener.exists(self._split_index_file):
1473 1511 entry_point = self._split_index_file
1474 1512 else:
1475 1513 entry_point = b'%s.i' % self.radix
1476 1514
1477 1515 if docket is not None:
1478 1516 self._docket = docket
1479 1517 self._docket_file = entry_point
1480 1518 else:
1481 1519 self._initempty = True
1482 1520 entry_data = self._get_data(entry_point, mmapindexthreshold)
1483 1521 if len(entry_data) > 0:
1484 1522 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1485 1523 self._initempty = False
1486 1524 else:
1487 1525 header = new_header
1488 1526
1489 1527 self._format_flags = header & ~0xFFFF
1490 1528 self._format_version = header & 0xFFFF
1491 1529
1492 1530 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1493 1531 if supported_flags is None:
1494 1532 msg = _(b'unknown version (%d) in revlog %s')
1495 1533 msg %= (self._format_version, self.display_id)
1496 1534 raise error.RevlogError(msg)
1497 1535 elif self._format_flags & ~supported_flags:
1498 1536 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1499 1537 display_flag = self._format_flags >> 16
1500 1538 msg %= (display_flag, self._format_version, self.display_id)
1501 1539 raise error.RevlogError(msg)
1502 1540
1503 1541 features = FEATURES_BY_VERSION[self._format_version]
1504 1542 self._inline = features[b'inline'](self._format_flags)
1505 1543 self.delta_config.general_delta = features[b'generaldelta'](
1506 1544 self._format_flags
1507 1545 )
1508 1546 self.feature_config.has_side_data = features[b'sidedata']
1509 1547
1510 1548 if not features[b'docket']:
1511 1549 self._indexfile = entry_point
1512 1550 index_data = entry_data
1513 1551 else:
1514 1552 self._docket_file = entry_point
1515 1553 if self._initempty:
1516 1554 self._docket = docketutil.default_docket(self, header)
1517 1555 else:
1518 1556 self._docket = docketutil.parse_docket(
1519 1557 self, entry_data, use_pending=self._trypending
1520 1558 )
1521 1559
1522 1560 if self._docket is not None:
1523 1561 self._indexfile = self._docket.index_filepath()
1524 1562 index_data = b''
1525 1563 index_size = self._docket.index_end
1526 1564 if index_size > 0:
1527 1565 index_data = self._get_data(
1528 1566 self._indexfile, mmapindexthreshold, size=index_size
1529 1567 )
1530 1568 if len(index_data) < index_size:
1531 1569 msg = _(b'too few index data for %s: got %d, expected %d')
1532 1570 msg %= (self.display_id, len(index_data), index_size)
1533 1571 raise error.RevlogError(msg)
1534 1572
1535 1573 self._inline = False
1536 1574 # generaldelta implied by version 2 revlogs.
1537 1575 self.delta_config.general_delta = True
1538 1576 # the logic for persistent nodemap will be dealt with within the
1539 1577 # main docket, so disable it for now.
1540 1578 self._nodemap_file = None
1541 1579
1542 1580 if self._docket is not None:
1543 1581 self._datafile = self._docket.data_filepath()
1544 1582 self._sidedatafile = self._docket.sidedata_filepath()
1545 1583 elif self.postfix is None:
1546 1584 self._datafile = b'%s.d' % self.radix
1547 1585 else:
1548 1586 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1549 1587
1550 1588 self.nodeconstants = sha1nodeconstants
1551 1589 self.nullid = self.nodeconstants.nullid
1552 1590
1553 1591 # sparse-revlog can't be on without general-delta (issue6056)
1554 1592 if not self.delta_config.general_delta:
1555 1593 self.delta_config.sparse_revlog = False
1556 1594
1557 1595 self._storedeltachains = True
1558 1596
1559 1597 devel_nodemap = (
1560 1598 self._nodemap_file
1561 1599 and force_nodemap
1562 1600 and parse_index_v1_nodemap is not None
1563 1601 )
1564 1602
1565 1603 use_rust_index = False
1566 1604 if rustrevlog is not None:
1567 1605 if self._nodemap_file is not None:
1568 1606 use_rust_index = True
1569 1607 else:
1570 1608 use_rust_index = self.opener.options.get(b'rust.index')
1571 1609
1572 1610 self._parse_index = parse_index_v1
1573 1611 if self._format_version == REVLOGV0:
1574 1612 self._parse_index = revlogv0.parse_index_v0
1575 1613 elif self._format_version == REVLOGV2:
1576 1614 self._parse_index = parse_index_v2
1577 1615 elif self._format_version == CHANGELOGV2:
1578 1616 self._parse_index = parse_index_cl_v2
1579 1617 elif devel_nodemap:
1580 1618 self._parse_index = parse_index_v1_nodemap
1581 1619 elif use_rust_index:
1582 1620 self._parse_index = parse_index_v1_mixed
1583 1621 try:
1584 1622 d = self._parse_index(index_data, self._inline)
1585 1623 index, chunkcache = d
1586 1624 use_nodemap = (
1587 1625 not self._inline
1588 1626 and self._nodemap_file is not None
1589 1627 and hasattr(index, 'update_nodemap_data')
1590 1628 )
1591 1629 if use_nodemap:
1592 1630 nodemap_data = nodemaputil.persisted_data(self)
1593 1631 if nodemap_data is not None:
1594 1632 docket = nodemap_data[0]
1595 1633 if (
1596 1634 len(d[0]) > docket.tip_rev
1597 1635 and d[0][docket.tip_rev][7] == docket.tip_node
1598 1636 ):
1599 1637 # no changelog tampering
1600 1638 self._nodemap_docket = docket
1601 1639 index.update_nodemap_data(*nodemap_data)
1602 1640 except (ValueError, IndexError):
1603 1641 raise error.RevlogError(
1604 1642 _(b"index %s is corrupted") % self.display_id
1605 1643 )
1606 1644 self.index = index
1607 1645 # revnum -> (chain-length, sum-delta-length)
1608 1646 self._chaininfocache = util.lrucachedict(500)
1609 1647
1610 1648 return chunkcache
1611 1649
1612 1650 def _load_inner(self, chunk_cache):
1613 1651 if self._docket is None:
1614 1652 default_compression_header = None
1615 1653 else:
1616 1654 default_compression_header = self._docket.default_compression_header
1617 1655
1618 1656 self._inner = _InnerRevlog(
1619 1657 opener=self.opener,
1620 1658 index=self.index,
1621 1659 index_file=self._indexfile,
1622 1660 data_file=self._datafile,
1623 1661 sidedata_file=self._sidedatafile,
1624 1662 inline=self._inline,
1625 1663 data_config=self.data_config,
1626 1664 delta_config=self.delta_config,
1627 1665 feature_config=self.feature_config,
1628 1666 chunk_cache=chunk_cache,
1629 1667 default_compression_header=default_compression_header,
1630 1668 )
1631 1669
1632 1670 def get_revlog(self):
1633 1671 """simple function to mirror API of other not-really-revlog API"""
1634 1672 return self
1635 1673
1636 1674 @util.propertycache
1637 1675 def revlog_kind(self):
1638 1676 return self.target[0]
1639 1677
1640 1678 @util.propertycache
1641 1679 def display_id(self):
1642 1680 """The public facing "ID" of the revlog that we use in message"""
1643 1681 if self.revlog_kind == KIND_FILELOG:
1644 1682 # Reference the file without the "data/" prefix, so it is familiar
1645 1683 # to the user.
1646 1684 return self.target[1]
1647 1685 else:
1648 1686 return self.radix
1649 1687
1650 1688 def _datafp(self, mode=b'r'):
1651 1689 """file object for the revlog's data file"""
1652 1690 return self.opener(self._datafile, mode=mode)
1653 1691
1654 1692 def tiprev(self):
1655 1693 return len(self.index) - 1
1656 1694
1657 1695 def tip(self):
1658 1696 return self.node(self.tiprev())
1659 1697
1660 1698 def __contains__(self, rev):
1661 1699 return 0 <= rev < len(self)
1662 1700
1663 1701 def __len__(self):
1664 1702 return len(self.index)
1665 1703
1666 1704 def __iter__(self):
1667 1705 return iter(range(len(self)))
1668 1706
1669 1707 def revs(self, start=0, stop=None):
1670 1708 """iterate over all rev in this revlog (from start to stop)"""
1671 1709 return storageutil.iterrevs(len(self), start=start, stop=stop)
1672 1710
1673 1711 def hasnode(self, node):
1674 1712 try:
1675 1713 self.rev(node)
1676 1714 return True
1677 1715 except KeyError:
1678 1716 return False
1679 1717
1680 1718 def _candelta(self, baserev, rev):
1681 1719 """whether two revisions (baserev, rev) can be delta-ed or not"""
1682 1720 # Disable delta if either rev requires a content-changing flag
1683 1721 # processor (ex. LFS). This is because such flag processor can alter
1684 1722 # the rawtext content that the delta will be based on, and two clients
1685 1723 # could have a same revlog node with different flags (i.e. different
1686 1724 # rawtext contents) and the delta could be incompatible.
1687 1725 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1688 1726 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1689 1727 ):
1690 1728 return False
1691 1729 return True
1692 1730
1693 1731 def update_caches(self, transaction):
1694 1732 """update on disk cache
1695 1733
1696 1734 If a transaction is passed, the update may be delayed to transaction
1697 1735 commit."""
1698 1736 if self._nodemap_file is not None:
1699 1737 if transaction is None:
1700 1738 nodemaputil.update_persistent_nodemap(self)
1701 1739 else:
1702 1740 nodemaputil.setup_persistent_nodemap(transaction, self)
1703 1741
1704 1742 def clearcaches(self):
1705 1743 """Clear in-memory caches"""
1706 1744 self._inner._revisioncache = None
1707 1745 self._chainbasecache.clear()
1708 1746 self._inner._segmentfile.clear_cache()
1709 1747 self._inner._segmentfile_sidedata.clear_cache()
1710 1748 self._pcache = {}
1711 1749 self._nodemap_docket = None
1712 1750 self.index.clearcaches()
1713 1751 # The Python code is the one responsible for validating the docket, so
1714 1752 # we end up having to refresh it here.
1715 1753 use_nodemap = (
1716 1754 not self._inline
1717 1755 and self._nodemap_file is not None
1718 1756 and hasattr(self.index, 'update_nodemap_data')
1719 1757 )
1720 1758 if use_nodemap:
1721 1759 nodemap_data = nodemaputil.persisted_data(self)
1722 1760 if nodemap_data is not None:
1723 1761 self._nodemap_docket = nodemap_data[0]
1724 1762 self.index.update_nodemap_data(*nodemap_data)
1725 1763
1726 1764 def rev(self, node):
1727 1765 """return the revision number associated with a <nodeid>"""
1728 1766 try:
1729 1767 return self.index.rev(node)
1730 1768 except TypeError:
1731 1769 raise
1732 1770 except error.RevlogError:
1733 1771 # parsers.c radix tree lookup failed
1734 1772 if (
1735 1773 node == self.nodeconstants.wdirid
1736 1774 or node in self.nodeconstants.wdirfilenodeids
1737 1775 ):
1738 1776 raise error.WdirUnsupported
1739 1777 raise error.LookupError(node, self.display_id, _(b'no node'))
1740 1778
1741 1779 # Accessors for index entries.
1742 1780
1743 1781 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1744 1782 # are flags.
1745 1783 def start(self, rev):
1746 1784 return int(self.index[rev][0] >> 16)
1747 1785
1748 1786 def sidedata_cut_off(self, rev):
1749 1787 sd_cut_off = self.index[rev][8]
1750 1788 if sd_cut_off != 0:
1751 1789 return sd_cut_off
1752 1790 # This is some annoying dance, because entries without sidedata
1753 1791 # currently use 0 as their offset (instead of previous-offset +
1754 1792 # previous-size)
1755 1793 #
1756 1794 # We should reconsider this sidedata → 0 sidedata_offset policy.
1757 1795 # In the meantime, we need this.
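# Hypothetical illustration: if rev 7 stores (offset=0, size=0) while
# rev 6 stores (offset=100, size=20), the walk below resolves rev 7's
# cut off to 100 + 20 == 120.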
1758 1796 while 0 <= rev:
1759 1797 e = self.index[rev]
1760 1798 if e[9] != 0:
1761 1799 return e[8] + e[9]
1762 1800 rev -= 1
1763 1801 return 0
1764 1802
1765 1803 def flags(self, rev):
1766 1804 return self.index[rev][0] & 0xFFFF
1767 1805
1768 1806 def length(self, rev):
1769 1807 return self.index[rev][1]
1770 1808
1771 1809 def sidedata_length(self, rev):
1772 1810 if not self.feature_config.has_side_data:
1773 1811 return 0
1774 1812 return self.index[rev][9]
1775 1813
1776 1814 def rawsize(self, rev):
1777 1815 """return the length of the uncompressed text for a given revision"""
1778 1816 l = self.index[rev][2]
1779 1817 if l >= 0:
1780 1818 return l
1781 1819
1782 1820 t = self.rawdata(rev)
1783 1821 return len(t)
1784 1822
1785 1823 def size(self, rev):
1786 1824 """length of non-raw text (processed by a "read" flag processor)"""
1787 1825 # fast path: if no "read" flag processor could change the content,
1788 1826 # size is rawsize. note: ELLIPSIS is known to not change the content.
1789 1827 flags = self.flags(rev)
1790 1828 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1791 1829 return self.rawsize(rev)
1792 1830
1793 1831 return len(self.revision(rev))
1794 1832
1795 1833 def fast_rank(self, rev):
1796 1834 """Return the rank of a revision if already known, or None otherwise.
1797 1835
1798 1836 The rank of a revision is the size of the sub-graph it defines as a
1799 1837 head. Equivalently, the rank of a revision `r` is the size of the set
1800 1838 `ancestors(r)`, `r` included.
1801 1839
1802 1840 This method returns the rank retrieved from the revlog in constant
1803 1841 time. It makes no attempt at computing unknown values for versions of
1804 1842 the revlog which do not persist the rank.
1805 1843 """
1806 1844 rank = self.index[rev][ENTRY_RANK]
1807 1845 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1808 1846 return None
1809 1847 if rev == nullrev:
1810 1848 return 0 # convention
1811 1849 return rank
1812 1850
1813 1851 def chainbase(self, rev):
1814 1852 base = self._chainbasecache.get(rev)
1815 1853 if base is not None:
1816 1854 return base
1817 1855
1818 1856 index = self.index
1819 1857 iterrev = rev
1820 1858 base = index[iterrev][3]
1821 1859 while base != iterrev:
1822 1860 iterrev = base
1823 1861 base = index[iterrev][3]
1824 1862
1825 1863 self._chainbasecache[rev] = base
1826 1864 return base
1827 1865
1828 1866 def linkrev(self, rev):
1829 1867 return self.index[rev][4]
1830 1868
1831 1869 def parentrevs(self, rev):
1832 1870 try:
1833 1871 entry = self.index[rev]
1834 1872 except IndexError:
1835 1873 if rev == wdirrev:
1836 1874 raise error.WdirUnsupported
1837 1875 raise
1838 1876
1839 1877 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1840 1878 return entry[6], entry[5]
1841 1879 else:
1842 1880 return entry[5], entry[6]
1843 1881
1844 1882 # fast parentrevs(rev) where rev isn't filtered
1845 1883 _uncheckedparentrevs = parentrevs
1846 1884
1847 1885 def node(self, rev):
1848 1886 try:
1849 1887 return self.index[rev][7]
1850 1888 except IndexError:
1851 1889 if rev == wdirrev:
1852 1890 raise error.WdirUnsupported
1853 1891 raise
1854 1892
1855 1893 # Derived from index values.
1856 1894
1857 1895 def end(self, rev):
1858 1896 return self.start(rev) + self.length(rev)
1859 1897
1860 1898 def parents(self, node):
1861 1899 i = self.index
1862 1900 d = i[self.rev(node)]
1863 1901 # inline node() to avoid function call overhead
1864 1902 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1865 1903 return i[d[6]][7], i[d[5]][7]
1866 1904 else:
1867 1905 return i[d[5]][7], i[d[6]][7]
1868 1906
1869 1907 def chainlen(self, rev):
1870 1908 return self._chaininfo(rev)[0]
1871 1909
1872 1910 def _chaininfo(self, rev):
1873 1911 chaininfocache = self._chaininfocache
1874 1912 if rev in chaininfocache:
1875 1913 return chaininfocache[rev]
1876 1914 index = self.index
1877 1915 generaldelta = self.delta_config.general_delta
1878 1916 iterrev = rev
1879 1917 e = index[iterrev]
1880 1918 clen = 0
1881 1919 compresseddeltalen = 0
1882 1920 while iterrev != e[3]:
1883 1921 clen += 1
1884 1922 compresseddeltalen += e[1]
1885 1923 if generaldelta:
1886 1924 iterrev = e[3]
1887 1925 else:
1888 1926 iterrev -= 1
1889 1927 if iterrev in chaininfocache:
1890 1928 t = chaininfocache[iterrev]
1891 1929 clen += t[0]
1892 1930 compresseddeltalen += t[1]
1893 1931 break
1894 1932 e = index[iterrev]
1895 1933 else:
1896 1934 # Add text length of base since decompressing that also takes
1897 1935 # work. For cache hits the length is already included.
1898 1936 compresseddeltalen += e[1]
1899 1937 r = (clen, compresseddeltalen)
1900 1938 chaininfocache[rev] = r
1901 1939 return r
1902 1940
1903 1941 def _deltachain(self, rev, stoprev=None):
1904 1942 return self._inner._deltachain(rev, stoprev=stoprev)
1905 1943
1906 1944 def ancestors(self, revs, stoprev=0, inclusive=False):
1907 1945 """Generate the ancestors of 'revs' in reverse revision order.
1908 1946 Does not generate revs lower than stoprev.
1909 1947
1910 1948 See the documentation for ancestor.lazyancestors for more details."""
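# Hypothetical usage: ancestors([5], stoprev=2) lazily yields the
# ancestors of rev 5 with revnum >= 2, in reverse revision order;
# inclusive=True would make it yield rev 5 as well.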
1911 1949
1912 1950 # first, make sure start revisions aren't filtered
1913 1951 revs = list(revs)
1914 1952 checkrev = self.node
1915 1953 for r in revs:
1916 1954 checkrev(r)
1917 1955 # and we're sure ancestors aren't filtered as well
1918 1956
1919 1957 if rustancestor is not None and self.index.rust_ext_compat:
1920 1958 lazyancestors = rustancestor.LazyAncestors
1921 1959 arg = self.index
1922 1960 else:
1923 1961 lazyancestors = ancestor.lazyancestors
1924 1962 arg = self._uncheckedparentrevs
1925 1963 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1926 1964
1927 1965 def descendants(self, revs):
1928 1966 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1929 1967
1930 1968 def findcommonmissing(self, common=None, heads=None):
1931 1969 """Return a tuple of the ancestors of common and the ancestors of heads
1932 1970 that are not ancestors of common. In revset terminology, we return the
1933 1971 tuple:
1934 1972
1935 1973 ::common, (::heads) - (::common)
1936 1974
1937 1975 The list is sorted by revision number, meaning it is
1938 1976 topologically sorted.
1939 1977
1940 1978 'heads' and 'common' are both lists of node IDs. If heads is
1941 1979 not supplied, uses all of the revlog's heads. If common is not
1942 1980 supplied, uses nullid."""
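# Sketch of the contract: with common = [C] and heads = [H], the
# first element is a lazy set covering ::C and the second is the
# topologically sorted list of nodes in (::H) - (::C).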
1943 1981 if common is None:
1944 1982 common = [self.nullid]
1945 1983 if heads is None:
1946 1984 heads = self.heads()
1947 1985
1948 1986 common = [self.rev(n) for n in common]
1949 1987 heads = [self.rev(n) for n in heads]
1950 1988
1951 1989 # we want the ancestors, but inclusive
1952 1990 class lazyset:
1953 1991 def __init__(self, lazyvalues):
1954 1992 self.addedvalues = set()
1955 1993 self.lazyvalues = lazyvalues
1956 1994
1957 1995 def __contains__(self, value):
1958 1996 return value in self.addedvalues or value in self.lazyvalues
1959 1997
1960 1998 def __iter__(self):
1961 1999 added = self.addedvalues
1962 2000 for r in added:
1963 2001 yield r
1964 2002 for r in self.lazyvalues:
1965 2003 if r not in added:
1966 2004 yield r
1967 2005
1968 2006 def add(self, value):
1969 2007 self.addedvalues.add(value)
1970 2008
1971 2009 def update(self, values):
1972 2010 self.addedvalues.update(values)
1973 2011
1974 2012 has = lazyset(self.ancestors(common))
1975 2013 has.add(nullrev)
1976 2014 has.update(common)
1977 2015
1978 2016 # take all ancestors from heads that aren't in has
1979 2017 missing = set()
1980 2018 visit = collections.deque(r for r in heads if r not in has)
1981 2019 while visit:
1982 2020 r = visit.popleft()
1983 2021 if r in missing:
1984 2022 continue
1985 2023 else:
1986 2024 missing.add(r)
1987 2025 for p in self.parentrevs(r):
1988 2026 if p not in has:
1989 2027 visit.append(p)
1990 2028 missing = list(missing)
1991 2029 missing.sort()
1992 2030 return has, [self.node(miss) for miss in missing]
1993 2031
1994 2032 def incrementalmissingrevs(self, common=None):
1995 2033 """Return an object that can be used to incrementally compute the
1996 2034 revision numbers of the ancestors of arbitrary sets that are not
1997 2035 ancestors of common. This is an ancestor.incrementalmissingancestors
1998 2036 object.
1999 2037
2000 2038 'common' is a list of revision numbers. If common is not supplied, uses
2001 2039 nullrev.
2002 2040 """
2003 2041 if common is None:
2004 2042 common = [nullrev]
2005 2043
2006 2044 if rustancestor is not None and self.index.rust_ext_compat:
2007 2045 return rustancestor.MissingAncestors(self.index, common)
2008 2046 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2009 2047
2010 2048 def findmissingrevs(self, common=None, heads=None):
2011 2049 """Return the revision numbers of the ancestors of heads that
2012 2050 are not ancestors of common.
2013 2051
2014 2052 More specifically, return a list of revision numbers corresponding to
2015 2053 nodes N such that every N satisfies the following constraints:
2016 2054
2017 2055 1. N is an ancestor of some node in 'heads'
2018 2056 2. N is not an ancestor of any node in 'common'
2019 2057
2020 2058 The list is sorted by revision number, meaning it is
2021 2059 topologically sorted.
2022 2060
2023 2061 'heads' and 'common' are both lists of revision numbers. If heads is
2024 2062 not supplied, uses all of the revlog's heads. If common is not
2025 2063 supplied, uses nullid."""
2026 2064 if common is None:
2027 2065 common = [nullrev]
2028 2066 if heads is None:
2029 2067 heads = self.headrevs()
2030 2068
2031 2069 inc = self.incrementalmissingrevs(common=common)
2032 2070 return inc.missingancestors(heads)
2033 2071
2034 2072 def findmissing(self, common=None, heads=None):
2035 2073 """Return the ancestors of heads that are not ancestors of common.
2036 2074
2037 2075 More specifically, return a list of nodes N such that every N
2038 2076 satisfies the following constraints:
2039 2077
2040 2078 1. N is an ancestor of some node in 'heads'
2041 2079 2. N is not an ancestor of any node in 'common'
2042 2080
2043 2081 The list is sorted by revision number, meaning it is
2044 2082 topologically sorted.
2045 2083
2046 2084 'heads' and 'common' are both lists of node IDs. If heads is
2047 2085 not supplied, uses all of the revlog's heads. If common is not
2048 2086 supplied, uses nullid."""
2049 2087 if common is None:
2050 2088 common = [self.nullid]
2051 2089 if heads is None:
2052 2090 heads = self.heads()
2053 2091
2054 2092 common = [self.rev(n) for n in common]
2055 2093 heads = [self.rev(n) for n in heads]
2056 2094
2057 2095 inc = self.incrementalmissingrevs(common=common)
2058 2096 return [self.node(r) for r in inc.missingancestors(heads)]
2059 2097
2060 2098 def nodesbetween(self, roots=None, heads=None):
2061 2099 """Return a topological path from 'roots' to 'heads'.
2062 2100
2063 2101 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2064 2102 topologically sorted list of all nodes N that satisfy both of
2065 2103 these constraints:
2066 2104
2067 2105 1. N is a descendant of some node in 'roots'
2068 2106 2. N is an ancestor of some node in 'heads'
2069 2107
2070 2108 Every node is considered to be both a descendant and an ancestor
2071 2109 of itself, so every reachable node in 'roots' and 'heads' will be
2072 2110 included in 'nodes'.
2073 2111
2074 2112 'outroots' is the list of reachable nodes in 'roots', i.e., the
2075 2113 subset of 'roots' that is returned in 'nodes'. Likewise,
2076 2114 'outheads' is the subset of 'heads' that is also in 'nodes'.
2077 2115
2078 2116 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2079 2117 unspecified, uses nullid as the only root. If 'heads' is
2080 2118 unspecified, uses list of all of the revlog's heads."""
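# Illustrative sketch: for a linear history A -> B -> C,
# nodesbetween([A], [C]) would return ([A, B, C], [A], [C]).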
2081 2119 nonodes = ([], [], [])
2082 2120 if roots is not None:
2083 2121 roots = list(roots)
2084 2122 if not roots:
2085 2123 return nonodes
2086 2124 lowestrev = min([self.rev(n) for n in roots])
2087 2125 else:
2088 2126 roots = [self.nullid] # Everybody's a descendant of nullid
2089 2127 lowestrev = nullrev
2090 2128 if (lowestrev == nullrev) and (heads is None):
2091 2129 # We want _all_ the nodes!
2092 2130 return (
2093 2131 [self.node(r) for r in self],
2094 2132 [self.nullid],
2095 2133 list(self.heads()),
2096 2134 )
2097 2135 if heads is None:
2098 2136 # All nodes are ancestors, so the latest ancestor is the last
2099 2137 # node.
2100 2138 highestrev = len(self) - 1
2101 2139 # Set ancestors to None to signal that every node is an ancestor.
2102 2140 ancestors = None
2103 2141 # Set heads to an empty dictionary for later discovery of heads
2104 2142 heads = {}
2105 2143 else:
2106 2144 heads = list(heads)
2107 2145 if not heads:
2108 2146 return nonodes
2109 2147 ancestors = set()
2110 2148 # Turn heads into a dictionary so we can remove 'fake' heads.
2111 2149 # Also, later we will be using it to filter out the heads we can't
2112 2150 # find from roots.
2113 2151 heads = dict.fromkeys(heads, False)
2114 2152 # Start at the top and keep marking parents until we're done.
2115 2153 nodestotag = set(heads)
2116 2154 # Remember where the top was so we can use it as a limit later.
2117 2155 highestrev = max([self.rev(n) for n in nodestotag])
2118 2156 while nodestotag:
2119 2157 # grab a node to tag
2120 2158 n = nodestotag.pop()
2121 2159 # Never tag nullid
2122 2160 if n == self.nullid:
2123 2161 continue
2124 2162 # A node's revision number represents its place in a
2125 2163 # topologically sorted list of nodes.
2126 2164 r = self.rev(n)
2127 2165 if r >= lowestrev:
2128 2166 if n not in ancestors:
2129 2167 # If we are possibly a descendant of one of the roots
2130 2168 # and we haven't already been marked as an ancestor
2131 2169 ancestors.add(n) # Mark as ancestor
2132 2170 # Add non-nullid parents to list of nodes to tag.
2133 2171 nodestotag.update(
2134 2172 [p for p in self.parents(n) if p != self.nullid]
2135 2173 )
2136 2174 elif n in heads: # We've seen it before, is it a fake head?
2137 2175 # So it is, real heads should not be the ancestors of
2138 2176 # any other heads.
2139 2177 heads.pop(n)
2140 2178 if not ancestors:
2141 2179 return nonodes
2142 2180 # Now that we have our set of ancestors, we want to remove any
2143 2181 # roots that are not ancestors.
2144 2182
2145 2183 # If one of the roots was nullid, everything is included anyway.
2146 2184 if lowestrev > nullrev:
2147 2185 # But, since we weren't, let's recompute the lowest rev to not
2148 2186 # include roots that aren't ancestors.
2149 2187
2150 2188 # Filter out roots that aren't ancestors of heads
2151 2189 roots = [root for root in roots if root in ancestors]
2152 2190 # Recompute the lowest revision
2153 2191 if roots:
2154 2192 lowestrev = min([self.rev(root) for root in roots])
2155 2193 else:
2156 2194 # No more roots? Return empty list
2157 2195 return nonodes
2158 2196 else:
2159 2197 # We are descending from nullid, and don't need to care about
2160 2198 # any other roots.
2161 2199 lowestrev = nullrev
2162 2200 roots = [self.nullid]
2163 2201 # Transform our roots list into a set.
2164 2202 descendants = set(roots)
2165 2203 # Also, keep the original roots so we can filter out roots that aren't
2166 2204 # 'real' roots (i.e. are descended from other roots).
2167 2205 roots = descendants.copy()
2168 2206 # Our topologically sorted list of output nodes.
2169 2207 orderedout = []
2170 2208 # Don't start at nullid since we don't want nullid in our output list,
2171 2209 # and if nullid shows up in descendants, empty parents will look like
2172 2210 # they're descendants.
2173 2211 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2174 2212 n = self.node(r)
2175 2213 isdescendant = False
2176 2214 if lowestrev == nullrev: # Everybody is a descendant of nullid
2177 2215 isdescendant = True
2178 2216 elif n in descendants:
2179 2217 # n is already a descendant
2180 2218 isdescendant = True
2181 2219 # This check only needs to be done here because all the roots
2182 2220 # will start being marked as descendants before the loop.
2183 2221 if n in roots:
2184 2222 # If n was a root, check if it's a 'real' root.
2185 2223 p = tuple(self.parents(n))
2186 2224 # If any of its parents are descendants, it's not a root.
2187 2225 if (p[0] in descendants) or (p[1] in descendants):
2188 2226 roots.remove(n)
2189 2227 else:
2190 2228 p = tuple(self.parents(n))
2191 2229 # A node is a descendant if either of its parents are
2192 2230 # descendants. (We seeded the descendants set with the roots
2193 2231 # up there, remember?)
2194 2232 if (p[0] in descendants) or (p[1] in descendants):
2195 2233 descendants.add(n)
2196 2234 isdescendant = True
2197 2235 if isdescendant and ((ancestors is None) or (n in ancestors)):
2198 2236 # Only include nodes that are both descendants and ancestors.
2199 2237 orderedout.append(n)
2200 2238 if (ancestors is not None) and (n in heads):
2201 2239 # We're trying to figure out which heads are reachable
2202 2240 # from roots.
2203 2241 # Mark this head as having been reached
2204 2242 heads[n] = True
2205 2243 elif ancestors is None:
2206 2244 # Otherwise, we're trying to discover the heads.
2207 2245 # Assume this is a head because if it isn't, the next step
2208 2246 # will eventually remove it.
2209 2247 heads[n] = True
2210 2248 # But, obviously its parents aren't.
2211 2249 for p in self.parents(n):
2212 2250 heads.pop(p, None)
2213 2251 heads = [head for head, flag in heads.items() if flag]
2214 2252 roots = list(roots)
2215 2253 assert orderedout
2216 2254 assert roots
2217 2255 assert heads
2218 2256 return (orderedout, roots, heads)
2219 2257
2220 2258 def headrevs(self, revs=None):
2221 2259 if revs is None:
2222 2260 try:
2223 2261 return self.index.headrevs()
2224 2262 except AttributeError:
2225 2263 return self._headrevs()
2226 2264 if rustdagop is not None and self.index.rust_ext_compat:
2227 2265 return rustdagop.headrevs(self.index, revs)
2228 2266 return dagop.headrevs(revs, self._uncheckedparentrevs)
2229 2267
2230 2268 def computephases(self, roots):
2231 2269 return self.index.computephasesmapsets(roots)
2232 2270
2233 2271 def _headrevs(self):
2234 2272 count = len(self)
2235 2273 if not count:
2236 2274 return [nullrev]
2237 2275 # we won't iter over filtered rev so nobody is a head at start
2238 2276 ishead = [0] * (count + 1)
2239 2277 index = self.index
2240 2278 for r in self:
2241 2279 ishead[r] = 1 # I may be a head
2242 2280 e = index[r]
2243 2281 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2244 2282 return [r for r, val in enumerate(ishead) if val]
2245 2283
2246 2284 def heads(self, start=None, stop=None):
2247 2285 """return the list of all nodes that have no children
2248 2286
2249 2287 if start is specified, only heads that are descendants of
2250 2288 start will be returned
2251 2289 if stop is specified, it will consider all the revs from stop
2252 2290 as if they had no children
2253 2291 """
2254 2292 if start is None and stop is None:
2255 2293 if not len(self):
2256 2294 return [self.nullid]
2257 2295 return [self.node(r) for r in self.headrevs()]
2258 2296
2259 2297 if start is None:
2260 2298 start = nullrev
2261 2299 else:
2262 2300 start = self.rev(start)
2263 2301
2264 2302 stoprevs = {self.rev(n) for n in stop or []}
2265 2303
2266 2304 revs = dagop.headrevssubset(
2267 2305 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2268 2306 )
2269 2307
2270 2308 return [self.node(rev) for rev in revs]
2271 2309
2272 2310 def children(self, node):
2273 2311 """find the children of a given node"""
2274 2312 c = []
2275 2313 p = self.rev(node)
2276 2314 for r in self.revs(start=p + 1):
2277 2315 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2278 2316 if prevs:
2279 2317 for pr in prevs:
2280 2318 if pr == p:
2281 2319 c.append(self.node(r))
2282 2320 elif p == nullrev:
2283 2321 c.append(self.node(r))
2284 2322 return c
2285 2323
2286 2324 def commonancestorsheads(self, a, b):
2287 2325 """calculate all the heads of the common ancestors of nodes a and b"""
2288 2326 a, b = self.rev(a), self.rev(b)
2289 2327 ancs = self._commonancestorsheads(a, b)
2290 2328 return pycompat.maplist(self.node, ancs)
2291 2329
2292 2330 def _commonancestorsheads(self, *revs):
2293 2331 """calculate all the heads of the common ancestors of revs"""
2294 2332 try:
2295 2333 ancs = self.index.commonancestorsheads(*revs)
2296 2334 except (AttributeError, OverflowError): # C implementation failed
2297 2335 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2298 2336 return ancs
2299 2337
2300 2338 def isancestor(self, a, b):
2301 2339 """return True if node a is an ancestor of node b
2302 2340
2303 2341 A revision is considered an ancestor of itself."""
2304 2342 a, b = self.rev(a), self.rev(b)
2305 2343 return self.isancestorrev(a, b)
2306 2344
2307 2345 def isancestorrev(self, a, b):
2308 2346 """return True if revision a is an ancestor of revision b
2309 2347
2310 2348 A revision is considered an ancestor of itself.
2311 2349
2312 2350 The implementation of this is trivial but the use of
2313 2351 reachableroots is not."""
2314 2352 if a == nullrev:
2315 2353 return True
2316 2354 elif a == b:
2317 2355 return True
2318 2356 elif a > b:
2319 2357 return False
2320 2358 return bool(self.reachableroots(a, [b], [a], includepath=False))
2321 2359
2322 2360 def reachableroots(self, minroot, heads, roots, includepath=False):
2323 2361 """return (heads(::(<roots> and <roots>::<heads>)))
2324 2362
2325 2363 If includepath is True, return (<roots>::<heads>)."""
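# Sketch: with roots = [a], heads = [b] and a an ancestor of b,
# includepath=False returns the reachable roots (here [a]) while
# includepath=True returns every rev on the a::b paths.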
2326 2364 try:
2327 2365 return self.index.reachableroots2(
2328 2366 minroot, heads, roots, includepath
2329 2367 )
2330 2368 except AttributeError:
2331 2369 return dagop._reachablerootspure(
2332 2370 self.parentrevs, minroot, roots, heads, includepath
2333 2371 )
2334 2372
2335 2373 def ancestor(self, a, b):
2336 2374 """calculate the "best" common ancestor of nodes a and b"""
2337 2375
2338 2376 a, b = self.rev(a), self.rev(b)
2339 2377 try:
2340 2378 ancs = self.index.ancestors(a, b)
2341 2379 except (AttributeError, OverflowError):
2342 2380 ancs = ancestor.ancestors(self.parentrevs, a, b)
2343 2381 if ancs:
2344 2382 # choose a consistent winner when there's a tie
2345 2383 return min(map(self.node, ancs))
2346 2384 return self.nullid
2347 2385
2348 2386 def _match(self, id):
2349 2387 if isinstance(id, int):
2350 2388 # rev
2351 2389 return self.node(id)
2352 2390 if len(id) == self.nodeconstants.nodelen:
2353 2391 # possibly a binary node
2354 2392 # odds of a binary node being all hex in ASCII are 1 in 10**25
2355 2393 try:
2356 2394 node = id
2357 2395 self.rev(node) # quick search the index
2358 2396 return node
2359 2397 except error.LookupError:
2360 2398 pass # may be partial hex id
2361 2399 try:
2362 2400 # str(rev)
2363 2401 rev = int(id)
2364 2402 if b"%d" % rev != id:
2365 2403 raise ValueError
2366 2404 if rev < 0:
2367 2405 rev = len(self) + rev
2368 2406 if rev < 0 or rev >= len(self):
2369 2407 raise ValueError
2370 2408 return self.node(rev)
2371 2409 except (ValueError, OverflowError):
2372 2410 pass
2373 2411 if len(id) == 2 * self.nodeconstants.nodelen:
2374 2412 try:
2375 2413 # a full hex nodeid?
2376 2414 node = bin(id)
2377 2415 self.rev(node)
2378 2416 return node
2379 2417 except (binascii.Error, error.LookupError):
2380 2418 pass
2381 2419
2382 2420 def _partialmatch(self, id):
2383 2421 # we don't care about wdirfilenodeids as they should always be full hashes
2384 2422 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2385 2423 ambiguous = False
2386 2424 try:
2387 2425 partial = self.index.partialmatch(id)
2388 2426 if partial and self.hasnode(partial):
2389 2427 if maybewdir:
2390 2428 # single 'ff...' match in radix tree, ambiguous with wdir
2391 2429 ambiguous = True
2392 2430 else:
2393 2431 return partial
2394 2432 elif maybewdir:
2395 2433 # no 'ff...' match in radix tree, wdir identified
2396 2434 raise error.WdirUnsupported
2397 2435 else:
2398 2436 return None
2399 2437 except error.RevlogError:
2400 2438 # parsers.c radix tree lookup gave multiple matches
2401 2439 # fast path: for unfiltered changelog, radix tree is accurate
2402 2440 if not getattr(self, 'filteredrevs', None):
2403 2441 ambiguous = True
2404 2442 # fall through to slow path that filters hidden revisions
2405 2443 except (AttributeError, ValueError):
2406 2444 # we are pure python, or key is not hex
2407 2445 pass
2408 2446 if ambiguous:
2409 2447 raise error.AmbiguousPrefixLookupError(
2410 2448 id, self.display_id, _(b'ambiguous identifier')
2411 2449 )
2412 2450
2413 2451 if id in self._pcache:
2414 2452 return self._pcache[id]
2415 2453
2416 2454 if len(id) <= 40:
2417 2455 # hex(node)[:...]
2418 2456 l = len(id) // 2 * 2 # grab an even number of digits
2419 2457 try:
2420 2458 # we're dropping the last digit, so let's check that it's hex,
2421 2459 # to avoid the expensive computation below if it's not
2422 2460 if len(id) % 2 > 0:
2423 2461 if id[-1] not in hexdigits:
2424 2462 return None
2425 2463 prefix = bin(id[:l])
2426 2464 except binascii.Error:
2427 2465 pass
2428 2466 else:
2429 2467 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2430 2468 nl = [
2431 2469 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2432 2470 ]
2433 2471 if self.nodeconstants.nullhex.startswith(id):
2434 2472 nl.append(self.nullid)
2435 2473 if len(nl) > 0:
2436 2474 if len(nl) == 1 and not maybewdir:
2437 2475 self._pcache[id] = nl[0]
2438 2476 return nl[0]
2439 2477 raise error.AmbiguousPrefixLookupError(
2440 2478 id, self.display_id, _(b'ambiguous identifier')
2441 2479 )
2442 2480 if maybewdir:
2443 2481 raise error.WdirUnsupported
2444 2482 return None
2445 2483
2446 2484 def lookup(self, id):
2447 2485 """locate a node based on:
2448 2486 - revision number or str(revision number)
2449 2487 - nodeid or subset of hex nodeid
2450 2488 """
2451 2489 n = self._match(id)
2452 2490 if n is not None:
2453 2491 return n
2454 2492 n = self._partialmatch(id)
2455 2493 if n:
2456 2494 return n
2457 2495
2458 2496 raise error.LookupError(id, self.display_id, _(b'no match found'))
2459 2497
2460 2498 def shortest(self, node, minlength=1):
2461 2499 """Find the shortest unambiguous prefix that matches node."""
2462 2500
2463 2501 def isvalid(prefix):
2464 2502 try:
2465 2503 matchednode = self._partialmatch(prefix)
2466 2504 except error.AmbiguousPrefixLookupError:
2467 2505 return False
2468 2506 except error.WdirUnsupported:
2469 2507 # single 'ff...' match
2470 2508 return True
2471 2509 if matchednode is None:
2472 2510 raise error.LookupError(node, self.display_id, _(b'no node'))
2473 2511 return True
2474 2512
2475 2513 def maybewdir(prefix):
2476 2514 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2477 2515
2478 2516 hexnode = hex(node)
2479 2517
2480 2518 def disambiguate(hexnode, minlength):
2481 2519 """Disambiguate against wdirid."""
2482 2520 for length in range(minlength, len(hexnode) + 1):
2483 2521 prefix = hexnode[:length]
2484 2522 if not maybewdir(prefix):
2485 2523 return prefix
2486 2524
2487 2525 if not getattr(self, 'filteredrevs', None):
2488 2526 try:
2489 2527 length = max(self.index.shortest(node), minlength)
2490 2528 return disambiguate(hexnode, length)
2491 2529 except error.RevlogError:
2492 2530 if node != self.nodeconstants.wdirid:
2493 2531 raise error.LookupError(
2494 2532 node, self.display_id, _(b'no node')
2495 2533 )
2496 2534 except AttributeError:
2497 2535 # Fall through to pure code
2498 2536 pass
2499 2537
2500 2538 if node == self.nodeconstants.wdirid:
2501 2539 for length in range(minlength, len(hexnode) + 1):
2502 2540 prefix = hexnode[:length]
2503 2541 if isvalid(prefix):
2504 2542 return prefix
2505 2543
2506 2544 for length in range(minlength, len(hexnode) + 1):
2507 2545 prefix = hexnode[:length]
2508 2546 if isvalid(prefix):
2509 2547 return disambiguate(hexnode, length)
2510 2548
2511 2549 def cmp(self, node, text):
2512 2550 """compare text with a given file revision
2513 2551
2514 2552 returns True if text is different than what is stored.
2515 2553 """
2516 2554 p1, p2 = self.parents(node)
2517 2555 return storageutil.hashrevisionsha1(text, p1, p2) != node
2518 2556
2519 2557 def deltaparent(self, rev):
2520 2558 """return deltaparent of the given revision"""
2521 2559 base = self.index[rev][3]
2522 2560 if base == rev:
2523 2561 return nullrev
2524 2562 elif self.delta_config.general_delta:
2525 2563 return base
2526 2564 else:
2527 2565 return rev - 1
2528 2566
2529 2567 def issnapshot(self, rev):
2530 2568 """tells whether rev is a snapshot"""
2531 2569 ret = self._inner.issnapshot(rev)
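# cache the inner object's bound method on the instance so later
# calls bypass this forwarding shim entirely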
2532 2570 self.issnapshot = self._inner.issnapshot
2533 2571 return ret
2534 2572
2535 2573 def snapshotdepth(self, rev):
2536 2574 """number of snapshot in the chain before this one"""
2537 2575 if not self.issnapshot(rev):
2538 2576 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2539 2577 return len(self._inner._deltachain(rev)[0]) - 1
2540 2578
2541 2579 def revdiff(self, rev1, rev2):
2542 2580 """return or calculate a delta between two revisions
2543 2581
2544 2582 The delta calculated is in binary form and is intended to be written to
2545 2583 revlog data directly. So this function needs raw revision data.
2546 2584 """
2547 2585 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2548 2586 return bytes(self._inner._chunk(rev2))
2549 2587
2550 2588 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2551 2589
2552 2590 def revision(self, nodeorrev):
2553 2591 """return an uncompressed revision of a given node or revision
2554 2592 number.
2555 2593 """
2556 2594 return self._revisiondata(nodeorrev)
2557 2595
2558 2596 def sidedata(self, nodeorrev):
2559 2597 """a map of extra data related to the changeset but not part of the hash
2560 2598
2561 2599 This function currently returns a dictionary. However, a more advanced
2562 2600 mapping object will likely be used in the future to allow more
2563 2601 efficient/lazy code.
2564 2602 """
2565 2603 # deal with <nodeorrev> argument type
2566 2604 if isinstance(nodeorrev, int):
2567 2605 rev = nodeorrev
2568 2606 else:
2569 2607 rev = self.rev(nodeorrev)
2570 2608 return self._sidedata(rev)
2571 2609
2572 2610 def _rawtext(self, node, rev):
2573 2611 """return the possibly unvalidated rawtext for a revision
2574 2612
2575 2613 returns (rev, rawtext, validated)
2576 2614 """
2577 2615 # Check if we have the entry in cache
2578 2616 # The cache entry looks like (node, rev, rawtext)
2579 2617 if self._inner._revisioncache:
2580 2618 if self._inner._revisioncache[0] == node:
2581 2619 return (rev, self._inner._revisioncache[2], True)
2582 2620
2583 2621 if rev is None:
2584 2622 rev = self.rev(node)
2585 2623
2586 2624 return self._inner.raw_text(node, rev)
2587 2625
2588 2626 def _revisiondata(self, nodeorrev, raw=False):
2589 2627 # deal with <nodeorrev> argument type
2590 2628 if isinstance(nodeorrev, int):
2591 2629 rev = nodeorrev
2592 2630 node = self.node(rev)
2593 2631 else:
2594 2632 node = nodeorrev
2595 2633 rev = None
2596 2634
2597 2635 # fast path the special `nullid` rev
2598 2636 if node == self.nullid:
2599 2637 return b""
2600 2638
2601 2639 # ``rawtext`` is the text as stored inside the revlog. Might be the
2602 2640 # revision or might need to be processed to retrieve the revision.
2603 2641 rev, rawtext, validated = self._rawtext(node, rev)
2604 2642
2605 2643 if raw and validated:
2606 2644 # if we don't want to process the raw text and that raw
2607 2645 # text is cached, we can exit early.
2608 2646 return rawtext
2609 2647 if rev is None:
2610 2648 rev = self.rev(node)
2611 2649 # the revlog's flags for this revision
2612 2650 # (they usually alter its state or content)
2613 2651 flags = self.flags(rev)
2614 2652
2615 2653 if validated and flags == REVIDX_DEFAULT_FLAGS:
2616 2654 # no extra flags set, no flag processor runs, text = rawtext
2617 2655 return rawtext
2618 2656
2619 2657 if raw:
2620 2658 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2621 2659 text = rawtext
2622 2660 else:
2623 2661 r = flagutil.processflagsread(self, rawtext, flags)
2624 2662 text, validatehash = r
2625 2663 if validatehash:
2626 2664 self.checkhash(text, node, rev=rev)
2627 2665 if not validated:
2628 2666 self._inner._revisioncache = (node, rev, rawtext)
2629 2667
2630 2668 return text
2631 2669
2632 2670 def _sidedata(self, rev):
2633 2671 """Return the sidedata for a given revision number."""
2634 index_entry = self.index[rev]
2635 sidedata_offset = index_entry[8]
2636 sidedata_size = index_entry[9]
2637
2638 if self._inline:
2639 sidedata_offset += self.index.entry_size * (1 + rev)
2640 if sidedata_size == 0:
2641 return {}
2642
2643 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2644 filename = self._sidedatafile
2645 end = self._docket.sidedata_end
2646 offset = sidedata_offset
2647 length = sidedata_size
2648 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2649 raise error.RevlogError(m)
2650
2651 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2652 sidedata_offset, sidedata_size
2653 )
2654
2655 comp = self.index[rev][11]
2656 if comp == COMP_MODE_PLAIN:
2657 segment = comp_segment
2658 elif comp == COMP_MODE_DEFAULT:
2659 segment = self._inner._decompressor(comp_segment)
2660 elif comp == COMP_MODE_INLINE:
2661 segment = self._inner.decompress(comp_segment)
2662 else:
2663 msg = b'unknown compression mode %d'
2664 msg %= comp
2665 raise error.RevlogError(msg)
2666
2667 sidedata = sidedatautil.deserialize_sidedata(segment)
2668 return sidedata
2672 sidedata_end = None
2673 if self._docket is not None:
2674 sidedata_end = self._docket.sidedata_end
2675 return self._inner.sidedata(rev, sidedata_end)
2669 2676
2670 2677 def rawdata(self, nodeorrev):
2671 2678 """return an uncompressed raw data of a given node or revision number."""
2672 2679 return self._revisiondata(nodeorrev, raw=True)
2673 2680
2674 2681 def hash(self, text, p1, p2):
2675 2682 """Compute a node hash.
2676 2683
2677 2684 Available as a function so that subclasses can replace the hash
2678 2685 as needed.
2679 2686 """
2680 2687 return storageutil.hashrevisionsha1(text, p1, p2)
2681 2688
2682 2689 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2683 2690 """Check node hash integrity.
2684 2691
2685 2692 Available as a function so that subclasses can extend hash mismatch
2686 2693 behaviors as needed.
2687 2694 """
2688 2695 try:
2689 2696 if p1 is None and p2 is None:
2690 2697 p1, p2 = self.parents(node)
2691 2698 if node != self.hash(text, p1, p2):
2692 2699 # Clear the revision cache on hash failure. The revision cache
2693 2700 # only stores the raw revision and clearing the cache does have
2694 2701 # the side-effect that we won't have a cache hit when the raw
2695 2702 # revision data is accessed. But this case should be rare and
2696 2703 # it is extra work to teach the cache about the hash
2697 2704 # verification state.
2698 2705 if (
2699 2706 self._inner._revisioncache
2700 2707 and self._inner._revisioncache[0] == node
2701 2708 ):
2702 2709 self._inner._revisioncache = None
2703 2710
2704 2711 revornode = rev
2705 2712 if revornode is None:
2706 2713 revornode = templatefilters.short(hex(node))
2707 2714 raise error.RevlogError(
2708 2715 _(b"integrity check failed on %s:%s")
2709 2716 % (self.display_id, pycompat.bytestr(revornode))
2710 2717 )
2711 2718 except error.RevlogError:
2712 2719 if self.feature_config.censorable and storageutil.iscensoredtext(
2713 2720 text
2714 2721 ):
2715 2722 raise error.CensoredNodeError(self.display_id, node, text)
2716 2723 raise
2717 2724
2718 2725 @property
2719 2726 def _split_index_file(self):
2720 2727 """the path where to expect the index of an ongoing splitting operation
2721 2728
2722 2729 The file will only exist if a splitting operation is in progress, but
2723 2730 it is always expected at the same location."""
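# For example, a filelog with radix b'data/foo' would use
# b'data-s/foo.i', while a radix of b'00changelog' would give
# b'00changelog.i.s'.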
2724 2731 parts = self.radix.split(b'/')
2725 2732 if len(parts) > 1:
2726 2733 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2727 2734 head = parts[0] + b'-s'
2728 2735 mids = parts[1:-1]
2729 2736 tail = parts[-1] + b'.i'
2730 2737 pieces = [head] + mids + [tail]
2731 2738 return b'/'.join(pieces)
2732 2739 else:
2733 2740 # the revlog is stored at the root of the store (changelog or
2734 2741 # manifest), no risk of collision.
2735 2742 return self.radix + b'.i.s'
2736 2743
2737 2744 def _enforceinlinesize(self, tr, side_write=True):
2738 2745 """Check if the revlog is too big for inline and convert if so.
2739 2746
2740 2747 This should be called after revisions are added to the revlog. If the
2741 2748 revlog has grown too large to be an inline revlog, it will convert it
2742 2749 to use multiple index and data files.
2743 2750 """
2744 2751 tiprev = len(self) - 1
2745 2752 total_size = self.start(tiprev) + self.length(tiprev)
2746 2753 if not self._inline or total_size < _maxinline:
2747 2754 return
2748 2755
2749 2756 if self._docket is not None:
2750 2757 msg = b"inline revlog should not have a docket"
2751 2758 raise error.ProgrammingError(msg)
2752 2759
2753 2760 troffset = tr.findoffset(self._indexfile)
2754 2761 if troffset is None:
2755 2762 raise error.RevlogError(
2756 2763 _(b"%s not found in the transaction") % self._indexfile
2757 2764 )
2758 2765 if troffset:
2759 2766 tr.addbackup(self._indexfile, for_offset=True)
2760 2767 tr.add(self._datafile, 0)
2761 2768
2762 2769 new_index_file_path = None
2763 2770 if side_write:
2764 2771 old_index_file_path = self._indexfile
2765 2772 new_index_file_path = self._split_index_file
2766 2773 opener = self.opener
2767 2774 weak_self = weakref.ref(self)
2768 2775
2769 2776 # the "split" index replace the real index when the transaction is
2770 2777 # finalized
2771 2778 def finalize_callback(tr):
2772 2779 opener.rename(
2773 2780 new_index_file_path,
2774 2781 old_index_file_path,
2775 2782 checkambig=True,
2776 2783 )
2777 2784 maybe_self = weak_self()
2778 2785 if maybe_self is not None:
2779 2786 maybe_self._indexfile = old_index_file_path
2780 2787 maybe_self._inner.index_file = maybe_self._indexfile
2781 2788
2782 2789 def abort_callback(tr):
2783 2790 maybe_self = weak_self()
2784 2791 if maybe_self is not None:
2785 2792 maybe_self._indexfile = old_index_file_path
2786 2793 maybe_self._inner.inline = True
2787 2794 maybe_self._inner.index_file = old_index_file_path
2788 2795
2789 2796 tr.registertmp(new_index_file_path)
2790 2797 if self.target[1] is not None:
2791 2798 callback_id = b'000-revlog-split-%d-%s' % self.target
2792 2799 else:
2793 2800 callback_id = b'000-revlog-split-%d' % self.target[0]
2794 2801 tr.addfinalize(callback_id, finalize_callback)
2795 2802 tr.addabort(callback_id, abort_callback)
2796 2803
2797 2804 self._format_flags &= ~FLAG_INLINE_DATA
2798 2805 self._inner.split_inline(
2799 2806 tr,
2800 2807 self._format_flags | self._format_version,
2801 2808 new_index_file_path=new_index_file_path,
2802 2809 )
2803 2810
2804 2811 self._inline = False
2805 2812 if new_index_file_path is not None:
2806 2813 self._indexfile = new_index_file_path
2807 2814
2808 2815 nodemaputil.setup_persistent_nodemap(tr, self)
2809 2816
2810 2817 def _nodeduplicatecallback(self, transaction, node):
2811 2818 """called when trying to add a node already stored."""
2812 2819
2813 2820 @contextlib.contextmanager
2814 2821 def reading(self):
2815 2822 with self._inner.reading():
2816 2823 yield
2817 2824
2818 2825 @contextlib.contextmanager
2819 2826 def _writing(self, transaction):
2820 2827 if self._trypending:
2821 2828 msg = b'try to write in a `trypending` revlog: %s'
2822 2829 msg %= self.display_id
2823 2830 raise error.ProgrammingError(msg)
2824 2831 if self._inner.is_writing:
2825 2832 yield
2826 2833 else:
2827 2834 data_end = None
2828 2835 sidedata_end = None
2829 2836 if self._docket is not None:
2830 2837 data_end = self._docket.data_end
2831 2838 sidedata_end = self._docket.sidedata_end
2832 2839 with self._inner.writing(
2833 2840 transaction,
2834 2841 data_end=data_end,
2835 2842 sidedata_end=sidedata_end,
2836 2843 ):
2837 2844 yield
2838 2845 if self._docket is not None:
2839 2846 self._write_docket(transaction)
2840 2847
2841 2848 def _write_docket(self, transaction):
2842 2849 """write the current docket on disk
2843 2850
2844 2851 Exists as a method to help the changelog implement transaction logic
2845 2852
2846 2853 We could also imagine using the same transaction logic for all revlogs
2847 2854 since dockets are cheap."""
2848 2855 self._docket.write(transaction)
2849 2856
2850 2857 def addrevision(
2851 2858 self,
2852 2859 text,
2853 2860 transaction,
2854 2861 link,
2855 2862 p1,
2856 2863 p2,
2857 2864 cachedelta=None,
2858 2865 node=None,
2859 2866 flags=REVIDX_DEFAULT_FLAGS,
2860 2867 deltacomputer=None,
2861 2868 sidedata=None,
2862 2869 ):
2863 2870 """add a revision to the log
2864 2871
2865 2872 text - the revision data to add
2866 2873 transaction - the transaction object used for rollback
2867 2874 link - the linkrev data to add
2868 2875 p1, p2 - the parent nodeids of the revision
2869 2876 cachedelta - an optional precomputed delta
2870 2877 node - nodeid of revision; typically node is not specified, and it is
2871 2878 computed by default as hash(text, p1, p2), however subclasses might
2872 2879 use a different hashing method (and override checkhash() in such a case)
2873 2880 flags - the known flags to set on the revision
2874 2881 deltacomputer - an optional deltacomputer instance shared between
2875 2882 multiple calls
2876 2883 """
2877 2884 if link == nullrev:
2878 2885 raise error.RevlogError(
2879 2886 _(b"attempted to add linkrev -1 to %s") % self.display_id
2880 2887 )
2881 2888
2882 2889 if sidedata is None:
2883 2890 sidedata = {}
2884 2891 elif sidedata and not self.feature_config.has_side_data:
2885 2892 raise error.ProgrammingError(
2886 2893 _(b"trying to add sidedata to a revlog who don't support them")
2887 2894 )
2888 2895
2889 2896 if flags:
2890 2897 node = node or self.hash(text, p1, p2)
2891 2898
2892 2899 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2893 2900
2894 2901 # If the flag processor modifies the revision data, ignore any provided
2895 2902 # cachedelta.
2896 2903 if rawtext != text:
2897 2904 cachedelta = None
2898 2905
2899 2906 if len(rawtext) > _maxentrysize:
2900 2907 raise error.RevlogError(
2901 2908 _(
2902 2909 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2903 2910 )
2904 2911 % (self.display_id, len(rawtext))
2905 2912 )
2906 2913
2907 2914 node = node or self.hash(rawtext, p1, p2)
2908 2915 rev = self.index.get_rev(node)
2909 2916 if rev is not None:
2910 2917 return rev
2911 2918
2912 2919 if validatehash:
2913 2920 self.checkhash(rawtext, node, p1=p1, p2=p2)
2914 2921
2915 2922 return self.addrawrevision(
2916 2923 rawtext,
2917 2924 transaction,
2918 2925 link,
2919 2926 p1,
2920 2927 p2,
2921 2928 node,
2922 2929 flags,
2923 2930 cachedelta=cachedelta,
2924 2931 deltacomputer=deltacomputer,
2925 2932 sidedata=sidedata,
2926 2933 )
2927 2934
2928 2935 def addrawrevision(
2929 2936 self,
2930 2937 rawtext,
2931 2938 transaction,
2932 2939 link,
2933 2940 p1,
2934 2941 p2,
2935 2942 node,
2936 2943 flags,
2937 2944 cachedelta=None,
2938 2945 deltacomputer=None,
2939 2946 sidedata=None,
2940 2947 ):
2941 2948 """add a raw revision with known flags, node and parents
2942 2949 useful when reusing a revision not stored in this revlog (ex: received
2943 2950 over wire, or read from an external bundle).
2944 2951 """
2945 2952 with self._writing(transaction):
2946 2953 return self._addrevision(
2947 2954 node,
2948 2955 rawtext,
2949 2956 transaction,
2950 2957 link,
2951 2958 p1,
2952 2959 p2,
2953 2960 flags,
2954 2961 cachedelta,
2955 2962 deltacomputer=deltacomputer,
2956 2963 sidedata=sidedata,
2957 2964 )
2958 2965
2959 2966 def compress(self, data):
2960 2967 return self._inner.compress(data)
2961 2968
2962 2969 def decompress(self, data):
2963 2970 return self._inner.decompress(data)
2964 2971
2965 2972 def _addrevision(
2966 2973 self,
2967 2974 node,
2968 2975 rawtext,
2969 2976 transaction,
2970 2977 link,
2971 2978 p1,
2972 2979 p2,
2973 2980 flags,
2974 2981 cachedelta,
2975 2982 alwayscache=False,
2976 2983 deltacomputer=None,
2977 2984 sidedata=None,
2978 2985 ):
2979 2986 """internal function to add revisions to the log
2980 2987
2981 2988 see addrevision for argument descriptions.
2982 2989
2983 2990 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2984 2991
2985 2992 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2986 2993 be used.
2987 2994
2988 2995 invariants:
2989 2996 - rawtext is optional (can be None); if not set, cachedelta must be set.
2990 2997 if both are set, they must correspond to each other.
2991 2998 """
2992 2999 if node == self.nullid:
2993 3000 raise error.RevlogError(
2994 3001 _(b"%s: attempt to add null revision") % self.display_id
2995 3002 )
2996 3003 if (
2997 3004 node == self.nodeconstants.wdirid
2998 3005 or node in self.nodeconstants.wdirfilenodeids
2999 3006 ):
3000 3007 raise error.RevlogError(
3001 3008 _(b"%s: attempt to add wdir revision") % self.display_id
3002 3009 )
3003 3010 if self._inner._writinghandles is None:
3004 3011 msg = b'adding revision outside `revlog._writing` context'
3005 3012 raise error.ProgrammingError(msg)
3006 3013
3007 3014 btext = [rawtext]
3008 3015
3009 3016 curr = len(self)
3010 3017 prev = curr - 1
3011 3018
3012 3019 offset = self._get_data_offset(prev)
3013 3020
3014 3021 if self._concurrencychecker:
3015 3022 ifh, dfh, sdfh = self._inner._writinghandles
3016 3023 # XXX no checking for the sidedata file
3017 3024 if self._inline:
3018 3025 # offset is "as if" it were in the .d file, so we need to add on
3019 3026 # the size of the entry metadata.
3020 3027 self._concurrencychecker(
3021 3028 ifh, self._indexfile, offset + curr * self.index.entry_size
3022 3029 )
3023 3030 else:
3024 3031 # Entries in the .i are a consistent size.
3025 3032 self._concurrencychecker(
3026 3033 ifh, self._indexfile, curr * self.index.entry_size
3027 3034 )
3028 3035 self._concurrencychecker(dfh, self._datafile, offset)
3029 3036
3030 3037 p1r, p2r = self.rev(p1), self.rev(p2)
3031 3038
3032 3039 # full versions are inserted when the needed deltas
3033 3040 # become comparable to the uncompressed text
3034 3041 if rawtext is None:
3035 3042 # need the rawtext size before it is changed by flag processors,
3036 3043 # which is the non-raw size. use revlog explicitly to avoid filelog's
3037 3044 # extra logic that might remove metadata size.
3038 3045 textlen = mdiff.patchedsize(
3039 3046 revlog.size(self, cachedelta[0]), cachedelta[1]
3040 3047 )
3041 3048 else:
3042 3049 textlen = len(rawtext)
3043 3050
3044 3051 if deltacomputer is None:
3045 3052 write_debug = None
3046 3053 if self.delta_config.debug_delta:
3047 3054 write_debug = transaction._report
3048 3055 deltacomputer = deltautil.deltacomputer(
3049 3056 self, write_debug=write_debug
3050 3057 )
3051 3058
3052 3059 if cachedelta is not None and len(cachedelta) == 2:
3053 3060 # If the cached delta has no information about how it should be
3054 3061 # reused, add the default reuse instruction according to the
3055 3062 # revlog's configuration.
3056 3063 if (
3057 3064 self.delta_config.general_delta
3058 3065 and self.delta_config.lazy_delta_base
3059 3066 ):
3060 3067 delta_base_reuse = DELTA_BASE_REUSE_TRY
3061 3068 else:
3062 3069 delta_base_reuse = DELTA_BASE_REUSE_NO
3063 3070 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3064 3071
3065 3072 revinfo = revlogutils.revisioninfo(
3066 3073 node,
3067 3074 p1,
3068 3075 p2,
3069 3076 btext,
3070 3077 textlen,
3071 3078 cachedelta,
3072 3079 flags,
3073 3080 )
3074 3081
3075 3082 deltainfo = deltacomputer.finddeltainfo(revinfo)
3076 3083
3077 3084 compression_mode = COMP_MODE_INLINE
3078 3085 if self._docket is not None:
3079 3086 default_comp = self._docket.default_compression_header
3080 3087 r = deltautil.delta_compression(default_comp, deltainfo)
3081 3088 compression_mode, deltainfo = r
3082 3089
3083 3090 sidedata_compression_mode = COMP_MODE_INLINE
3084 3091 if sidedata and self.feature_config.has_side_data:
3085 3092 sidedata_compression_mode = COMP_MODE_PLAIN
3086 3093 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3087 3094 sidedata_offset = self._docket.sidedata_end
3088 3095 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3089 3096 if (
3090 3097 h != b'u'
3091 3098 and comp_sidedata[0:1] != b'\0'
3092 3099 and len(comp_sidedata) < len(serialized_sidedata)
3093 3100 ):
3094 3101 assert not h
3095 3102 if (
3096 3103 comp_sidedata[0:1]
3097 3104 == self._docket.default_compression_header
3098 3105 ):
3099 3106 sidedata_compression_mode = COMP_MODE_DEFAULT
3100 3107 serialized_sidedata = comp_sidedata
3101 3108 else:
3102 3109 sidedata_compression_mode = COMP_MODE_INLINE
3103 3110 serialized_sidedata = comp_sidedata
3104 3111 else:
3105 3112 serialized_sidedata = b""
3106 3113 # Don't store the offset if the sidedata is empty; that way
3107 3114 # we can easily detect empty sidedata, and they will be no different
3108 3115 # from the ones we add manually.
3109 3116 sidedata_offset = 0
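# Recap of the mode selection above: PLAIN when compressing did not
# help, DEFAULT when the compressed payload starts with the docket's
# default compression header, INLINE when it carries its own header.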
3110 3117
3111 3118 rank = RANK_UNKNOWN
3112 3119 if self.feature_config.compute_rank:
3113 3120 if (p1r, p2r) == (nullrev, nullrev):
3114 3121 rank = 1
3115 3122 elif p1r != nullrev and p2r == nullrev:
3116 3123 rank = 1 + self.fast_rank(p1r)
3117 3124 elif p1r == nullrev and p2r != nullrev:
3118 3125 rank = 1 + self.fast_rank(p2r)
3119 3126 else: # merge node
3120 3127 if rustdagop is not None and self.index.rust_ext_compat:
3121 3128 rank = rustdagop.rank(self.index, p1r, p2r)
3122 3129 else:
3123 3130 pmin, pmax = sorted((p1r, p2r))
3124 3131 rank = 1 + self.fast_rank(pmax)
3125 3132 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
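# Sketch of the arithmetic: if fast_rank(pmax) == 5 and two ancestors
# of pmin are not ancestors of pmax, the merge's rank is 1 + 5 + 2.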

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
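        # Without a docket (revlog v0/v1), the data file is strictly
        # append-only, so the end of the previous revision is the answer;
        # with a docket, the recorded ``data_end`` is authoritative instead.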
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """
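        # Each item of ``deltas`` unpacks as an 8-tuple:
        # (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
        # matching the destructuring in the loop below.
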
        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._inner._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
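        # For example (illustrative): a data file truncated mid-write shows
        # up as dd < 0, while stray trailing bytes in the index show up as
        # di > 0.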
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When None, the revlog's current default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
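        # Hypothetical usage sketch (``tr``, ``src`` and ``dst`` are assumed
        # names): copy a revlog while keeping compatible deltas:
        #
        #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
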
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)