revlog: move the `deltachain` method on the inner object...
marmoute
r51988:30f458fc default
@@ -1,3969 +1,3971 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
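Why `FeatureConfig` overrides `copy()`: the base `_Config.copy()` rebuilds the object from `__dict__`, which leaves any mutable attribute shared between the copies. A minimal standalone sketch of the aliasing and the fix (the class names below are toy stand-ins, not the real config classes):

```python
class Base:
    def copy(self):
        # same idea as _Config.copy(): rebuild from the attribute dict
        return self.__class__(**self.__dict__)

class Cfg(Base):
    def __init__(self, opts=None):
        self.opts = {} if opts is None else opts

a = Cfg()
b = a.copy()
assert b.opts is a.opts      # shared dict: mutating one mutates both
b.opts = b.opts.copy()       # what the FeatureConfig override does
assert b.opts is not a.opts
```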
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # whether deltas are encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 393
394 394 @property
395 395 def index_file(self):
396 396 return self.__index_file
397 397
398 398 @index_file.setter
399 399 def index_file(self, new_index_file):
400 400 self.__index_file = new_index_file
401 401 if self.inline:
402 402 self._segmentfile.filename = new_index_file
403 403
404 404 def __len__(self):
405 405 return len(self.index)
406 406
407 407 # Derived from index values.
408 408
409 409 def start(self, rev):
410 410 """the offset of the data chunk for this revision"""
411 411 return int(self.index[rev][0] >> 16)
412 412
413 413 def length(self, rev):
414 414 """the length of the data chunk for this revision"""
415 415 return self.index[rev][1]
416 416
417 417 def end(self, rev):
418 418 """the end of the data chunk for this revision"""
419 419 return self.start(rev) + self.length(rev)
420 420
421 421 def deltaparent(self, rev):
422 422 """return deltaparent of the given revision"""
423 423 base = self.index[rev][3]
424 424 if base == rev:
425 425 return nullrev
426 426 elif self.delta_config.general_delta:
427 427 return base
428 428 else:
429 429 return rev - 1
430 430
431 431 def issnapshot(self, rev):
432 432 """tells whether rev is a snapshot"""
433 433 if not self.delta_config.sparse_revlog:
434 434 return self.deltaparent(rev) == nullrev
435 435 elif hasattr(self.index, 'issnapshot'):
436 436 # directly assign the method to cache the testing and access
437 437 self.issnapshot = self.index.issnapshot
438 438 return self.issnapshot(rev)
439 439 if rev == nullrev:
440 440 return True
441 441 entry = self.index[rev]
442 442 base = entry[3]
443 443 if base == rev:
444 444 return True
445 445 if base == nullrev:
446 446 return True
447 447 p1 = entry[5]
448 448 while self.length(p1) == 0:
449 449 b = self.deltaparent(p1)
450 450 if b == p1:
451 451 break
452 452 p1 = b
453 453 p2 = entry[6]
454 454 while self.length(p2) == 0:
455 455 b = self.deltaparent(p2)
456 456 if b == p2:
457 457 break
458 458 p2 = b
459 459 if base == p1 or base == p2:
460 460 return False
461 461 return self.issnapshot(base)
462 462
463 def _deltachain(self, rev, stoprev=None):
464 """Obtain the delta chain for a revision.
465
466 ``stoprev`` specifies a revision to stop at. If not specified, we
467 stop at the base of the chain.
468
469 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
470 revs in ascending order and ``stopped`` is a bool indicating whether
471 ``stoprev`` was hit.
472 """
473 generaldelta = self.delta_config.general_delta
474 # Try C implementation.
475 try:
476 return self.index.deltachain(rev, stoprev, generaldelta)
477 except AttributeError:
478 pass
479
480 chain = []
481
482 # Alias to prevent attribute lookup in tight loop.
483 index = self.index
484
485 iterrev = rev
486 e = index[iterrev]
487 while iterrev != e[3] and iterrev != stoprev:
488 chain.append(iterrev)
489 if generaldelta:
490 iterrev = e[3]
491 else:
492 iterrev -= 1
493 e = index[iterrev]
494
495 if iterrev == stoprev:
496 stopped = True
497 else:
498 chain.append(iterrev)
499 stopped = False
500
501 chain.reverse()
502 return chain, stopped
503
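For reference, a minimal standalone sketch of the pure-Python fallback above, run against a toy index. The four-slot rows are an assumption of this sketch; only slot 3 (the delta-base revision) matters for the walk, whereas real index entries carry many more fields:

```python
def deltachain(index, rev, generaldelta, stoprev=None):
    chain = []
    iterrev = rev
    e = index[iterrev]
    while iterrev != e[3] and iterrev != stoprev:
        chain.append(iterrev)
        # generaldelta: follow the stored base; otherwise deltas are
        # always against the previous revision
        iterrev = e[3] if generaldelta else iterrev - 1
        e = index[iterrev]
    if iterrev == stoprev:
        stopped = True
    else:
        chain.append(iterrev)  # the chain base itself
        stopped = False
    chain.reverse()
    return chain, stopped

# rev 0 is a full snapshot (base == rev); revs 1..3 each delta on the
# previous revision; rev 4 deltas directly on rev 1.
toy_index = [(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 1), (0, 0, 0, 2), (0, 0, 0, 1)]
assert deltachain(toy_index, 3, generaldelta=True) == ([0, 1, 2, 3], False)
assert deltachain(toy_index, 3, generaldelta=True, stoprev=1) == ([2, 3], True)
```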
463 504 @util.propertycache
464 505 def _compressor(self):
465 506 engine = util.compengines[self.feature_config.compression_engine]
466 507 return engine.revlogcompressor(
467 508 self.feature_config.compression_engine_options
468 509 )
469 510
470 511 @util.propertycache
471 512 def _decompressor(self):
472 513 """the default decompressor"""
473 514 if self._default_compression_header is None:
474 515 return None
475 516 t = self._default_compression_header
476 517 c = self._get_decompressor(t)
477 518 return c.decompress
478 519
479 520 def _get_decompressor(self, t):
480 521 try:
481 522 compressor = self._decompressors[t]
482 523 except KeyError:
483 524 try:
484 525 engine = util.compengines.forrevlogheader(t)
485 526 compressor = engine.revlogcompressor(
486 527 self.feature_config.compression_engine_options
487 528 )
488 529 self._decompressors[t] = compressor
489 530 except KeyError:
490 531 raise error.RevlogError(
491 532 _(b'unknown compression type %s') % binascii.hexlify(t)
492 533 )
493 534 return compressor
494 535
495 536 def compress(self, data):
496 537 """Generate a possibly-compressed representation of data."""
497 538 if not data:
498 539 return b'', data
499 540
500 541 compressed = self._compressor.compress(data)
501 542
502 543 if compressed:
503 544 # The revlog compressor added the header in the returned data.
504 545 return b'', compressed
505 546
506 547 if data[0:1] == b'\0':
507 548 return b'', data
508 549 return b'u', data
509 550
510 551 def decompress(self, data):
511 552 """Decompress a revlog chunk.
512 553
513 554 The chunk is expected to begin with a header identifying the
514 555 format type so it can be routed to an appropriate decompressor.
515 556 """
516 557 if not data:
517 558 return data
518 559
519 560 # Revlogs are read much more frequently than they are written and many
520 561 # chunks only take microseconds to decompress, so performance is
521 562 # important here.
522 563 #
523 564 # We can make a few assumptions about revlogs:
524 565 #
525 566 # 1) the majority of chunks will be compressed (as opposed to inline
526 567 # raw data).
527 568 # 2) decompressing *any* data will likely be at least 10x slower than
528 569 # returning raw inline data.
529 570 # 3) we want to prioritize common and officially supported compression
530 571 # engines
531 572 #
532 573 # It follows that we want to optimize for "decompress compressed data
533 574 # when encoded with common and officially supported compression engines"
534 575 # case over "raw data" and "data encoded by less common or non-official
535 576 # compression engines." That is why we have the inline lookup first
536 577 # followed by the compengines lookup.
537 578 #
538 579 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
539 580 # compressed chunks. And this matters for changelog and manifest reads.
540 581 t = data[0:1]
541 582
542 583 if t == b'x':
543 584 try:
544 585 return _zlibdecompress(data)
545 586 except zlib.error as e:
546 587 raise error.RevlogError(
547 588 _(b'revlog decompress error: %s')
548 589 % stringutil.forcebytestr(e)
549 590 )
550 591 # '\0' is more common than 'u' so it goes first.
551 592 elif t == b'\0':
552 593 return data
553 594 elif t == b'u':
554 595 return util.buffer(data, 1)
555 596
556 597 compressor = self._get_decompressor(t)
557 598
558 599 return compressor.decompress(data)
559 600
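A minimal self-contained sketch of the header-byte dispatch described above, assuming only zlib plus the two raw markers; the real method additionally routes unknown headers through the compression-engine registry:

```python
import zlib

def toy_decompress(data):
    # dispatch on the one-byte header, in the priority order discussed
    # above: zlib ('x') first, then raw-NUL, then the 'u' marker
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        # raw data that happens to start with NUL is stored verbatim
        return data
    if t == b'u':
        # explicitly uncompressed: strip the marker byte
        return data[1:]
    raise ValueError('unknown compression header: %r' % t)

assert toy_decompress(zlib.compress(b'hello')) == b'hello'
assert toy_decompress(b'uhello') == b'hello'
assert toy_decompress(b'\0raw') == b'\0raw'
```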
560 601 @contextlib.contextmanager
561 602 def reading(self):
562 603 """Context manager that keeps data and sidedata files open for reading"""
563 604 if len(self.index) == 0:
564 605 yield # nothing to be read
565 606 else:
566 607 with self._segmentfile.reading():
567 608 with self._segmentfile_sidedata.reading():
568 609 yield
569 610
570 611 @property
571 612 def is_writing(self):
572 613 """True is a writing context is open"""
573 614 return self._writinghandles is not None
574 615
575 616 @contextlib.contextmanager
576 617 def writing(self, transaction, data_end=None, sidedata_end=None):
577 618 """Open the revlog files for writing
578 619
579 620 Adding content to a revlog should be done within such a context.
580 621 """
581 622 if self.is_writing:
582 623 yield
583 624 else:
584 625 ifh = dfh = sdfh = None
585 626 try:
586 627 r = len(self.index)
587 628 # opening the data file.
588 629 dsize = 0
589 630 if r:
590 631 dsize = self.end(r - 1)
591 632 dfh = None
592 633 if not self.inline:
593 634 try:
594 635 dfh = self.opener(self.data_file, mode=b"r+")
595 636 if data_end is None:
596 637 dfh.seek(0, os.SEEK_END)
597 638 else:
598 639 dfh.seek(data_end, os.SEEK_SET)
599 640 except FileNotFoundError:
600 641 dfh = self.opener(self.data_file, mode=b"w+")
601 642 transaction.add(self.data_file, dsize)
602 643 if self.sidedata_file is not None:
603 644 assert sidedata_end is not None
604 645 # revlog-v2 does not inline; help Pytype
605 646 assert dfh is not None
606 647 try:
607 648 sdfh = self.opener(self.sidedata_file, mode=b"r+")
608 649 dfh.seek(sidedata_end, os.SEEK_SET)
609 650 except FileNotFoundError:
610 651 sdfh = self.opener(self.sidedata_file, mode=b"w+")
611 652 transaction.add(self.sidedata_file, sidedata_end)
612 653
613 654 # opening the index file.
614 655 isize = r * self.index.entry_size
615 656 ifh = self.__index_write_fp()
616 657 if self.inline:
617 658 transaction.add(self.index_file, dsize + isize)
618 659 else:
619 660 transaction.add(self.index_file, isize)
620 661 # exposing all file handles for writing.
621 662 self._writinghandles = (ifh, dfh, sdfh)
622 663 self._segmentfile.writing_handle = ifh if self.inline else dfh
623 664 self._segmentfile_sidedata.writing_handle = sdfh
624 665 yield
625 666 finally:
626 667 self._writinghandles = None
627 668 self._segmentfile.writing_handle = None
628 669 self._segmentfile_sidedata.writing_handle = None
629 670 if dfh is not None:
630 671 dfh.close()
631 672 if sdfh is not None:
632 673 sdfh.close()
633 674 # closing the index file last to avoid exposing references to
634 675 # potentially unflushed data content.
635 676 if ifh is not None:
636 677 ifh.close()
637 678
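The `writing` context manager above is reentrant: a nested call reuses the already-open handles instead of reopening the files. A stripped-down sketch of just that pattern, with the real `(ifh, dfh, sdfh)` handle tuple replaced by a placeholder:

```python
import contextlib

class ToyWriter:
    def __init__(self):
        self._writinghandles = None

    @property
    def is_writing(self):
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self):
        if self.is_writing:
            yield  # nested call: reuse the handles already open
        else:
            try:
                # stand-in for opening the real index/data/sidedata handles
                self._writinghandles = object()
                yield
            finally:
                self._writinghandles = None

w = ToyWriter()
with w.writing():
    with w.writing():  # nested: no second open
        assert w.is_writing
assert not w.is_writing
```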
638 679 def __index_write_fp(self, index_end=None):
639 680 """internal method to open the index file for writing
640 681
641 682 You should not use this directly; use `_writing` instead
642 683 """
643 684 try:
644 685 f = self.opener(
645 686 self.index_file,
646 687 mode=b"r+",
647 688 checkambig=self.data_config.check_ambig,
648 689 )
649 690 if index_end is None:
650 691 f.seek(0, os.SEEK_END)
651 692 else:
652 693 f.seek(index_end, os.SEEK_SET)
653 694 return f
654 695 except FileNotFoundError:
655 696 return self.opener(
656 697 self.index_file,
657 698 mode=b"w+",
658 699 checkambig=self.data_config.check_ambig,
659 700 )
660 701
661 702 def __index_new_fp(self):
662 703 """internal method to create a new index file for writing
663 704
664 705 You should not use this unless you are upgrading from an inline revlog
665 706 """
666 707 return self.opener(
667 708 self.index_file,
668 709 mode=b"w",
669 710 checkambig=self.data_config.check_ambig,
670 711 atomictemp=True,
671 712 )
672 713
673 714 def split_inline(self, tr, header, new_index_file_path=None):
674 715 """split the data of an inline revlog into an index and a data file"""
675 716 existing_handles = False
676 717 if self._writinghandles is not None:
677 718 existing_handles = True
678 719 fp = self._writinghandles[0]
679 720 fp.flush()
680 721 fp.close()
681 722 # We can't use the cached file handle after close(). So prevent
682 723 # its usage.
683 724 self._writinghandles = None
684 725 self._segmentfile.writing_handle = None
685 726 # No need to deal with the sidedata writing handle as it is only
686 727 # relevant for revlog-v2, which is never inline, so this code is
687 728 # never reached
688 729
689 730 new_dfh = self.opener(self.data_file, mode=b"w+")
690 731 new_dfh.truncate(0) # drop any potentially existing data
691 732 try:
692 733 with self.reading():
693 734 for r in range(len(self.index)):
694 735 new_dfh.write(self.get_segment_for_revs(r, r)[1])
695 736 new_dfh.flush()
696 737
697 738 if new_index_file_path is not None:
698 739 self.index_file = new_index_file_path
699 740 with self.__index_new_fp() as fp:
700 741 self.inline = False
701 742 for i in range(len(self.index)):
702 743 e = self.index.entry_binary(i)
703 744 if i == 0:
704 745 packed_header = self.index.pack_header(header)
705 746 e = packed_header + e
706 747 fp.write(e)
707 748
708 749 # If we don't use side-write, the temp file replaces the real
709 750 # index when we exit the context manager
710 751
711 752 self._segmentfile = randomaccessfile.randomaccessfile(
712 753 self.opener,
713 754 self.data_file,
714 755 self.data_config.chunk_cache_size,
715 756 )
716 757
717 758 if existing_handles:
718 759 # switched from inline to conventional; reopen the index
719 760 ifh = self.__index_write_fp()
720 761 self._writinghandles = (ifh, new_dfh, None)
721 762 self._segmentfile.writing_handle = new_dfh
722 763 new_dfh = None
723 764 # No need to deal with the sidedata writing handle as it is only
724 765 # relevant for revlog-v2, which is never inline, so this code is
725 766 # never reached
726 767 finally:
727 768 if new_dfh is not None:
728 769 new_dfh.close()
729 770 return self.index_file
730 771
731 772 def get_segment_for_revs(self, startrev, endrev):
732 773 """Obtain a segment of raw data corresponding to a range of revisions.
733 774
734 775 Accepts the start and end revisions. Reads go through the internal
735 776 segment file, so the seek position of any underlying file handle
736 777 will not be preserved.
737 778
738 779 Requests for data may be satisfied by a cache.
739 780
740 781 Returns a 2-tuple of (offset, data) for the requested range of
741 782 revisions. Offset is the integer offset from the beginning of the
742 783 revlog and data is a str or buffer of the raw byte data.
743 784
744 785 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
745 786 to determine where each revision's data begins and ends.
746 787
747 788 API: we should consider making this a private part of the InnerRevlog
748 789 at some point.
749 790 """
750 791 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
751 792 # (functions are expensive).
752 793 index = self.index
753 794 istart = index[startrev]
754 795 start = int(istart[0] >> 16)
755 796 if startrev == endrev:
756 797 end = start + istart[1]
757 798 else:
758 799 iend = index[endrev]
759 800 end = int(iend[0] >> 16) + iend[1]
760 801
761 802 if self.inline:
762 803 start += (startrev + 1) * self.index.entry_size
763 804 end += (endrev + 1) * self.index.entry_size
764 805 length = end - start
765 806
766 807 return start, self._segmentfile.read_chunk(start, length)
767 808
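The inline adjustment at the end of the method is the subtle part: in an inline revlog the data of revision `r` is preceded in the file by `r + 1` fixed-size index entries. A toy illustration with made-up entry sizes and per-revision `(start, length)` pairs:

```python
# Made-up layout: 64-byte index entries and three revisions whose logical
# (start, length) pairs ignore the interleaved index data.
entry_size = 64
revs = [(0, 10), (10, 7), (17, 20)]

def physical_range(startrev, endrev):
    start = revs[startrev][0]
    end = revs[endrev][0] + revs[endrev][1]
    # shift by the index entries interleaved before each revision
    start += (startrev + 1) * entry_size
    end += (endrev + 1) * entry_size
    return start, end - start

assert physical_range(0, 2) == (64, 165)  # 37 bytes of data + 2 more entries
```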
768 809 def _chunk(self, rev):
769 810 """Obtain a single decompressed chunk for a revision.
770 811
771 812 Accepts an integer revision. Reads go through the internal segment
772 813 file, so the seek position of any underlying file handle will not
773 814 be preserved.
774 815
775 816 Returns a str holding uncompressed data for the requested revision.
776 817 """
777 818 compression_mode = self.index[rev][10]
778 819 data = self.get_segment_for_revs(rev, rev)[1]
779 820 if compression_mode == COMP_MODE_PLAIN:
780 821 return data
781 822 elif compression_mode == COMP_MODE_DEFAULT:
782 823 return self._decompressor(data)
783 824 elif compression_mode == COMP_MODE_INLINE:
784 825 return self.decompress(data)
785 826 else:
786 827 msg = b'unknown compression mode %d'
787 828 msg %= compression_mode
788 829 raise error.RevlogError(msg)
789 830
790 831 def _chunks(self, revs, targetsize=None):
791 832 """Obtain decompressed chunks for the specified revisions.
792 833
793 834 Accepts an iterable of numeric revisions that are assumed to be in
794 835 ascending order. Reads go through the internal segment file, so the
795 836 seek position of any underlying file handle will not be preserved.
797 838
798 839 This function is similar to calling ``self._chunk()`` multiple times,
799 840 but is faster.
800 841
801 842 Returns a list with decompressed data for each requested revision.
802 843 """
803 844 if not revs:
804 845 return []
805 846 start = self.start
806 847 length = self.length
807 848 inline = self.inline
808 849 iosize = self.index.entry_size
809 850 buffer = util.buffer
810 851
811 852 l = []
812 853 ladd = l.append
813 854
814 855 if not self.data_config.with_sparse_read:
815 856 slicedchunks = (revs,)
816 857 else:
817 858 slicedchunks = deltautil.slicechunk(
818 859 self,
819 860 revs,
820 861 targetsize=targetsize,
821 862 )
822 863
823 864 for revschunk in slicedchunks:
824 865 firstrev = revschunk[0]
825 866 # Skip trailing revisions with empty diff
826 867 for lastrev in revschunk[::-1]:
827 868 if length(lastrev) != 0:
828 869 break
829 870
830 871 try:
831 872 offset, data = self.get_segment_for_revs(firstrev, lastrev)
832 873 except OverflowError:
833 874 # issue4215 - we can't cache a run of chunks greater than
834 875 # 2G on Windows
835 876 return [self._chunk(rev) for rev in revschunk]
836 877
837 878 decomp = self.decompress
838 879 # self._decompressor might be None, but will not be used in that case
839 880 def_decomp = self._decompressor
840 881 for rev in revschunk:
841 882 chunkstart = start(rev)
842 883 if inline:
843 884 chunkstart += (rev + 1) * iosize
844 885 chunklength = length(rev)
845 886 comp_mode = self.index[rev][10]
846 887 c = buffer(data, chunkstart - offset, chunklength)
847 888 if comp_mode == COMP_MODE_PLAIN:
848 889 ladd(c)
849 890 elif comp_mode == COMP_MODE_INLINE:
850 891 ladd(decomp(c))
851 892 elif comp_mode == COMP_MODE_DEFAULT:
852 893 ladd(def_decomp(c))
853 894 else:
854 895 msg = b'unknown compression mode %d'
855 896 msg %= comp_mode
856 897 raise error.RevlogError(msg)
857 898
858 899 return l
859 900
860 901
861 902 class revlog:
862 903 """
863 904 the underlying revision storage object
864 905
865 906 A revlog consists of two parts, an index and the revision data.
866 907
867 908 The index is a file with a fixed record size containing
868 909 information on each revision, including its nodeid (hash), the
869 910 nodeids of its parents, the position and offset of its data within
870 911 the data file, and the revision it's based on. Finally, each entry
871 912 contains a linkrev entry that can serve as a pointer to external
872 913 data.
873 914
874 915 The revision data itself is a linear collection of data chunks.
875 916 Each chunk represents a revision and is usually represented as a
876 917 delta against the previous chunk. To bound lookup time, runs of
877 918 deltas are limited to about 2 times the length of the original
878 919 version data. This makes retrieval of a version proportional to
879 920 its size, or O(1) relative to the number of revisions.
880 921
881 922 Both pieces of the revlog are written to in an append-only
882 923 fashion, which means we never need to rewrite a file to insert or
883 924 remove data, and can use some simple techniques to avoid the need
884 925 for locking while reading.
885 926
886 927 If checkambig, indexfile is opened with checkambig=True at
887 928 writing, to avoid file stat ambiguity.
888 929
889 930 If mmaplargeindex is True, and an mmapindexthreshold is set, the
890 931 index will be mmapped rather than read if it is larger than the
891 932 configured threshold.
892 933
893 934 If censorable is True, the revlog can have censored revisions.
894 935
895 936 If `upperboundcomp` is not None, this is the expected maximal gain from
896 937 compression for the data content.
897 938
898 939 `concurrencychecker` is an optional function that receives 3 arguments: a
899 940 file handle, a filename, and an expected position. It should check whether
900 941 the current position in the file handle is valid, and log/warn/fail (by
901 942 raising).
902 943
903 944 See mercurial/revlogutils/constants.py for details about the content of an
904 945 index entry.
905 946 """
906 947
907 948 _flagserrorclass = error.RevlogError
908 949
909 950 @staticmethod
910 951 def is_inline_index(header_bytes):
911 952 """Determine if a revlog is inline from the initial bytes of the index"""
912 953 header = INDEX_HEADER.unpack(header_bytes)[0]
913 954
914 955 _format_flags = header & ~0xFFFF
915 956 _format_version = header & 0xFFFF
916 957
917 958 features = FEATURES_BY_VERSION[_format_version]
918 959 return features[b'inline'](_format_flags)
919 960
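A worked example of the bit split performed above, using the v1 header layout (flags in the high 16 bits, version in the low 16 bits). The constant values are restated here so the sketch runs standalone; they are meant to mirror the constants module:

```python
import struct

REVLOGV1 = 1
FLAG_INLINE_DATA = 1 << 16   # lives in the high (flag) half of the header

header_bytes = struct.pack('>I', REVLOGV1 | FLAG_INLINE_DATA)
header = struct.unpack('>I', header_bytes)[0]

format_flags = header & ~0xFFFF    # high 16 bits
format_version = header & 0xFFFF   # low 16 bits
assert format_version == REVLOGV1
assert format_flags & FLAG_INLINE_DATA
```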
920 961 def __init__(
921 962 self,
922 963 opener,
923 964 target,
924 965 radix,
925 966 postfix=None, # only exists for `tmpcensored` now
926 967 checkambig=False,
927 968 mmaplargeindex=False,
928 969 censorable=False,
929 970 upperboundcomp=None,
930 971 persistentnodemap=False,
931 972 concurrencychecker=None,
932 973 trypending=False,
933 974 try_split=False,
934 975 canonical_parent_order=True,
935 976 ):
936 977 """
937 978 create a revlog object
938 979
939 980 opener is a function that abstracts the file opening operation
940 981 and can be used to implement COW semantics or the like.
941 982
942 983 `target`: a (KIND, ID) tuple that identifies the content stored in
943 984 this revlog. It helps the rest of the code to understand what the revlog
944 985 is about without having to resort to heuristics and index filename
945 986 analysis. Note that this must reliably be set by normal code, but
946 987 that test, debug, or performance measurement code might not set this to
947 988 an accurate value.
948 989 """
949 990
950 991 self.radix = radix
951 992
952 993 self._docket_file = None
953 994 self._indexfile = None
954 995 self._datafile = None
955 996 self._sidedatafile = None
956 997 self._nodemap_file = None
957 998 self.postfix = postfix
958 999 self._trypending = trypending
959 1000 self._try_split = try_split
960 1001 self.opener = opener
961 1002 if persistentnodemap:
962 1003 self._nodemap_file = nodemaputil.get_nodemap_file(self)
963 1004
964 1005 assert target[0] in ALL_KINDS
965 1006 assert len(target) == 2
966 1007 self.target = target
967 1008 if b'feature-config' in self.opener.options:
968 1009 self.feature_config = self.opener.options[b'feature-config'].copy()
969 1010 else:
970 1011 self.feature_config = FeatureConfig()
971 1012 self.feature_config.censorable = censorable
972 1013 self.feature_config.canonical_parent_order = canonical_parent_order
973 1014 if b'data-config' in self.opener.options:
974 1015 self.data_config = self.opener.options[b'data-config'].copy()
975 1016 else:
976 1017 self.data_config = DataConfig()
977 1018 self.data_config.check_ambig = checkambig
978 1019 self.data_config.mmap_large_index = mmaplargeindex
979 1020 if b'delta-config' in self.opener.options:
980 1021 self.delta_config = self.opener.options[b'delta-config'].copy()
981 1022 else:
982 1023 self.delta_config = DeltaConfig()
983 1024 self.delta_config.upper_bound_comp = upperboundcomp
984 1025
985 1026 # 3-tuple of (node, rev, text) for a raw revision.
986 1027 self._revisioncache = None
987 1028 # Maps rev to chain base rev.
988 1029 self._chainbasecache = util.lrucachedict(100)
989 1030
990 1031 self.index = None
991 1032 self._docket = None
992 1033 self._nodemap_docket = None
993 1034 # Mapping of partial identifiers to full nodes.
994 1035 self._pcache = {}
995 1036
996 1037 # other optional features
997 1038
998 1039 # Make copy of flag processors so each revlog instance can support
999 1040 # custom flags.
1000 1041 self._flagprocessors = dict(flagutil.flagprocessors)
1001 1042 # prevent nesting of addgroup
1002 1043 self._adding_group = None
1003 1044
1004 1045 chunk_cache = self._loadindex()
1005 1046 self._load_inner(chunk_cache)
1006
1007 1047 self._concurrencychecker = concurrencychecker
1008 1048
1009 1049 @property
1010 1050 def _generaldelta(self):
1011 1051 """temporary compatibility proxy"""
1012 1052 util.nouideprecwarn(
1013 1053 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1014 1054 )
1015 1055 return self.delta_config.general_delta
1016 1056
1017 1057 @property
1018 1058 def _checkambig(self):
1019 1059 """temporary compatibility proxy"""
1020 1060 util.nouideprecwarn(
1021 1061 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1022 1062 )
1023 1063 return self.data_config.check_ambig
1024 1064
1025 1065 @property
1026 1066 def _mmaplargeindex(self):
1027 1067 """temporary compatibility proxy"""
1028 1068 util.nouideprecwarn(
1029 1069 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1030 1070 )
1031 1071 return self.data_config.mmap_large_index
1032 1072
1033 1073 @property
1034 1074 def _censorable(self):
1035 1075 """temporary compatibility proxy"""
1036 1076 util.nouideprecwarn(
1037 1077 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1038 1078 )
1039 1079 return self.feature_config.censorable
1040 1080
1041 1081 @property
1042 1082 def _chunkcachesize(self):
1043 1083 """temporary compatibility proxy"""
1044 1084 util.nouideprecwarn(
1045 1085 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1046 1086 )
1047 1087 return self.data_config.chunk_cache_size
1048 1088
1049 1089 @property
1050 1090 def _maxchainlen(self):
1051 1091 """temporary compatibility proxy"""
1052 1092 util.nouideprecwarn(
1053 1093 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1054 1094 )
1055 1095 return self.delta_config.max_chain_len
1056 1096
1057 1097 @property
1058 1098 def _deltabothparents(self):
1059 1099 """temporary compatibility proxy"""
1060 1100 util.nouideprecwarn(
1061 1101 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1062 1102 )
1063 1103 return self.delta_config.delta_both_parents
1064 1104
1065 1105 @property
1066 1106 def _candidate_group_chunk_size(self):
1067 1107 """temporary compatibility proxy"""
1068 1108 util.nouideprecwarn(
1069 1109 b"use revlog.delta_config.candidate_group_chunk_size",
1070 1110 b"6.6",
1071 1111 stacklevel=2,
1072 1112 )
1073 1113 return self.delta_config.candidate_group_chunk_size
1074 1114
1075 1115 @property
1076 1116 def _debug_delta(self):
1077 1117 """temporary compatibility proxy"""
1078 1118 util.nouideprecwarn(
1079 1119 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1080 1120 )
1081 1121 return self.delta_config.debug_delta
1082 1122
1083 1123 @property
1084 1124 def _compengine(self):
1085 1125 """temporary compatibility proxy"""
1086 1126 util.nouideprecwarn(
1087 1127 b"use revlog.feature_config.compression_engine",
1088 1128 b"6.6",
1089 1129 stacklevel=2,
1090 1130 )
1091 1131 return self.feature_config.compression_engine
1092 1132
1093 1133 @property
1094 1134 def upperboundcomp(self):
1095 1135 """temporary compatibility proxy"""
1096 1136 util.nouideprecwarn(
1097 1137 b"use revlog.delta_config.upper_bound_comp",
1098 1138 b"6.6",
1099 1139 stacklevel=2,
1100 1140 )
1101 1141 return self.delta_config.upper_bound_comp
1102 1142
1103 1143 @property
1104 1144 def _compengineopts(self):
1105 1145 """temporary compatibility proxy"""
1106 1146 util.nouideprecwarn(
1107 1147 b"use revlog.feature_config.compression_engine_options",
1108 1148 b"6.6",
1109 1149 stacklevel=2,
1110 1150 )
1111 1151 return self.feature_config.compression_engine_options
1112 1152
1113 1153 @property
1114 1154 def _maxdeltachainspan(self):
1115 1155 """temporary compatibility proxy"""
1116 1156 util.nouideprecwarn(
1117 1157 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1118 1158 )
1119 1159 return self.delta_config.max_deltachain_span
1120 1160
1121 1161 @property
1122 1162 def _withsparseread(self):
1123 1163 """temporary compatibility proxy"""
1124 1164 util.nouideprecwarn(
1125 1165 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1126 1166 )
1127 1167 return self.data_config.with_sparse_read
1128 1168
1129 1169 @property
1130 1170 def _sparserevlog(self):
1131 1171 """temporary compatibility proxy"""
1132 1172 util.nouideprecwarn(
1133 1173 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1134 1174 )
1135 1175 return self.delta_config.sparse_revlog
1136 1176
1137 1177 @property
1138 1178 def hassidedata(self):
1139 1179 """temporary compatibility proxy"""
1140 1180 util.nouideprecwarn(
1141 1181 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1142 1182 )
1143 1183 return self.feature_config.has_side_data
1144 1184
1145 1185 @property
1146 1186 def _srdensitythreshold(self):
1147 1187 """temporary compatibility proxy"""
1148 1188 util.nouideprecwarn(
1149 1189 b"use revlog.data_config.sr_density_threshold",
1150 1190 b"6.6",
1151 1191 stacklevel=2,
1152 1192 )
1153 1193 return self.data_config.sr_density_threshold
1154 1194
1155 1195 @property
1156 1196 def _srmingapsize(self):
1157 1197 """temporary compatibility proxy"""
1158 1198 util.nouideprecwarn(
1159 1199 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1160 1200 )
1161 1201 return self.data_config.sr_min_gap_size
1162 1202
1163 1203 @property
1164 1204 def _compute_rank(self):
1165 1205 """temporary compatibility proxy"""
1166 1206 util.nouideprecwarn(
1167 1207 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1168 1208 )
1169 1209 return self.feature_config.compute_rank
1170 1210
1171 1211 @property
1172 1212 def canonical_parent_order(self):
1173 1213 """temporary compatibility proxy"""
1174 1214 util.nouideprecwarn(
1175 1215 b"use revlog.feature_config.canonical_parent_order",
1176 1216 b"6.6",
1177 1217 stacklevel=2,
1178 1218 )
1179 1219 return self.feature_config.canonical_parent_order
1180 1220
1181 1221 @property
1182 1222 def _lazydelta(self):
1183 1223 """temporary compatibility proxy"""
1184 1224 util.nouideprecwarn(
1185 1225 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1186 1226 )
1187 1227 return self.delta_config.lazy_delta
1188 1228
1189 1229 @property
1190 1230 def _lazydeltabase(self):
1191 1231 """temporary compatibility proxy"""
1192 1232 util.nouideprecwarn(
1193 1233 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1194 1234 )
1195 1235 return self.delta_config.lazy_delta_base
1196 1236
1197 1237 def _init_opts(self):
1198 1238 """process options (from above/config) to setup associated default revlog mode
1199 1239
1200 1240 These values might be affected when actually reading on disk information.
1201 1241
1202 1242 The relevant values are returned for use in _loadindex().
1203 1243
1204 1244 * newversionflags:
1205 1245 version header to use if we need to create a new revlog
1206 1246
1207 1247 * mmapindexthreshold:
1208 1248 minimal index size at which to start using mmap
1209 1249
1210 1250 * force_nodemap:
1211 1251 force the usage of a "development" version of the nodemap code
1212 1252 """
1213 1253 opts = self.opener.options
1214 1254
1215 1255 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1216 1256 new_header = CHANGELOGV2
1217 1257 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1218 1258 self.feature_config.compute_rank = compute_rank
1219 1259 elif b'revlogv2' in opts:
1220 1260 new_header = REVLOGV2
1221 1261 elif b'revlogv1' in opts:
1222 1262 new_header = REVLOGV1 | FLAG_INLINE_DATA
1223 1263 if b'generaldelta' in opts:
1224 1264 new_header |= FLAG_GENERALDELTA
1225 1265 elif b'revlogv0' in self.opener.options:
1226 1266 new_header = REVLOGV0
1227 1267 else:
1228 1268 new_header = REVLOG_DEFAULT_VERSION
1229 1269
1230 1270 mmapindexthreshold = None
1231 1271 if self.data_config.mmap_large_index:
1232 1272 mmapindexthreshold = self.data_config.mmap_index_threshold
1233 1273 if self.feature_config.enable_ellipsis:
1234 1274 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1235 1275
1236 1276 # revlog v0 doesn't have flag processors
1237 1277 for flag, processor in opts.get(b'flagprocessors', {}).items():
1238 1278 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1239 1279
1240 1280 chunk_cache_size = self.data_config.chunk_cache_size
1241 1281 if chunk_cache_size <= 0:
1242 1282 raise error.RevlogError(
1243 1283 _(b'revlog chunk cache size %r is not greater than 0')
1244 1284 % chunk_cache_size
1245 1285 )
1246 1286 elif chunk_cache_size & (chunk_cache_size - 1):
1247 1287 raise error.RevlogError(
1248 1288 _(b'revlog chunk cache size %r is not a power of 2')
1249 1289 % chunk_cache_size
1250 1290 )
1251 1291 force_nodemap = opts.get(b'devel-force-nodemap', False)
1252 1292 return new_header, mmapindexthreshold, force_nodemap
1253 1293
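The second validation above relies on the classic bit trick: `n & (n - 1)` clears the lowest set bit, so it is zero exactly when `n` is a power of two (the separate positivity check handles 0). A sketch, with a hypothetical helper name:

```python
def valid_chunk_cache_size(n):
    # positive and a power of two, mirroring the two checks above
    return n > 0 and (n & (n - 1)) == 0

assert valid_chunk_cache_size(65536)
assert not valid_chunk_cache_size(0)
assert not valid_chunk_cache_size(65535)
```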
1254 1294 def _get_data(self, filepath, mmap_threshold, size=None):
1255 1295 """return a file content with or without mmap
1256 1296
1257 1297 If the file is missing, return the empty string"""
1258 1298 try:
1259 1299 with self.opener(filepath) as fp:
1260 1300 if mmap_threshold is not None:
1261 1301 file_size = self.opener.fstat(fp).st_size
1262 1302 if file_size >= mmap_threshold:
1263 1303 if size is not None:
1264 1304 # avoid potential mmap crash
1265 1305 size = min(file_size, size)
1266 1306 # TODO: should call .close() to release resources without
1267 1307 # relying on Python GC
1268 1308 if size is None:
1269 1309 return util.buffer(util.mmapread(fp))
1270 1310 else:
1271 1311 return util.buffer(util.mmapread(fp, size))
1272 1312 if size is None:
1273 1313 return fp.read()
1274 1314 else:
1275 1315 return fp.read(size)
1276 1316 except FileNotFoundError:
1277 1317 return b''
1278 1318
1279 1319 def get_streams(self, max_linkrev, force_inline=False):
1280 1320 """return a list of streams that represent this revlog
1281 1321
1282 1322 This is used by stream-clone to do byte-for-byte copies of a repository.
1283 1323
1284 1324 This streams data for all revisions that refer to a changelog revision up
1285 1325 to `max_linkrev`.
1286 1326
1287 1327 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1288 1328
1289 1329 It returns a list of three-tuples:
1290 1330
1291 1331 [
1292 1332 (filename, bytes_stream, stream_size),
1293 1333 …
1294 1334 ]
1295 1335 """
1296 1336 n = len(self)
1297 1337 index = self.index
1298 1338 while n > 0:
1299 1339 linkrev = index[n - 1][4]
1300 1340 if linkrev < max_linkrev:
1301 1341 break
1302 1342 # note: this loop will rarely go through multiple iterations, since
1303 1343 # it only traverses commits created during the current streaming
1304 1344 # pull operation.
1305 1345 #
1306 1346 # If this becomes a problem, using a binary search should cap the
1307 1347 # runtime of this.
1308 1348 n = n - 1
1309 1349 if n == 0:
1310 1350 # no data to send
1311 1351 return []
1312 1352 index_size = n * index.entry_size
1313 1353 data_size = self.end(n - 1)
1314 1354
1315 1355 # XXX we might have been split (or stripped) since the object
1316 1356 # initialization. We need to close this race too, e.g. by having a way
1317 1357 # to pre-open the files we feed to the revlog and never closing them
1318 1358 # before we are done streaming.
1319 1359
1320 1360 if self._inline:
1321 1361
1322 1362 def get_stream():
1323 1363 with self.opener(self._indexfile, mode=b"r") as fp:
1324 1364 yield None
1325 1365 size = index_size + data_size
1326 1366 if size <= 65536:
1327 1367 yield fp.read(size)
1328 1368 else:
1329 1369 yield from util.filechunkiter(fp, limit=size)
1330 1370
1331 1371 inline_stream = get_stream()
1332 1372 next(inline_stream)
1333 1373 return [
1334 1374 (self._indexfile, inline_stream, index_size + data_size),
1335 1375 ]
1336 1376 elif force_inline:
1337 1377
1338 1378 def get_stream():
1339 1379 with self.reading():
1340 1380 yield None
1341 1381
1342 1382 for rev in range(n):
1343 1383 idx = self.index.entry_binary(rev)
1344 1384 if rev == 0 and self._docket is None:
1345 1385 # re-inject the inline flag
1346 1386 header = self._format_flags
1347 1387 header |= self._format_version
1348 1388 header |= FLAG_INLINE_DATA
1349 1389 header = self.index.pack_header(header)
1350 1390 idx = header + idx
1351 1391 yield idx
1352 1392 yield self._inner.get_segment_for_revs(rev, rev)[1]
1353 1393
1354 1394 inline_stream = get_stream()
1355 1395 next(inline_stream)
1356 1396 return [
1357 1397 (self._indexfile, inline_stream, index_size + data_size),
1358 1398 ]
1359 1399 else:
1360 1400
1361 1401 def get_index_stream():
1362 1402 with self.opener(self._indexfile, mode=b"r") as fp:
1363 1403 yield None
1364 1404 if index_size <= 65536:
1365 1405 yield fp.read(index_size)
1366 1406 else:
1367 1407 yield from util.filechunkiter(fp, limit=index_size)
1368 1408
1369 1409 def get_data_stream():
1370 1410 with self._datafp() as fp:
1371 1411 yield None
1372 1412 if data_size <= 65536:
1373 1413 yield fp.read(data_size)
1374 1414 else:
1375 1415 yield from util.filechunkiter(fp, limit=data_size)
1376 1416
1377 1417 index_stream = get_index_stream()
1378 1418 next(index_stream)
1379 1419 data_stream = get_data_stream()
1380 1420 next(data_stream)
1381 1421 return [
1382 1422 (self._datafile, data_stream, data_size),
1383 1423 (self._indexfile, index_stream, index_size),
1384 1424 ]
1385 1425
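Each stream above is built with the same idiom: the generator yields `None` once so that the `next(...)` call can force it to open its file eagerly, and every later iteration yields raw bytes. A minimal illustration of that priming pattern, with a toy payload in place of real file chunks:

```python
def get_stream(payload):
    # open resources here, then hand control back once so the caller can
    # force the open eagerly with next()
    yield None
    yield payload

s = get_stream(b'data')
next(s)                    # prime: the "file" is now open
assert b''.join(s) == b'data'
```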
1386 1426 def _loadindex(self, docket=None):
1387 1427
1388 1428 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1389 1429
1390 1430 if self.postfix is not None:
1391 1431 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1392 1432 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1393 1433 entry_point = b'%s.i.a' % self.radix
1394 1434 elif self._try_split and self.opener.exists(self._split_index_file):
1395 1435 entry_point = self._split_index_file
1396 1436 else:
1397 1437 entry_point = b'%s.i' % self.radix
1398 1438
1399 1439 if docket is not None:
1400 1440 self._docket = docket
1401 1441 self._docket_file = entry_point
1402 1442 else:
1403 1443 self._initempty = True
1404 1444 entry_data = self._get_data(entry_point, mmapindexthreshold)
1405 1445 if len(entry_data) > 0:
1406 1446 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1407 1447 self._initempty = False
1408 1448 else:
1409 1449 header = new_header
1410 1450
1411 1451 self._format_flags = header & ~0xFFFF
1412 1452 self._format_version = header & 0xFFFF
1413 1453
1414 1454 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1415 1455 if supported_flags is None:
1416 1456 msg = _(b'unknown version (%d) in revlog %s')
1417 1457 msg %= (self._format_version, self.display_id)
1418 1458 raise error.RevlogError(msg)
1419 1459 elif self._format_flags & ~supported_flags:
1420 1460 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1421 1461 display_flag = self._format_flags >> 16
1422 1462 msg %= (display_flag, self._format_version, self.display_id)
1423 1463 raise error.RevlogError(msg)
1424 1464
1425 1465 features = FEATURES_BY_VERSION[self._format_version]
1426 1466 self._inline = features[b'inline'](self._format_flags)
1427 1467 self.delta_config.general_delta = features[b'generaldelta'](
1428 1468 self._format_flags
1429 1469 )
1430 1470 self.feature_config.has_side_data = features[b'sidedata']
1431 1471
1432 1472 if not features[b'docket']:
1433 1473 self._indexfile = entry_point
1434 1474 index_data = entry_data
1435 1475 else:
1436 1476 self._docket_file = entry_point
1437 1477 if self._initempty:
1438 1478 self._docket = docketutil.default_docket(self, header)
1439 1479 else:
1440 1480 self._docket = docketutil.parse_docket(
1441 1481 self, entry_data, use_pending=self._trypending
1442 1482 )
1443 1483
1444 1484 if self._docket is not None:
1445 1485 self._indexfile = self._docket.index_filepath()
1446 1486 index_data = b''
1447 1487 index_size = self._docket.index_end
1448 1488 if index_size > 0:
1449 1489 index_data = self._get_data(
1450 1490 self._indexfile, mmapindexthreshold, size=index_size
1451 1491 )
1452 1492 if len(index_data) < index_size:
1453 1493 msg = _(b'too few index data for %s: got %d, expected %d')
1454 1494 msg %= (self.display_id, len(index_data), index_size)
1455 1495 raise error.RevlogError(msg)
1456 1496
1457 1497 self._inline = False
1458 1498 # generaldelta implied by version 2 revlogs.
1459 1499 self.delta_config.general_delta = True
1460 1500 # the logic for persistent nodemap will be dealt with within the
1461 1501 # main docket, so disable it for now.
1462 1502 self._nodemap_file = None
1463 1503
1464 1504 if self._docket is not None:
1465 1505 self._datafile = self._docket.data_filepath()
1466 1506 self._sidedatafile = self._docket.sidedata_filepath()
1467 1507 elif self.postfix is None:
1468 1508 self._datafile = b'%s.d' % self.radix
1469 1509 else:
1470 1510 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1471 1511
1472 1512 self.nodeconstants = sha1nodeconstants
1473 1513 self.nullid = self.nodeconstants.nullid
1474 1514
1475 1515 # sparse-revlog can't be on without general-delta (issue6056)
1476 1516 if not self.delta_config.general_delta:
1477 1517 self.delta_config.sparse_revlog = False
1478 1518
1479 1519 self._storedeltachains = True
1480 1520
1481 1521 devel_nodemap = (
1482 1522 self._nodemap_file
1483 1523 and force_nodemap
1484 1524 and parse_index_v1_nodemap is not None
1485 1525 )
1486 1526
1487 1527 use_rust_index = False
1488 1528 if rustrevlog is not None:
1489 1529 if self._nodemap_file is not None:
1490 1530 use_rust_index = True
1491 1531 else:
1492 1532 use_rust_index = self.opener.options.get(b'rust.index')
1493 1533
1494 1534 self._parse_index = parse_index_v1
1495 1535 if self._format_version == REVLOGV0:
1496 1536 self._parse_index = revlogv0.parse_index_v0
1497 1537 elif self._format_version == REVLOGV2:
1498 1538 self._parse_index = parse_index_v2
1499 1539 elif self._format_version == CHANGELOGV2:
1500 1540 self._parse_index = parse_index_cl_v2
1501 1541 elif devel_nodemap:
1502 1542 self._parse_index = parse_index_v1_nodemap
1503 1543 elif use_rust_index:
1504 1544 self._parse_index = parse_index_v1_mixed
1505 1545 try:
1506 1546 d = self._parse_index(index_data, self._inline)
1507 1547 index, chunkcache = d
1508 1548 use_nodemap = (
1509 1549 not self._inline
1510 1550 and self._nodemap_file is not None
1511 1551 and hasattr(index, 'update_nodemap_data')
1512 1552 )
1513 1553 if use_nodemap:
1514 1554 nodemap_data = nodemaputil.persisted_data(self)
1515 1555 if nodemap_data is not None:
1516 1556 docket = nodemap_data[0]
1517 1557 if (
1518 1558 len(d[0]) > docket.tip_rev
1519 1559 and d[0][docket.tip_rev][7] == docket.tip_node
1520 1560 ):
1521 1561 # no changelog tampering
1522 1562 self._nodemap_docket = docket
1523 1563 index.update_nodemap_data(*nodemap_data)
1524 1564 except (ValueError, IndexError):
1525 1565 raise error.RevlogError(
1526 1566 _(b"index %s is corrupted") % self.display_id
1527 1567 )
1528 1568 self.index = index
1529 1569 # revnum -> (chain-length, sum-delta-length)
1530 1570 self._chaininfocache = util.lrucachedict(500)
1531 1571
1532 1572 return chunkcache
1533 1573
1534 1574 def _load_inner(self, chunk_cache):
1535 1575 if self._docket is None:
1536 1576 default_compression_header = None
1537 1577 else:
1538 1578 default_compression_header = self._docket.default_compression_header
1539 1579
1540 1580 self._inner = _InnerRevlog(
1541 1581 opener=self.opener,
1542 1582 index=self.index,
1543 1583 index_file=self._indexfile,
1544 1584 data_file=self._datafile,
1545 1585 sidedata_file=self._sidedatafile,
1546 1586 inline=self._inline,
1547 1587 data_config=self.data_config,
1548 1588 delta_config=self.delta_config,
1549 1589 feature_config=self.feature_config,
1550 1590 chunk_cache=chunk_cache,
1551 1591 default_compression_header=default_compression_header,
1552 1592 )
1553 1593
1554 1594 def get_revlog(self):
1555 1595 """simple function to mirror API of other not-really-revlog API"""
1556 1596 return self
1557 1597
1558 1598 @util.propertycache
1559 1599 def revlog_kind(self):
1560 1600 return self.target[0]
1561 1601
1562 1602 @util.propertycache
1563 1603 def display_id(self):
1564 1604 """The public facing "ID" of the revlog that we use in message"""
1565 1605 if self.revlog_kind == KIND_FILELOG:
1566 1606 # Reference the file without the "data/" prefix, so it is familiar
1567 1607 # to the user.
1568 1608 return self.target[1]
1569 1609 else:
1570 1610 return self.radix
1571 1611
1572 1612 def _datafp(self, mode=b'r'):
1573 1613 """file object for the revlog's data file"""
1574 1614 return self.opener(self._datafile, mode=mode)
1575 1615
1576 1616 def tiprev(self):
1577 1617 return len(self.index) - 1
1578 1618
1579 1619 def tip(self):
1580 1620 return self.node(self.tiprev())
1581 1621
1582 1622 def __contains__(self, rev):
1583 1623 return 0 <= rev < len(self)
1584 1624
1585 1625 def __len__(self):
1586 1626 return len(self.index)
1587 1627
1588 1628 def __iter__(self):
1589 1629 return iter(range(len(self)))
1590 1630
1591 1631 def revs(self, start=0, stop=None):
1592 1632 """iterate over all rev in this revlog (from start to stop)"""
1593 1633 return storageutil.iterrevs(len(self), start=start, stop=stop)
1594 1634
1595 1635 def hasnode(self, node):
1596 1636 try:
1597 1637 self.rev(node)
1598 1638 return True
1599 1639 except KeyError:
1600 1640 return False
1601 1641
1602 1642 def _candelta(self, baserev, rev):
1603 1643 """whether two revisions (baserev, rev) can be delta-ed or not"""
1604 1644 # Disable delta if either rev requires a content-changing flag
1605 1645 # processor (ex. LFS). This is because such a flag processor can alter
1606 1646 # the rawtext content that the delta will be based on, and two clients
1607 1647 # could have the same revlog node with different flags (i.e. different
1608 1648 # rawtext contents) and the delta could be incompatible.
1609 1649 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1610 1650 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1611 1651 ):
1612 1652 return False
1613 1653 return True
1614 1654
1615 1655 def update_caches(self, transaction):
1616 1656 """update the on-disk cache
1617 1657
1618 1658 If a transaction is passed, the update may be delayed to transaction
1619 1659 commit."""
1620 1660 if self._nodemap_file is not None:
1621 1661 if transaction is None:
1622 1662 nodemaputil.update_persistent_nodemap(self)
1623 1663 else:
1624 1664 nodemaputil.setup_persistent_nodemap(transaction, self)
1625 1665
1626 1666 def clearcaches(self):
1627 1667 """Clear in-memory caches"""
1628 1668 self._revisioncache = None
1629 1669 self._chainbasecache.clear()
1630 1670 self._inner._segmentfile.clear_cache()
1631 1671 self._inner._segmentfile_sidedata.clear_cache()
1632 1672 self._pcache = {}
1633 1673 self._nodemap_docket = None
1634 1674 self.index.clearcaches()
1635 1675 # The Python code is the one responsible for validating the docket, so
1636 1676 # we end up having to refresh it here.
1637 1677 use_nodemap = (
1638 1678 not self._inline
1639 1679 and self._nodemap_file is not None
1640 1680 and hasattr(self.index, 'update_nodemap_data')
1641 1681 )
1642 1682 if use_nodemap:
1643 1683 nodemap_data = nodemaputil.persisted_data(self)
1644 1684 if nodemap_data is not None:
1645 1685 self._nodemap_docket = nodemap_data[0]
1646 1686 self.index.update_nodemap_data(*nodemap_data)
1647 1687
1648 1688 def rev(self, node):
1649 1689 """return the revision number associated with a <nodeid>"""
1650 1690 try:
1651 1691 return self.index.rev(node)
1652 1692 except TypeError:
1653 1693 raise
1654 1694 except error.RevlogError:
1655 1695 # parsers.c radix tree lookup failed
1656 1696 if (
1657 1697 node == self.nodeconstants.wdirid
1658 1698 or node in self.nodeconstants.wdirfilenodeids
1659 1699 ):
1660 1700 raise error.WdirUnsupported
1661 1701 raise error.LookupError(node, self.display_id, _(b'no node'))
1662 1702
1663 1703 # Accessors for index entries.
1664 1704
1665 1705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1666 1706 # are flags.
1667 1707 def start(self, rev):
1668 1708 return int(self.index[rev][0] >> 16)
1669 1709
1670 1710 def sidedata_cut_off(self, rev):
1671 1711 sd_cut_off = self.index[rev][8]
1672 1712 if sd_cut_off != 0:
1673 1713 return sd_cut_off
1674 1714 # This is some annoying dance, because entries without sidedata
1675 1715 # currently use 0 as their offset (instead of previous-offset +
1676 1716 # previous-size).
1677 1717 #
1678 1718 # We should reconsider this sidedata → 0 sidedata_offset policy.
1679 1719 # In the meantime, we need this.
1680 1720 while 0 <= rev:
1681 1721 e = self.index[rev]
1682 1722 if e[9] != 0:
1683 1723 return e[8] + e[9]
1684 1724 rev -= 1
1685 1725 return 0
1686 1726
1687 1727 def flags(self, rev):
1688 1728 return self.index[rev][0] & 0xFFFF
1689 1729
1690 1730 def length(self, rev):
1691 1731 return self.index[rev][1]
1692 1732
1693 1733 def sidedata_length(self, rev):
1694 1734 if not self.feature_config.has_side_data:
1695 1735 return 0
1696 1736 return self.index[rev][9]
1697 1737
1698 1738 def rawsize(self, rev):
1699 1739 """return the length of the uncompressed text for a given revision"""
1700 1740 l = self.index[rev][2]
1701 1741 if l >= 0:
1702 1742 return l
1703 1743
1704 1744 t = self.rawdata(rev)
1705 1745 return len(t)
1706 1746
1707 1747 def size(self, rev):
1708 1748 """length of non-raw text (processed by a "read" flag processor)"""
1709 1749 # fast path: if no "read" flag processor could change the content,
1710 1750 # size is rawsize. note: ELLIPSIS is known to not change the content.
1711 1751 flags = self.flags(rev)
1712 1752 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1713 1753 return self.rawsize(rev)
1714 1754
1715 1755 return len(self.revision(rev))
1716 1756
1717 1757 def fast_rank(self, rev):
1718 1758 """Return the rank of a revision if already known, or None otherwise.
1719 1759
1720 1760 The rank of a revision is the size of the sub-graph it defines as a
1721 1761 head. Equivalently, the rank of a revision `r` is the size of the set
1722 1762 `ancestors(r)`, `r` included.
1723 1763
1724 1764 This method returns the rank retrieved from the revlog in constant
1725 1765 time. It makes no attempt at computing unknown values for versions of
1726 1766 the revlog which do not persist the rank.
1727 1767 """
1728 1768 rank = self.index[rev][ENTRY_RANK]
1729 1769 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1730 1770 return None
1731 1771 if rev == nullrev:
1732 1772 return 0 # convention
1733 1773 return rank
1734 1774
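# Worked example for `fast_rank` above, on a hypothetical graph: if rev 2
# is a merge of two root revisions 0 and 1, then `ancestors(2)` is
# {0, 1, 2} and the rank of rev 2 is 3. `fast_rank` only returns such
# values when the index persists them (CHANGELOGV2); otherwise it
# returns None.
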
1735 1775 def chainbase(self, rev):
1736 1776 base = self._chainbasecache.get(rev)
1737 1777 if base is not None:
1738 1778 return base
1739 1779
1740 1780 index = self.index
1741 1781 iterrev = rev
1742 1782 base = index[iterrev][3]
1743 1783 while base != iterrev:
1744 1784 iterrev = base
1745 1785 base = index[iterrev][3]
1746 1786
1747 1787 self._chainbasecache[rev] = base
1748 1788 return base
1749 1789
1750 1790 def linkrev(self, rev):
1751 1791 return self.index[rev][4]
1752 1792
1753 1793 def parentrevs(self, rev):
1754 1794 try:
1755 1795 entry = self.index[rev]
1756 1796 except IndexError:
1757 1797 if rev == wdirrev:
1758 1798 raise error.WdirUnsupported
1759 1799 raise
1760 1800
1761 1801 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1762 1802 return entry[6], entry[5]
1763 1803 else:
1764 1804 return entry[5], entry[6]
1765 1805
1766 1806 # fast parentrevs(rev) where rev isn't filtered
1767 1807 _uncheckedparentrevs = parentrevs
1768 1808
1769 1809 def node(self, rev):
1770 1810 try:
1771 1811 return self.index[rev][7]
1772 1812 except IndexError:
1773 1813 if rev == wdirrev:
1774 1814 raise error.WdirUnsupported
1775 1815 raise
1776 1816
1777 1817 # Derived from index values.
1778 1818
1779 1819 def end(self, rev):
1780 1820 return self.start(rev) + self.length(rev)
1781 1821
1782 1822 def parents(self, node):
1783 1823 i = self.index
1784 1824 d = i[self.rev(node)]
1785 1825 # inline node() to avoid function call overhead
1786 1826 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1787 1827 return i[d[6]][7], i[d[5]][7]
1788 1828 else:
1789 1829 return i[d[5]][7], i[d[6]][7]
1790 1830
1791 1831 def chainlen(self, rev):
1792 1832 return self._chaininfo(rev)[0]
1793 1833
1794 1834 def _chaininfo(self, rev):
1795 1835 chaininfocache = self._chaininfocache
1796 1836 if rev in chaininfocache:
1797 1837 return chaininfocache[rev]
1798 1838 index = self.index
1799 1839 generaldelta = self.delta_config.general_delta
1800 1840 iterrev = rev
1801 1841 e = index[iterrev]
1802 1842 clen = 0
1803 1843 compresseddeltalen = 0
1804 1844 while iterrev != e[3]:
1805 1845 clen += 1
1806 1846 compresseddeltalen += e[1]
1807 1847 if generaldelta:
1808 1848 iterrev = e[3]
1809 1849 else:
1810 1850 iterrev -= 1
1811 1851 if iterrev in chaininfocache:
1812 1852 t = chaininfocache[iterrev]
1813 1853 clen += t[0]
1814 1854 compresseddeltalen += t[1]
1815 1855 break
1816 1856 e = index[iterrev]
1817 1857 else:
1818 1858 # Add text length of base since decompressing that also takes
1819 1859 # work. For cache hits the length is already included.
1820 1860 compresseddeltalen += e[1]
1821 1861 r = (clen, compresseddeltalen)
1822 1862 chaininfocache[rev] = r
1823 1863 return r
1824 1864
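# A sketch of what the cached tuple means, for a hypothetical rev 2
# stored as two stacked deltas over a full snapshot at rev 0:
#
#     rl._chaininfo(2)  # -> (2, len(delta_1) + len(delta_2) + len(base))
#
# i.e. two delta applications, plus the base text that must also be
# decompressed (all lengths as stored, compressed).
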
1825 1865 def _deltachain(self, rev, stoprev=None):
1826 """Obtain the delta chain for a revision.
1827
1828 ``stoprev`` specifies a revision to stop at. If not specified, we
1829 stop at the base of the chain.
1830
1831 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1832 revs in ascending order and ``stopped`` is a bool indicating whether
1833 ``stoprev`` was hit.
1834 """
1835 generaldelta = self.delta_config.general_delta
1836 # Try C implementation.
1837 try:
1838 return self.index.deltachain(rev, stoprev, generaldelta)
1839 except AttributeError:
1840 pass
1841
1842 chain = []
1843
1844 # Alias to prevent attribute lookup in tight loop.
1845 index = self.index
1846
1847 iterrev = rev
1848 e = index[iterrev]
1849 while iterrev != e[3] and iterrev != stoprev:
1850 chain.append(iterrev)
1851 if generaldelta:
1852 iterrev = e[3]
1853 else:
1854 iterrev -= 1
1855 e = index[iterrev]
1856
1857 if iterrev == stoprev:
1858 stopped = True
1859 else:
1860 chain.append(iterrev)
1861 stopped = False
1862
1863 chain.reverse()
1864 return chain, stopped
1866 return self._inner._deltachain(rev, stoprev=stoprev)
1865 1867
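# A minimal usage sketch for `_deltachain` above, assuming a hypothetical
# revlog `rl` where rev 2 is stored as a delta against rev 1, itself a
# delta against the full snapshot at rev 0:
#
#     rl._deltachain(2)             # -> ([0, 1, 2], False)
#     rl._deltachain(2, stoprev=1)  # -> ([2], True)
#
# The `stopped` flag tells callers they must supply the text of `stoprev`
# themselves (e.g. from the revision cache).
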
1866 1868 def ancestors(self, revs, stoprev=0, inclusive=False):
1867 1869 """Generate the ancestors of 'revs' in reverse revision order.
1868 1870 Does not generate revs lower than stoprev.
1869 1871
1870 1872 See the documentation for ancestor.lazyancestors for more details."""
1871 1873
1872 1874 # first, make sure start revisions aren't filtered
1873 1875 revs = list(revs)
1874 1876 checkrev = self.node
1875 1877 for r in revs:
1876 1878 checkrev(r)
1877 1879 # and we're sure ancestors aren't filtered as well
1878 1880
1879 1881 if rustancestor is not None and self.index.rust_ext_compat:
1880 1882 lazyancestors = rustancestor.LazyAncestors
1881 1883 arg = self.index
1882 1884 else:
1883 1885 lazyancestors = ancestor.lazyancestors
1884 1886 arg = self._uncheckedparentrevs
1885 1887 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1886 1888
1887 1889 def descendants(self, revs):
1888 1890 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1889 1891
1890 1892 def findcommonmissing(self, common=None, heads=None):
1891 1893 """Return a tuple of the ancestors of common and the ancestors of heads
1892 1894 that are not ancestors of common. In revset terminology, we return the
1893 1895 tuple:
1894 1896
1895 1897 ::common, (::heads) - (::common)
1896 1898
1897 1899 The list is sorted by revision number, meaning it is
1898 1900 topologically sorted.
1899 1901
1900 1902 'heads' and 'common' are both lists of node IDs. If heads is
1901 1903 not supplied, uses all of the revlog's heads. If common is not
1902 1904 supplied, uses nullid."""
1903 1905 if common is None:
1904 1906 common = [self.nullid]
1905 1907 if heads is None:
1906 1908 heads = self.heads()
1907 1909
1908 1910 common = [self.rev(n) for n in common]
1909 1911 heads = [self.rev(n) for n in heads]
1910 1912
1911 1913 # we want the ancestors, but inclusive
1912 1914 class lazyset:
1913 1915 def __init__(self, lazyvalues):
1914 1916 self.addedvalues = set()
1915 1917 self.lazyvalues = lazyvalues
1916 1918
1917 1919 def __contains__(self, value):
1918 1920 return value in self.addedvalues or value in self.lazyvalues
1919 1921
1920 1922 def __iter__(self):
1921 1923 added = self.addedvalues
1922 1924 for r in added:
1923 1925 yield r
1924 1926 for r in self.lazyvalues:
1925 1927 if r not in added:
1926 1928 yield r
1927 1929
1928 1930 def add(self, value):
1929 1931 self.addedvalues.add(value)
1930 1932
1931 1933 def update(self, values):
1932 1934 self.addedvalues.update(values)
1933 1935
1934 1936 has = lazyset(self.ancestors(common))
1935 1937 has.add(nullrev)
1936 1938 has.update(common)
1937 1939
1938 1940 # take all ancestors from heads that aren't in has
1939 1941 missing = set()
1940 1942 visit = collections.deque(r for r in heads if r not in has)
1941 1943 while visit:
1942 1944 r = visit.popleft()
1943 1945 if r in missing:
1944 1946 continue
1945 1947 else:
1946 1948 missing.add(r)
1947 1949 for p in self.parentrevs(r):
1948 1950 if p not in has:
1949 1951 visit.append(p)
1950 1952 missing = list(missing)
1951 1953 missing.sort()
1952 1954 return has, [self.node(miss) for miss in missing]
1953 1955
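# For instance, on a hypothetical linear history 0-1-2-3 with
# common=[node(1)] and heads=[node(3)], the returned `has` lazily covers
# {nullrev, 0, 1} and the missing list is [node(2), node(3)], sorted by
# revision number.
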
1954 1956 def incrementalmissingrevs(self, common=None):
1955 1957 """Return an object that can be used to incrementally compute the
1956 1958 revision numbers of the ancestors of arbitrary sets that are not
1957 1959 ancestors of common. This is an ancestor.incrementalmissingancestors
1958 1960 object.
1959 1961
1960 1962 'common' is a list of revision numbers. If common is not supplied, uses
1961 1963 nullrev.
1962 1964 """
1963 1965 if common is None:
1964 1966 common = [nullrev]
1965 1967
1966 1968 if rustancestor is not None and self.index.rust_ext_compat:
1967 1969 return rustancestor.MissingAncestors(self.index, common)
1968 1970 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1969 1971
1970 1972 def findmissingrevs(self, common=None, heads=None):
1971 1973 """Return the revision numbers of the ancestors of heads that
1972 1974 are not ancestors of common.
1973 1975
1974 1976 More specifically, return a list of revision numbers corresponding to
1975 1977 nodes N such that every N satisfies the following constraints:
1976 1978
1977 1979 1. N is an ancestor of some node in 'heads'
1978 1980 2. N is not an ancestor of any node in 'common'
1979 1981
1980 1982 The list is sorted by revision number, meaning it is
1981 1983 topologically sorted.
1982 1984
1983 1985 'heads' and 'common' are both lists of revision numbers. If heads is
1984 1986 not supplied, uses all of the revlog's heads. If common is not
1985 1987 supplied, uses nullrev."""
1986 1988 if common is None:
1987 1989 common = [nullrev]
1988 1990 if heads is None:
1989 1991 heads = self.headrevs()
1990 1992
1991 1993 inc = self.incrementalmissingrevs(common=common)
1992 1994 return inc.missingancestors(heads)
1993 1995
1994 1996 def findmissing(self, common=None, heads=None):
1995 1997 """Return the ancestors of heads that are not ancestors of common.
1996 1998
1997 1999 More specifically, return a list of nodes N such that every N
1998 2000 satisfies the following constraints:
1999 2001
2000 2002 1. N is an ancestor of some node in 'heads'
2001 2003 2. N is not an ancestor of any node in 'common'
2002 2004
2003 2005 The list is sorted by revision number, meaning it is
2004 2006 topologically sorted.
2005 2007
2006 2008 'heads' and 'common' are both lists of node IDs. If heads is
2007 2009 not supplied, uses all of the revlog's heads. If common is not
2008 2010 supplied, uses nullid."""
2009 2011 if common is None:
2010 2012 common = [self.nullid]
2011 2013 if heads is None:
2012 2014 heads = self.heads()
2013 2015
2014 2016 common = [self.rev(n) for n in common]
2015 2017 heads = [self.rev(n) for n in heads]
2016 2018
2017 2019 inc = self.incrementalmissingrevs(common=common)
2018 2020 return [self.node(r) for r in inc.missingancestors(heads)]
2019 2021
2020 2022 def nodesbetween(self, roots=None, heads=None):
2021 2023 """Return a topological path from 'roots' to 'heads'.
2022 2024
2023 2025 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2024 2026 topologically sorted list of all nodes N that satisfy both of
2025 2027 these constraints:
2026 2028
2027 2029 1. N is a descendant of some node in 'roots'
2028 2030 2. N is an ancestor of some node in 'heads'
2029 2031
2030 2032 Every node is considered to be both a descendant and an ancestor
2031 2033 of itself, so every reachable node in 'roots' and 'heads' will be
2032 2034 included in 'nodes'.
2033 2035
2034 2036 'outroots' is the list of reachable nodes in 'roots', i.e., the
2035 2037 subset of 'roots' that is returned in 'nodes'. Likewise,
2036 2038 'outheads' is the subset of 'heads' that is also in 'nodes'.
2037 2039
2038 2040 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2039 2041 unspecified, uses nullid as the only root. If 'heads' is
2040 2042 unspecified, uses list of all of the revlog's heads."""
2041 2043 nonodes = ([], [], [])
2042 2044 if roots is not None:
2043 2045 roots = list(roots)
2044 2046 if not roots:
2045 2047 return nonodes
2046 2048 lowestrev = min([self.rev(n) for n in roots])
2047 2049 else:
2048 2050 roots = [self.nullid] # Everybody's a descendant of nullid
2049 2051 lowestrev = nullrev
2050 2052 if (lowestrev == nullrev) and (heads is None):
2051 2053 # We want _all_ the nodes!
2052 2054 return (
2053 2055 [self.node(r) for r in self],
2054 2056 [self.nullid],
2055 2057 list(self.heads()),
2056 2058 )
2057 2059 if heads is None:
2058 2060 # All nodes are ancestors, so the latest ancestor is the last
2059 2061 # node.
2060 2062 highestrev = len(self) - 1
2061 2063 # Set ancestors to None to signal that every node is an ancestor.
2062 2064 ancestors = None
2063 2065 # Set heads to an empty dictionary for later discovery of heads
2064 2066 heads = {}
2065 2067 else:
2066 2068 heads = list(heads)
2067 2069 if not heads:
2068 2070 return nonodes
2069 2071 ancestors = set()
2070 2072 # Turn heads into a dictionary so we can remove 'fake' heads.
2071 2073 # Also, later we will be using it to filter out the heads we can't
2072 2074 # find from roots.
2073 2075 heads = dict.fromkeys(heads, False)
2074 2076 # Start at the top and keep marking parents until we're done.
2075 2077 nodestotag = set(heads)
2076 2078 # Remember where the top was so we can use it as a limit later.
2077 2079 highestrev = max([self.rev(n) for n in nodestotag])
2078 2080 while nodestotag:
2079 2081 # grab a node to tag
2080 2082 n = nodestotag.pop()
2081 2083 # Never tag nullid
2082 2084 if n == self.nullid:
2083 2085 continue
2084 2086 # A node's revision number represents its place in a
2085 2087 # topologically sorted list of nodes.
2086 2088 r = self.rev(n)
2087 2089 if r >= lowestrev:
2088 2090 if n not in ancestors:
2089 2091 # If we are possibly a descendant of one of the roots
2090 2092 # and we haven't already been marked as an ancestor
2091 2093 ancestors.add(n) # Mark as ancestor
2092 2094 # Add non-nullid parents to list of nodes to tag.
2093 2095 nodestotag.update(
2094 2096 [p for p in self.parents(n) if p != self.nullid]
2095 2097 )
2096 2098 elif n in heads: # We've seen it before, is it a fake head?
2097 2099 # So it is, real heads should not be the ancestors of
2098 2100 # any other heads.
2099 2101 heads.pop(n)
2100 2102 if not ancestors:
2101 2103 return nonodes
2102 2104 # Now that we have our set of ancestors, we want to remove any
2103 2105 # roots that are not ancestors.
2104 2106
2105 2107 # If one of the roots was nullid, everything is included anyway.
2106 2108 if lowestrev > nullrev:
2107 2109 # But, since we weren't, let's recompute the lowest rev to not
2108 2110 # include roots that aren't ancestors.
2109 2111
2110 2112 # Filter out roots that aren't ancestors of heads
2111 2113 roots = [root for root in roots if root in ancestors]
2112 2114 # Recompute the lowest revision
2113 2115 if roots:
2114 2116 lowestrev = min([self.rev(root) for root in roots])
2115 2117 else:
2116 2118 # No more roots? Return empty list
2117 2119 return nonodes
2118 2120 else:
2119 2121 # We are descending from nullid, and don't need to care about
2120 2122 # any other roots.
2121 2123 lowestrev = nullrev
2122 2124 roots = [self.nullid]
2123 2125 # Transform our roots list into a set.
2124 2126 descendants = set(roots)
2125 2127 # Also, keep the original roots so we can filter out roots that aren't
2126 2128 # 'real' roots (i.e. are descended from other roots).
2127 2129 roots = descendants.copy()
2128 2130 # Our topologically sorted list of output nodes.
2129 2131 orderedout = []
2130 2132 # Don't start at nullid since we don't want nullid in our output list,
2131 2133 # and if nullid shows up in descendants, empty parents will look like
2132 2134 # they're descendants.
2133 2135 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2134 2136 n = self.node(r)
2135 2137 isdescendant = False
2136 2138 if lowestrev == nullrev: # Everybody is a descendant of nullid
2137 2139 isdescendant = True
2138 2140 elif n in descendants:
2139 2141 # n is already a descendant
2140 2142 isdescendant = True
2141 2143 # This check only needs to be done here because all the roots
2142 2144 # will start being marked as descendants before the loop.
2143 2145 if n in roots:
2144 2146 # If n was a root, check if it's a 'real' root.
2145 2147 p = tuple(self.parents(n))
2146 2148 # If any of its parents are descendants, it's not a root.
2147 2149 if (p[0] in descendants) or (p[1] in descendants):
2148 2150 roots.remove(n)
2149 2151 else:
2150 2152 p = tuple(self.parents(n))
2151 2153 # A node is a descendant if either of its parents are
2152 2154 # descendants. (We seeded the descendants set with the roots
2153 2155 # up there, remember?)
2154 2156 if (p[0] in descendants) or (p[1] in descendants):
2155 2157 descendants.add(n)
2156 2158 isdescendant = True
2157 2159 if isdescendant and ((ancestors is None) or (n in ancestors)):
2158 2160 # Only include nodes that are both descendants and ancestors.
2159 2161 orderedout.append(n)
2160 2162 if (ancestors is not None) and (n in heads):
2161 2163 # We're trying to figure out which heads are reachable
2162 2164 # from roots.
2163 2165 # Mark this head as having been reached
2164 2166 heads[n] = True
2165 2167 elif ancestors is None:
2166 2168 # Otherwise, we're trying to discover the heads.
2167 2169 # Assume this is a head because if it isn't, the next step
2168 2170 # will eventually remove it.
2169 2171 heads[n] = True
2170 2172 # But, obviously its parents aren't.
2171 2173 for p in self.parents(n):
2172 2174 heads.pop(p, None)
2173 2175 heads = [head for head, flag in heads.items() if flag]
2174 2176 roots = list(roots)
2175 2177 assert orderedout
2176 2178 assert roots
2177 2179 assert heads
2178 2180 return (orderedout, roots, heads)
2179 2181
2180 2182 def headrevs(self, revs=None):
2181 2183 if revs is None:
2182 2184 try:
2183 2185 return self.index.headrevs()
2184 2186 except AttributeError:
2185 2187 return self._headrevs()
2186 2188 if rustdagop is not None and self.index.rust_ext_compat:
2187 2189 return rustdagop.headrevs(self.index, revs)
2188 2190 return dagop.headrevs(revs, self._uncheckedparentrevs)
2189 2191
2190 2192 def computephases(self, roots):
2191 2193 return self.index.computephasesmapsets(roots)
2192 2194
2193 2195 def _headrevs(self):
2194 2196 count = len(self)
2195 2197 if not count:
2196 2198 return [nullrev]
2197 2199 # we won't iter over filtered rev so nobody is a head at start
2198 2200 ishead = [0] * (count + 1)
2199 2201 index = self.index
2200 2202 for r in self:
2201 2203 ishead[r] = 1 # I may be a head
2202 2204 e = index[r]
2203 2205 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2204 2206 return [r for r, val in enumerate(ishead) if val]
2205 2207
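# Sketch of `_headrevs` above: on a hypothetical two-headed graph
# 0 <- 1 and 0 <- 2, every rev is first assumed to be a head, then each
# index entry clears its parents, leaving [1, 2].
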
2206 2208 def heads(self, start=None, stop=None):
2207 2209 """return the list of all nodes that have no children
2208 2210
2209 2211 if start is specified, only heads that are descendants of
2210 2212 start will be returned
2211 2213 if stop is specified, it will consider all the revs from stop
2212 2214 as if they had no children
2213 2215 """
2214 2216 if start is None and stop is None:
2215 2217 if not len(self):
2216 2218 return [self.nullid]
2217 2219 return [self.node(r) for r in self.headrevs()]
2218 2220
2219 2221 if start is None:
2220 2222 start = nullrev
2221 2223 else:
2222 2224 start = self.rev(start)
2223 2225
2224 2226 stoprevs = {self.rev(n) for n in stop or []}
2225 2227
2226 2228 revs = dagop.headrevssubset(
2227 2229 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2228 2230 )
2229 2231
2230 2232 return [self.node(rev) for rev in revs]
2231 2233
2232 2234 def children(self, node):
2233 2235 """find the children of a given node"""
2234 2236 c = []
2235 2237 p = self.rev(node)
2236 2238 for r in self.revs(start=p + 1):
2237 2239 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2238 2240 if prevs:
2239 2241 for pr in prevs:
2240 2242 if pr == p:
2241 2243 c.append(self.node(r))
2242 2244 elif p == nullrev:
2243 2245 c.append(self.node(r))
2244 2246 return c
2245 2247
2246 2248 def commonancestorsheads(self, a, b):
2247 2249 """calculate all the heads of the common ancestors of nodes a and b"""
2248 2250 a, b = self.rev(a), self.rev(b)
2249 2251 ancs = self._commonancestorsheads(a, b)
2250 2252 return pycompat.maplist(self.node, ancs)
2251 2253
2252 2254 def _commonancestorsheads(self, *revs):
2253 2255 """calculate all the heads of the common ancestors of revs"""
2254 2256 try:
2255 2257 ancs = self.index.commonancestorsheads(*revs)
2256 2258 except (AttributeError, OverflowError): # C implementation failed
2257 2259 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2258 2260 return ancs
2259 2261
2260 2262 def isancestor(self, a, b):
2261 2263 """return True if node a is an ancestor of node b
2262 2264
2263 2265 A revision is considered an ancestor of itself."""
2264 2266 a, b = self.rev(a), self.rev(b)
2265 2267 return self.isancestorrev(a, b)
2266 2268
2267 2269 def isancestorrev(self, a, b):
2268 2270 """return True if revision a is an ancestor of revision b
2269 2271
2270 2272 A revision is considered an ancestor of itself.
2271 2273
2272 2274 The implementation of this is trivial but the use of
2273 2275 reachableroots is not."""
2274 2276 if a == nullrev:
2275 2277 return True
2276 2278 elif a == b:
2277 2279 return True
2278 2280 elif a > b:
2279 2281 return False
2280 2282 return bool(self.reachableroots(a, [b], [a], includepath=False))
2281 2283
2282 2284 def reachableroots(self, minroot, heads, roots, includepath=False):
2283 2285 """return (heads(::(<roots> and <roots>::<heads>)))
2284 2286
2285 2287 If includepath is True, return (<roots>::<heads>)."""
2286 2288 try:
2287 2289 return self.index.reachableroots2(
2288 2290 minroot, heads, roots, includepath
2289 2291 )
2290 2292 except AttributeError:
2291 2293 return dagop._reachablerootspure(
2292 2294 self.parentrevs, minroot, roots, heads, includepath
2293 2295 )
2294 2296
2295 2297 def ancestor(self, a, b):
2296 2298 """calculate the "best" common ancestor of nodes a and b"""
2297 2299
2298 2300 a, b = self.rev(a), self.rev(b)
2299 2301 try:
2300 2302 ancs = self.index.ancestors(a, b)
2301 2303 except (AttributeError, OverflowError):
2302 2304 ancs = ancestor.ancestors(self.parentrevs, a, b)
2303 2305 if ancs:
2304 2306 # choose a consistent winner when there's a tie
2305 2307 return min(map(self.node, ancs))
2306 2308 return self.nullid
2307 2309
2308 2310 def _match(self, id):
2309 2311 if isinstance(id, int):
2310 2312 # rev
2311 2313 return self.node(id)
2312 2314 if len(id) == self.nodeconstants.nodelen:
2313 2315 # possibly a binary node
2314 2316 # odds of a binary node being all hex in ASCII are 1 in 10**25
2315 2317 try:
2316 2318 node = id
2317 2319 self.rev(node) # quick search the index
2318 2320 return node
2319 2321 except error.LookupError:
2320 2322 pass # may be partial hex id
2321 2323 try:
2322 2324 # str(rev)
2323 2325 rev = int(id)
2324 2326 if b"%d" % rev != id:
2325 2327 raise ValueError
2326 2328 if rev < 0:
2327 2329 rev = len(self) + rev
2328 2330 if rev < 0 or rev >= len(self):
2329 2331 raise ValueError
2330 2332 return self.node(rev)
2331 2333 except (ValueError, OverflowError):
2332 2334 pass
2333 2335 if len(id) == 2 * self.nodeconstants.nodelen:
2334 2336 try:
2335 2337 # a full hex nodeid?
2336 2338 node = bin(id)
2337 2339 self.rev(node)
2338 2340 return node
2339 2341 except (binascii.Error, error.LookupError):
2340 2342 pass
2341 2343
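# A usage sketch of the lookups `_match` attempts, for a hypothetical
# revlog `rl` and a 40-character hex id `h` present in it:
#
#     rl._match(5)       # -> rl.node(5)
#     rl._match(b"5")    # -> rl.node(5), str(rev) form
#     rl._match(b"-1")   # -> rl.node(len(rl) - 1), negative rev
#     rl._match(bin(h))  # -> bin(h), binary node lookup
#     rl._match(h)       # -> bin(h), full hex lookup
#
# Anything else falls through and returns None.
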
2342 2344 def _partialmatch(self, id):
2343 2345 # we don't care about wdirfilenodeids as they should always be full hashes
2344 2346 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2345 2347 ambiguous = False
2346 2348 try:
2347 2349 partial = self.index.partialmatch(id)
2348 2350 if partial and self.hasnode(partial):
2349 2351 if maybewdir:
2350 2352 # single 'ff...' match in radix tree, ambiguous with wdir
2351 2353 ambiguous = True
2352 2354 else:
2353 2355 return partial
2354 2356 elif maybewdir:
2355 2357 # no 'ff...' match in radix tree, wdir identified
2356 2358 raise error.WdirUnsupported
2357 2359 else:
2358 2360 return None
2359 2361 except error.RevlogError:
2360 2362 # parsers.c radix tree lookup gave multiple matches
2361 2363 # fast path: for unfiltered changelog, radix tree is accurate
2362 2364 if not getattr(self, 'filteredrevs', None):
2363 2365 ambiguous = True
2364 2366 # fall through to slow path that filters hidden revisions
2365 2367 except (AttributeError, ValueError):
2366 2368 # we are pure python, or key is not hex
2367 2369 pass
2368 2370 if ambiguous:
2369 2371 raise error.AmbiguousPrefixLookupError(
2370 2372 id, self.display_id, _(b'ambiguous identifier')
2371 2373 )
2372 2374
2373 2375 if id in self._pcache:
2374 2376 return self._pcache[id]
2375 2377
2376 2378 if len(id) <= 40:
2377 2379 # hex(node)[:...]
2378 2380 l = len(id) // 2 * 2 # grab an even number of digits
2379 2381 try:
2380 2382 # we're dropping the last digit, so let's check that it's hex,
2381 2383 # to avoid the expensive computation below if it's not
2382 2384 if len(id) % 2 > 0:
2383 2385 if not (id[-1] in hexdigits):
2384 2386 return None
2385 2387 prefix = bin(id[:l])
2386 2388 except binascii.Error:
2387 2389 pass
2388 2390 else:
2389 2391 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2390 2392 nl = [
2391 2393 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2392 2394 ]
2393 2395 if self.nodeconstants.nullhex.startswith(id):
2394 2396 nl.append(self.nullid)
2395 2397 if len(nl) > 0:
2396 2398 if len(nl) == 1 and not maybewdir:
2397 2399 self._pcache[id] = nl[0]
2398 2400 return nl[0]
2399 2401 raise error.AmbiguousPrefixLookupError(
2400 2402 id, self.display_id, _(b'ambiguous identifier')
2401 2403 )
2402 2404 if maybewdir:
2403 2405 raise error.WdirUnsupported
2404 2406 return None
2405 2407
2406 2408 def lookup(self, id):
2407 2409 """locate a node based on:
2408 2410 - revision number or str(revision number)
2409 2411 - nodeid or subset of hex nodeid
2410 2412 """
2411 2413 n = self._match(id)
2412 2414 if n is not None:
2413 2415 return n
2414 2416 n = self._partialmatch(id)
2415 2417 if n:
2416 2418 return n
2417 2419
2418 2420 raise error.LookupError(id, self.display_id, _(b'no match found'))
2419 2421
2420 2422 def shortest(self, node, minlength=1):
2421 2423 """Find the shortest unambiguous prefix that matches node."""
2422 2424
2423 2425 def isvalid(prefix):
2424 2426 try:
2425 2427 matchednode = self._partialmatch(prefix)
2426 2428 except error.AmbiguousPrefixLookupError:
2427 2429 return False
2428 2430 except error.WdirUnsupported:
2429 2431 # single 'ff...' match
2430 2432 return True
2431 2433 if matchednode is None:
2432 2434 raise error.LookupError(node, self.display_id, _(b'no node'))
2433 2435 return True
2434 2436
2435 2437 def maybewdir(prefix):
2436 2438 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2437 2439
2438 2440 hexnode = hex(node)
2439 2441
2440 2442 def disambiguate(hexnode, minlength):
2441 2443 """Disambiguate against wdirid."""
2442 2444 for length in range(minlength, len(hexnode) + 1):
2443 2445 prefix = hexnode[:length]
2444 2446 if not maybewdir(prefix):
2445 2447 return prefix
2446 2448
2447 2449 if not getattr(self, 'filteredrevs', None):
2448 2450 try:
2449 2451 length = max(self.index.shortest(node), minlength)
2450 2452 return disambiguate(hexnode, length)
2451 2453 except error.RevlogError:
2452 2454 if node != self.nodeconstants.wdirid:
2453 2455 raise error.LookupError(
2454 2456 node, self.display_id, _(b'no node')
2455 2457 )
2456 2458 except AttributeError:
2457 2459 # Fall through to pure code
2458 2460 pass
2459 2461
2460 2462 if node == self.nodeconstants.wdirid:
2461 2463 for length in range(minlength, len(hexnode) + 1):
2462 2464 prefix = hexnode[:length]
2463 2465 if isvalid(prefix):
2464 2466 return prefix
2465 2467
2466 2468 for length in range(minlength, len(hexnode) + 1):
2467 2469 prefix = hexnode[:length]
2468 2470 if isvalid(prefix):
2469 2471 return disambiguate(hexnode, length)
2470 2472
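# For example, if no other node shares the first hex digit of `node`,
# `shortest(node)` returns that single digit -- unless the prefix is all
# 'f's, in which case it is extended until it can no longer be confused
# with the virtual working-directory id.
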
2471 2473 def cmp(self, node, text):
2472 2474 """compare text with a given file revision
2473 2475
2474 2476 returns True if text is different than what is stored.
2475 2477 """
2476 2478 p1, p2 = self.parents(node)
2477 2479 return storageutil.hashrevisionsha1(text, p1, p2) != node
2478 2480
2479 2481 def deltaparent(self, rev):
2480 2482 """return deltaparent of the given revision"""
2481 2483 base = self.index[rev][3]
2482 2484 if base == rev:
2483 2485 return nullrev
2484 2486 elif self.delta_config.general_delta:
2485 2487 return base
2486 2488 else:
2487 2489 return rev - 1
2488 2490
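# Sketch of the two storage layouts handled above, for a hypothetical
# rev `r` that is not a full snapshot:
#
#     rl.deltaparent(r) == rl.index[r][3]  # general delta: stored base
#     rl.deltaparent(r) == r - 1           # legacy: previous revision
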
2489 2491 def issnapshot(self, rev):
2490 2492 """tells whether rev is a snapshot"""
2491 2493 ret = self._inner.issnapshot(rev)
2492 2494 self.issnapshot = self._inner.issnapshot
2493 2495 return ret
2494 2496
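# Note the self-patching above: the first call rebinds `issnapshot` on
# this instance to the inner object's bound method, so subsequent calls
# skip this wrapper entirely.
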
2495 2497 def snapshotdepth(self, rev):
2496 2498 """number of snapshots in the chain before this one"""
2497 2499 if not self.issnapshot(rev):
2498 2500 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2499 return len(self._deltachain(rev)[0]) - 1
2501 return len(self._inner._deltachain(rev)[0]) - 1
2500 2502
2501 2503 def revdiff(self, rev1, rev2):
2502 2504 """return or calculate a delta between two revisions
2503 2505
2504 2506 The delta calculated is in binary form and is intended to be written to
2505 2507 revlog data directly. So this function needs raw revision data.
2506 2508 """
2507 2509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2508 2510 return bytes(self._inner._chunk(rev2))
2509 2511
2510 2512 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2511 2513
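# Fast-path sketch for `revdiff` above: when rev1 is exactly the delta
# parent of rev2, the chunk stored for rev2 already *is* the wanted
# binary delta, so neither revision needs to be reconstructed:
#
#     if rl.deltaparent(r2) == r1:
#         delta = rl.revdiff(r1, r2)  # raw stored chunk, no mdiff call
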
2512 2514 def revision(self, nodeorrev):
2513 2515 """return an uncompressed revision of a given node or revision
2514 2516 number.
2515 2517 """
2516 2518 return self._revisiondata(nodeorrev)
2517 2519
2518 2520 def sidedata(self, nodeorrev):
2519 2521 """a map of extra data related to the changeset but not part of the hash
2520 2522
2521 2523 This function currently returns a dictionary. However, a more advanced
2522 2524 mapping object will likely be used in the future for more
2523 2525 efficient/lazy code.
2524 2526 """
2525 2527 # deal with <nodeorrev> argument type
2526 2528 if isinstance(nodeorrev, int):
2527 2529 rev = nodeorrev
2528 2530 else:
2529 2531 rev = self.rev(nodeorrev)
2530 2532 return self._sidedata(rev)
2531 2533
2532 2534 def _revisiondata(self, nodeorrev, raw=False):
2533 2535 # deal with <nodeorrev> argument type
2534 2536 if isinstance(nodeorrev, int):
2535 2537 rev = nodeorrev
2536 2538 node = self.node(rev)
2537 2539 else:
2538 2540 node = nodeorrev
2539 2541 rev = None
2540 2542
2541 2543 # fast path the special `nullid` rev
2542 2544 if node == self.nullid:
2543 2545 return b""
2544 2546
2545 2547 # ``rawtext`` is the text as stored inside the revlog. Might be the
2546 2548 # revision or might need to be processed to retrieve the revision.
2547 2549 rev, rawtext, validated = self._rawtext(node, rev)
2548 2550
2549 2551 if raw and validated:
2550 2552 # if we don't want to process the raw text and that raw
2551 2553 # text is cached, we can exit early.
2552 2554 return rawtext
2553 2555 if rev is None:
2554 2556 rev = self.rev(node)
2555 2557 # the revlog's flag for this revision
2556 2558 # (usually alter its state or content)
2557 2559 flags = self.flags(rev)
2558 2560
2559 2561 if validated and flags == REVIDX_DEFAULT_FLAGS:
2560 2562 # no extra flags set, no flag processor runs, text = rawtext
2561 2563 return rawtext
2562 2564
2563 2565 if raw:
2564 2566 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2565 2567 text = rawtext
2566 2568 else:
2567 2569 r = flagutil.processflagsread(self, rawtext, flags)
2568 2570 text, validatehash = r
2569 2571 if validatehash:
2570 2572 self.checkhash(text, node, rev=rev)
2571 2573 if not validated:
2572 2574 self._revisioncache = (node, rev, rawtext)
2573 2575
2574 2576 return text
2575 2577
2576 2578 def _rawtext(self, node, rev):
2577 2579 """return the possibly unvalidated rawtext for a revision
2578 2580
2579 2581 returns (rev, rawtext, validated)
2580 2582 """
2581 2583
2582 2584 # revision in the cache (could be useful to apply delta)
2583 2585 cachedrev = None
2584 2586 # An intermediate text to apply deltas to
2585 2587 basetext = None
2586 2588
2587 2589 # Check if we have the entry in cache
2588 2590 # The cache entry looks like (node, rev, rawtext)
2589 2591 if self._revisioncache:
2590 2592 if self._revisioncache[0] == node:
2591 2593 return (rev, self._revisioncache[2], True)
2592 2594 cachedrev = self._revisioncache[1]
2593 2595
2594 2596 if rev is None:
2595 2597 rev = self.rev(node)
2596 2598
2597 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2598 2600 if stopped:
2599 2601 basetext = self._revisioncache[2]
2600 2602
2601 2603 # drop cache to save memory, the caller is expected to
2602 2604 # update self._revisioncache after validating the text
2603 2605 self._revisioncache = None
2604 2606
2605 2607 targetsize = None
2606 2608 rawsize = self.index[rev][2]
2607 2609 if 0 <= rawsize:
2608 2610 targetsize = 4 * rawsize
2609 2611
2610 2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2611 2613 if basetext is None:
2612 2614 basetext = bytes(bins[0])
2613 2615 bins = bins[1:]
2614 2616
2615 2617 rawtext = mdiff.patches(basetext, bins)
2616 2618 del basetext # let us have a chance to free memory early
2617 2619 return (rev, rawtext, False)
2618 2620
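# Reconstruction sketch: for a delta chain [0, 1, 2] (and no usable
# cache), the code above is roughly equivalent to
#
#     bins = chunks_for([0, 1, 2])
#     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
#
# where `chunks_for` is a stand-in for the inner `_chunks` call; with a
# cache hit at `stoprev`, the cached text replaces `bins[0]`.
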
2619 2621 def _sidedata(self, rev):
2620 2622 """Return the sidedata for a given revision number."""
2621 2623 index_entry = self.index[rev]
2622 2624 sidedata_offset = index_entry[8]
2623 2625 sidedata_size = index_entry[9]
2624 2626
2625 2627 if self._inline:
2626 2628 sidedata_offset += self.index.entry_size * (1 + rev)
2627 2629 if sidedata_size == 0:
2628 2630 return {}
2629 2631
2630 2632 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2631 2633 filename = self._sidedatafile
2632 2634 end = self._docket.sidedata_end
2633 2635 offset = sidedata_offset
2634 2636 length = sidedata_size
2635 2637 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2636 2638 raise error.RevlogError(m)
2637 2639
2638 2640 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2639 2641 sidedata_offset, sidedata_size
2640 2642 )
2641 2643
2642 2644 comp = self.index[rev][11]
2643 2645 if comp == COMP_MODE_PLAIN:
2644 2646 segment = comp_segment
2645 2647 elif comp == COMP_MODE_DEFAULT:
2646 2648 segment = self._inner._decompressor(comp_segment)
2647 2649 elif comp == COMP_MODE_INLINE:
2648 2650 segment = self._inner.decompress(comp_segment)
2649 2651 else:
2650 2652 msg = b'unknown compression mode %d'
2651 2653 msg %= comp
2652 2654 raise error.RevlogError(msg)
2653 2655
2654 2656 sidedata = sidedatautil.deserialize_sidedata(segment)
2655 2657 return sidedata
2656 2658
2657 2659 def rawdata(self, nodeorrev):
2658 2660 """return an uncompressed raw data of a given node or revision number."""
2659 2661 return self._revisiondata(nodeorrev, raw=True)
2660 2662
2661 2663 def hash(self, text, p1, p2):
2662 2664 """Compute a node hash.
2663 2665
2664 2666 Available as a function so that subclasses can replace the hash
2665 2667 as needed.
2666 2668 """
2667 2669 return storageutil.hashrevisionsha1(text, p1, p2)
2668 2670
2669 2671 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2670 2672 """Check node hash integrity.
2671 2673
2672 2674 Available as a function so that subclasses can extend hash mismatch
2673 2675 behaviors as needed.
2674 2676 """
2675 2677 try:
2676 2678 if p1 is None and p2 is None:
2677 2679 p1, p2 = self.parents(node)
2678 2680 if node != self.hash(text, p1, p2):
2679 2681 # Clear the revision cache on hash failure. The revision cache
2680 2682 # only stores the raw revision and clearing the cache does have
2681 2683 # the side-effect that we won't have a cache hit when the raw
2682 2684 # revision data is accessed. But this case should be rare and
2683 2685 # it is extra work to teach the cache about the hash
2684 2686 # verification state.
2685 2687 if self._revisioncache and self._revisioncache[0] == node:
2686 2688 self._revisioncache = None
2687 2689
2688 2690 revornode = rev
2689 2691 if revornode is None:
2690 2692 revornode = templatefilters.short(hex(node))
2691 2693 raise error.RevlogError(
2692 2694 _(b"integrity check failed on %s:%s")
2693 2695 % (self.display_id, pycompat.bytestr(revornode))
2694 2696 )
2695 2697 except error.RevlogError:
2696 2698 if self.feature_config.censorable and storageutil.iscensoredtext(
2697 2699 text
2698 2700 ):
2699 2701 raise error.CensoredNodeError(self.display_id, node, text)
2700 2702 raise
2701 2703
2702 2704 @property
2703 2705 def _split_index_file(self):
2704 2706 """the path where the index of an ongoing splitting operation is expected
2705 2707
2706 2708 The file will only exist if a splitting operation is in progress, but
2707 2709 it is always expected at the same location."""
2708 2710 parts = self.radix.split(b'/')
2709 2711 if len(parts) > 1:
2710 2712 # appends a '-s' suffix to the ``data/`` or ``meta/`` base
2711 2713 head = parts[0] + b'-s'
2712 2714 mids = parts[1:-1]
2713 2715 tail = parts[-1] + b'.i'
2714 2716 pieces = [head] + mids + [tail]
2715 2717 return b'/'.join(pieces)
2716 2718 else:
2717 2719 # the revlog is stored at the root of the store (changelog or
2718 2720 # manifest), no risk of collision.
2719 2721 return self.radix + b'.i.s'
2720 2722
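# Path sketch: a filelog with radix ``data/foo`` gets its temporary
# split index at ``data-s/foo.i``, while a store-root revlog such as the
# changelog (radix ``00changelog``) uses ``00changelog.i.s``.
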
2721 2723 def _enforceinlinesize(self, tr, side_write=True):
2722 2724 """Check if the revlog is too big for inline and convert if so.
2723 2725
2724 2726 This should be called after revisions are added to the revlog. If the
2725 2727 revlog has grown too large to be an inline revlog, it will convert it
2726 2728 to use multiple index and data files.
2727 2729 """
2728 2730 tiprev = len(self) - 1
2729 2731 total_size = self.start(tiprev) + self.length(tiprev)
2730 2732 if not self._inline or total_size < _maxinline:
2731 2733 return
2732 2734
2733 2735 if self._docket is not None:
2734 2736 msg = b"inline revlog should not have a docket"
2735 2737 raise error.ProgrammingError(msg)
2736 2738
2737 2739 troffset = tr.findoffset(self._indexfile)
2738 2740 if troffset is None:
2739 2741 raise error.RevlogError(
2740 2742 _(b"%s not found in the transaction") % self._indexfile
2741 2743 )
2742 2744 if troffset:
2743 2745 tr.addbackup(self._indexfile, for_offset=True)
2744 2746 tr.add(self._datafile, 0)
2745 2747
2746 2748 new_index_file_path = None
2747 2749 if side_write:
2748 2750 old_index_file_path = self._indexfile
2749 2751 new_index_file_path = self._split_index_file
2750 2752 opener = self.opener
2751 2753 weak_self = weakref.ref(self)
2752 2754
2753 2755 # the "split" index replace the real index when the transaction is
2754 2756 # finalized
2755 2757 def finalize_callback(tr):
2756 2758 opener.rename(
2757 2759 new_index_file_path,
2758 2760 old_index_file_path,
2759 2761 checkambig=True,
2760 2762 )
2761 2763 maybe_self = weak_self()
2762 2764 if maybe_self is not None:
2763 2765 maybe_self._indexfile = old_index_file_path
2764 2766 maybe_self._inner.index_file = maybe_self._indexfile
2765 2767
2766 2768 def abort_callback(tr):
2767 2769 maybe_self = weak_self()
2768 2770 if maybe_self is not None:
2769 2771 maybe_self._indexfile = old_index_file_path
2770 2772 maybe_self._inner.inline = True
2771 2773 maybe_self._inner.index_file = old_index_file_path
2772 2774
2773 2775 tr.registertmp(new_index_file_path)
2774 2776 if self.target[1] is not None:
2775 2777 callback_id = b'000-revlog-split-%d-%s' % self.target
2776 2778 else:
2777 2779 callback_id = b'000-revlog-split-%d' % self.target[0]
2778 2780 tr.addfinalize(callback_id, finalize_callback)
2779 2781 tr.addabort(callback_id, abort_callback)
2780 2782
2781 2783 self._format_flags &= ~FLAG_INLINE_DATA
2782 2784 self._inner.split_inline(
2783 2785 tr,
2784 2786 self._format_flags | self._format_version,
2785 2787 new_index_file_path=new_index_file_path,
2786 2788 )
2787 2789
2788 2790 self._inline = False
2789 2791 if new_index_file_path is not None:
2790 2792 self._indexfile = new_index_file_path
2791 2793
2792 2794 nodemaputil.setup_persistent_nodemap(tr, self)
2793 2795
2794 2796 def _nodeduplicatecallback(self, transaction, node):
2795 2797 """called when trying to add a node already stored."""
2796 2798
2797 2799 @contextlib.contextmanager
2798 2800 def reading(self):
2799 2801 with self._inner.reading():
2800 2802 yield
2801 2803
2802 2804 @contextlib.contextmanager
2803 2805 def _writing(self, transaction):
2804 2806 if self._trypending:
2805 2807 msg = b'try to write in a `trypending` revlog: %s'
2806 2808 msg %= self.display_id
2807 2809 raise error.ProgrammingError(msg)
2808 2810 if self._inner.is_writing:
2809 2811 yield
2810 2812 else:
2811 2813 data_end = None
2812 2814 sidedata_end = None
2813 2815 if self._docket is not None:
2814 2816 data_end = self._docket.data_end
2815 2817 sidedata_end = self._docket.sidedata_end
2816 2818 with self._inner.writing(
2817 2819 transaction,
2818 2820 data_end=data_end,
2819 2821 sidedata_end=sidedata_end,
2820 2822 ):
2821 2823 yield
2822 2824 if self._docket is not None:
2823 2825 self._write_docket(transaction)
2824 2826
2825 2827 def _write_docket(self, transaction):
2826 2828 """write the current docket on disk
2827 2829
2828 2830 Exists as a method to help the changelog implement transaction logic.
2829 2831
2830 2832 We could also imagine using the same transaction logic for all revlogs
2831 2833 since dockets are cheap."""
2832 2834 self._docket.write(transaction)
2833 2835
2834 2836 def addrevision(
2835 2837 self,
2836 2838 text,
2837 2839 transaction,
2838 2840 link,
2839 2841 p1,
2840 2842 p2,
2841 2843 cachedelta=None,
2842 2844 node=None,
2843 2845 flags=REVIDX_DEFAULT_FLAGS,
2844 2846 deltacomputer=None,
2845 2847 sidedata=None,
2846 2848 ):
2847 2849 """add a revision to the log
2848 2850
2849 2851 text - the revision data to add
2850 2852 transaction - the transaction object used for rollback
2851 2853 link - the linkrev data to add
2852 2854 p1, p2 - the parent nodeids of the revision
2853 2855 cachedelta - an optional precomputed delta
2854 2856 node - nodeid of revision; typically node is not specified, and it is
2855 2857 computed by default as hash(text, p1, p2), however subclasses might
2856 2858 use different hashing method (and override checkhash() in such case)
2857 2859 flags - the known flags to set on the revision
2858 2860 deltacomputer - an optional deltacomputer instance shared between
2859 2861 multiple calls
2860 2862 """
2861 2863 if link == nullrev:
2862 2864 raise error.RevlogError(
2863 2865 _(b"attempted to add linkrev -1 to %s") % self.display_id
2864 2866 )
2865 2867
2866 2868 if sidedata is None:
2867 2869 sidedata = {}
2868 2870 elif sidedata and not self.feature_config.has_side_data:
2869 2871 raise error.ProgrammingError(
2870 2872 _(b"trying to add sidedata to a revlog that does not support them")
2871 2873 )
2872 2874
2873 2875 if flags:
2874 2876 node = node or self.hash(text, p1, p2)
2875 2877
2876 2878 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2877 2879
2878 2880 # If the flag processor modifies the revision data, ignore any provided
2879 2881 # cachedelta.
2880 2882 if rawtext != text:
2881 2883 cachedelta = None
2882 2884
2883 2885 if len(rawtext) > _maxentrysize:
2884 2886 raise error.RevlogError(
2885 2887 _(
2886 2888 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2887 2889 )
2888 2890 % (self.display_id, len(rawtext))
2889 2891 )
2890 2892
2891 2893 node = node or self.hash(rawtext, p1, p2)
2892 2894 rev = self.index.get_rev(node)
2893 2895 if rev is not None:
2894 2896 return rev
2895 2897
2896 2898 if validatehash:
2897 2899 self.checkhash(rawtext, node, p1=p1, p2=p2)
2898 2900
2899 2901 return self.addrawrevision(
2900 2902 rawtext,
2901 2903 transaction,
2902 2904 link,
2903 2905 p1,
2904 2906 p2,
2905 2907 node,
2906 2908 flags,
2907 2909 cachedelta=cachedelta,
2908 2910 deltacomputer=deltacomputer,
2909 2911 sidedata=sidedata,
2910 2912 )
2911 2913
2912 2914 def addrawrevision(
2913 2915 self,
2914 2916 rawtext,
2915 2917 transaction,
2916 2918 link,
2917 2919 p1,
2918 2920 p2,
2919 2921 node,
2920 2922 flags,
2921 2923 cachedelta=None,
2922 2924 deltacomputer=None,
2923 2925 sidedata=None,
2924 2926 ):
2925 2927 """add a raw revision with known flags, node and parents
2926 2928 useful when reusing a revision not stored in this revlog (ex: received
2927 2929 over wire, or read from an external bundle).
2928 2930 """
2929 2931 with self._writing(transaction):
2930 2932 return self._addrevision(
2931 2933 node,
2932 2934 rawtext,
2933 2935 transaction,
2934 2936 link,
2935 2937 p1,
2936 2938 p2,
2937 2939 flags,
2938 2940 cachedelta,
2939 2941 deltacomputer=deltacomputer,
2940 2942 sidedata=sidedata,
2941 2943 )
2942 2944
2943 2945 def compress(self, data):
2944 2946 return self._inner.compress(data)
2945 2947
2946 2948 def decompress(self, data):
2947 2949 return self._inner.decompress(data)
2948 2950
2949 2951 def _addrevision(
2950 2952 self,
2951 2953 node,
2952 2954 rawtext,
2953 2955 transaction,
2954 2956 link,
2955 2957 p1,
2956 2958 p2,
2957 2959 flags,
2958 2960 cachedelta,
2959 2961 alwayscache=False,
2960 2962 deltacomputer=None,
2961 2963 sidedata=None,
2962 2964 ):
2963 2965 """internal function to add revisions to the log
2964 2966
2965 2967 see addrevision for argument descriptions.
2966 2968
2967 2969 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2968 2970
2969 2971 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2970 2972 be used.
2971 2973
2972 2974 invariants:
2973 2975 - rawtext is optional (can be None); if not set, cachedelta must be set.
2974 2976 If both are set, they must correspond to each other.
2975 2977 """
2976 2978 if node == self.nullid:
2977 2979 raise error.RevlogError(
2978 2980 _(b"%s: attempt to add null revision") % self.display_id
2979 2981 )
2980 2982 if (
2981 2983 node == self.nodeconstants.wdirid
2982 2984 or node in self.nodeconstants.wdirfilenodeids
2983 2985 ):
2984 2986 raise error.RevlogError(
2985 2987 _(b"%s: attempt to add wdir revision") % self.display_id
2986 2988 )
2987 2989 if self._inner._writinghandles is None:
2988 2990 msg = b'adding revision outside `revlog._writing` context'
2989 2991 raise error.ProgrammingError(msg)
2990 2992
2991 2993 btext = [rawtext]
2992 2994
2993 2995 curr = len(self)
2994 2996 prev = curr - 1
2995 2997
2996 2998 offset = self._get_data_offset(prev)
2997 2999
2998 3000 if self._concurrencychecker:
2999 3001 ifh, dfh, sdfh = self._inner._writinghandles
3000 3002 # XXX no checking for the sidedata file
3001 3003 if self._inline:
3002 3004 # offset is "as if" it were in the .d file, so we need to add on
3003 3005 # the size of the entry metadata.
3004 3006 self._concurrencychecker(
3005 3007 ifh, self._indexfile, offset + curr * self.index.entry_size
3006 3008 )
3007 3009 else:
3008 3010 # Entries in the .i are a consistent size.
3009 3011 self._concurrencychecker(
3010 3012 ifh, self._indexfile, curr * self.index.entry_size
3011 3013 )
3012 3014 self._concurrencychecker(dfh, self._datafile, offset)
3013 3015
3014 3016 p1r, p2r = self.rev(p1), self.rev(p2)
3015 3017
3016 3018 # full versions are inserted when the needed deltas
3017 3019 # become comparable to the uncompressed text
3018 3020 if rawtext is None:
3019 3021 # need rawtext size, before changed by flag processors, which is
3020 3022 # the non-raw size. use revlog explicitly to avoid filelog's extra
3021 3023 # logic that might remove metadata size.
3022 3024 textlen = mdiff.patchedsize(
3023 3025 revlog.size(self, cachedelta[0]), cachedelta[1]
3024 3026 )
3025 3027 else:
3026 3028 textlen = len(rawtext)
3027 3029
3028 3030 if deltacomputer is None:
3029 3031 write_debug = None
3030 3032 if self.delta_config.debug_delta:
3031 3033 write_debug = transaction._report
3032 3034 deltacomputer = deltautil.deltacomputer(
3033 3035 self, write_debug=write_debug
3034 3036 )
3035 3037
3036 3038 if cachedelta is not None and len(cachedelta) == 2:
3037 3039 # If the cached delta has no information about how it should be
3038 3040 # reused, add the default reuse instruction according to the
3039 3041 # revlog's configuration.
3040 3042 if (
3041 3043 self.delta_config.general_delta
3042 3044 and self.delta_config.lazy_delta_base
3043 3045 ):
3044 3046 delta_base_reuse = DELTA_BASE_REUSE_TRY
3045 3047 else:
3046 3048 delta_base_reuse = DELTA_BASE_REUSE_NO
3047 3049 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3048 3050
3049 3051 revinfo = revlogutils.revisioninfo(
3050 3052 node,
3051 3053 p1,
3052 3054 p2,
3053 3055 btext,
3054 3056 textlen,
3055 3057 cachedelta,
3056 3058 flags,
3057 3059 )
3058 3060
3059 3061 deltainfo = deltacomputer.finddeltainfo(revinfo)
3060 3062
3061 3063 compression_mode = COMP_MODE_INLINE
3062 3064 if self._docket is not None:
3063 3065 default_comp = self._docket.default_compression_header
3064 3066 r = deltautil.delta_compression(default_comp, deltainfo)
3065 3067 compression_mode, deltainfo = r
3066 3068
3067 3069 sidedata_compression_mode = COMP_MODE_INLINE
3068 3070 if sidedata and self.feature_config.has_side_data:
3069 3071 sidedata_compression_mode = COMP_MODE_PLAIN
3070 3072 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3071 3073 sidedata_offset = self._docket.sidedata_end
3072 3074 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3073 3075 if (
3074 3076 h != b'u'
3075 3077 and comp_sidedata[0:1] != b'\0'
3076 3078 and len(comp_sidedata) < len(serialized_sidedata)
3077 3079 ):
3078 3080 assert not h
3079 3081 if (
3080 3082 comp_sidedata[0:1]
3081 3083 == self._docket.default_compression_header
3082 3084 ):
3083 3085 sidedata_compression_mode = COMP_MODE_DEFAULT
3084 3086 serialized_sidedata = comp_sidedata
3085 3087 else:
3086 3088 sidedata_compression_mode = COMP_MODE_INLINE
3087 3089 serialized_sidedata = comp_sidedata
3088 3090 else:
3089 3091 serialized_sidedata = b""
3090 3092 # Don't store the offset if the sidedata is empty, that way
3091 3093 # we can easily detect empty sidedata and they will be no different
3092 3094 # from ones we manually add.
3093 3095 sidedata_offset = 0
3094 3096
3095 3097 rank = RANK_UNKNOWN
3096 3098 if self.feature_config.compute_rank:
3097 3099 if (p1r, p2r) == (nullrev, nullrev):
3098 3100 rank = 1
3099 3101 elif p1r != nullrev and p2r == nullrev:
3100 3102 rank = 1 + self.fast_rank(p1r)
3101 3103 elif p1r == nullrev and p2r != nullrev:
3102 3104 rank = 1 + self.fast_rank(p2r)
3103 3105 else: # merge node
3104 3106 if rustdagop is not None and self.index.rust_ext_compat:
3105 3107 rank = rustdagop.rank(self.index, p1r, p2r)
3106 3108 else:
3107 3109 pmin, pmax = sorted((p1r, p2r))
3108 3110 rank = 1 + self.fast_rank(pmax)
3109 3111 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
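# Worked example for the merge branch above (hypothetical revs): with
# fast_rank(pmax) == 4 and two ancestors of pmin that are not
# ancestors of pmax, the merge node gets rank 1 + 4 + 2 == 7.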
3110 3112
3111 3113 e = revlogutils.entry(
3112 3114 flags=flags,
3113 3115 data_offset=offset,
3114 3116 data_compressed_length=deltainfo.deltalen,
3115 3117 data_uncompressed_length=textlen,
3116 3118 data_compression_mode=compression_mode,
3117 3119 data_delta_base=deltainfo.base,
3118 3120 link_rev=link,
3119 3121 parent_rev_1=p1r,
3120 3122 parent_rev_2=p2r,
3121 3123 node_id=node,
3122 3124 sidedata_offset=sidedata_offset,
3123 3125 sidedata_compressed_length=len(serialized_sidedata),
3124 3126 sidedata_compression_mode=sidedata_compression_mode,
3125 3127 rank=rank,
3126 3128 )
3127 3129
3128 3130 self.index.append(e)
3129 3131 entry = self.index.entry_binary(curr)
3130 3132 if curr == 0 and self._docket is None:
3131 3133 header = self._format_flags | self._format_version
3132 3134 header = self.index.pack_header(header)
3133 3135 entry = header + entry
3134 3136 self._writeentry(
3135 3137 transaction,
3136 3138 entry,
3137 3139 deltainfo.data,
3138 3140 link,
3139 3141 offset,
3140 3142 serialized_sidedata,
3141 3143 sidedata_offset,
3142 3144 )
3143 3145
3144 3146 rawtext = btext[0]
3145 3147
3146 3148 if alwayscache and rawtext is None:
3147 3149 rawtext = deltacomputer.buildtext(revinfo)
3148 3150
3149 3151 if type(rawtext) == bytes: # only accept immutable objects
3150 3152 self._revisioncache = (node, curr, rawtext)
3151 3153 self._chainbasecache[curr] = deltainfo.chainbase
3152 3154 return curr
3153 3155
3154 3156 def _get_data_offset(self, prev):
3155 3157 """Returns the current offset in the (in-transaction) data file.
3156 3158 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3157 3159 file to store that information: since sidedata can be rewritten to the
3158 3160 end of the data file within a transaction, you can have cases where, for
3159 3161 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3160 3162 to `n - 1`'s sidedata being written after `n`'s data.
3161 3163
3162 3164 TODO cache this in a docket file before getting out of experimental."""
3163 3165 if self._docket is None:
3164 3166 return self.end(prev)
3165 3167 else:
3166 3168 return self._docket.data_end
3167 3169
3168 3170 def _writeentry(
3169 3171 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3170 3172 ):
3171 3173 # Files opened in a+ mode have inconsistent behavior on various
3172 3174 # platforms. Windows requires that a file positioning call be made
3173 3175 # when the file handle transitions between reads and writes. See
3174 3176 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3175 3177 # platforms, Python or the platform itself can be buggy. Some versions
3176 3178 # of Solaris have been observed to not append at the end of the file
3177 3179 # if the file was seeked to before the end. See issue4943 for more.
3178 3180 #
3179 3181 # We work around this issue by inserting a seek() before writing.
3180 3182 # Note: This is likely not necessary on Python 3. However, because
3181 3183 # the file handle is reused for reads and may be seeked there, we need
3182 3184 # to be careful before changing this.
3183 3185 if self._inner._writinghandles is None:
3184 3186 msg = b'adding revision outside `revlog._writing` context'
3185 3187 raise error.ProgrammingError(msg)
3186 3188 ifh, dfh, sdfh = self._inner._writinghandles
3187 3189 if self._docket is None:
3188 3190 ifh.seek(0, os.SEEK_END)
3189 3191 else:
3190 3192 ifh.seek(self._docket.index_end, os.SEEK_SET)
3191 3193 if dfh:
3192 3194 if self._docket is None:
3193 3195 dfh.seek(0, os.SEEK_END)
3194 3196 else:
3195 3197 dfh.seek(self._docket.data_end, os.SEEK_SET)
3196 3198 if sdfh:
3197 3199 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3198 3200
3199 3201 curr = len(self) - 1
3200 3202 if not self._inline:
3201 3203 transaction.add(self._datafile, offset)
3202 3204 if self._sidedatafile:
3203 3205 transaction.add(self._sidedatafile, sidedata_offset)
3204 3206 transaction.add(self._indexfile, curr * len(entry))
3205 3207 if data[0]:
3206 3208 dfh.write(data[0])
3207 3209 dfh.write(data[1])
3208 3210 if sidedata:
3209 3211 sdfh.write(sidedata)
3210 3212 ifh.write(entry)
3211 3213 else:
3212 3214 offset += curr * self.index.entry_size
3213 3215 transaction.add(self._indexfile, offset)
3214 3216 ifh.write(entry)
3215 3217 ifh.write(data[0])
3216 3218 ifh.write(data[1])
3217 3219 assert not sidedata
3218 3220 self._enforceinlinesize(transaction)
3219 3221 if self._docket is not None:
3220 3222 # revlog-v2 always has 3 writing handles, help Pytype
3221 3223 wh1 = self._inner._writinghandles[0]
3222 3224 wh2 = self._inner._writinghandles[1]
3223 3225 wh3 = self._inner._writinghandles[2]
3224 3226 assert wh1 is not None
3225 3227 assert wh2 is not None
3226 3228 assert wh3 is not None
3227 3229 self._docket.index_end = wh1.tell()
3228 3230 self._docket.data_end = wh2.tell()
3229 3231 self._docket.sidedata_end = wh3.tell()
3230 3232
3231 3233 nodemaputil.setup_persistent_nodemap(transaction, self)
3232 3234
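The seek-before-write dance above guards against platforms where an 'a+' handle that is reused for reads ends up mispositioned. A minimal sketch of the same pattern on an ordinary file (the filename is illustrative):

import os

# A handle opened in 'a+' mode is reused for both reads and writes, so we
# reposition it explicitly before writing instead of trusting append mode.
with open('scratch.bin', 'a+b') as fh:
    fh.seek(0)
    fh.read()                # the read may leave the position anywhere...
    fh.seek(0, os.SEEK_END)  # ...so seek to the end before appending
    fh.write(b'new entry')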
3233 3235 def addgroup(
3234 3236 self,
3235 3237 deltas,
3236 3238 linkmapper,
3237 3239 transaction,
3238 3240 alwayscache=False,
3239 3241 addrevisioncb=None,
3240 3242 duplicaterevisioncb=None,
3241 3243 debug_info=None,
3242 3244 delta_base_reuse_policy=None,
3243 3245 ):
3244 3246 """
3245 3247 add a delta group
3246 3248
3247 3249 Given a set of deltas, add them to the revision log. The
3248 3250 first delta is against its parent, which should be in our
3249 3251 log; the rest are against the previous delta.
3250 3252
3251 3253 If ``addrevisioncb`` is defined, it will be called with arguments of
3252 3254 this revlog and the node that was added.
3253 3255 """
3254 3256
3255 3257 if self._adding_group:
3256 3258 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3257 3259
3258 3260 # read the default delta-base reuse policy from revlog config if the
3259 3261 # group did not specify one.
3260 3262 if delta_base_reuse_policy is None:
3261 3263 if (
3262 3264 self.delta_config.general_delta
3263 3265 and self.delta_config.lazy_delta_base
3264 3266 ):
3265 3267 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3266 3268 else:
3267 3269 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3268 3270
3269 3271 self._adding_group = True
3270 3272 empty = True
3271 3273 try:
3272 3274 with self._writing(transaction):
3273 3275 write_debug = None
3274 3276 if self.delta_config.debug_delta:
3275 3277 write_debug = transaction._report
3276 3278 deltacomputer = deltautil.deltacomputer(
3277 3279 self,
3278 3280 write_debug=write_debug,
3279 3281 debug_info=debug_info,
3280 3282 )
3281 3283 # loop through our set of deltas
3282 3284 for data in deltas:
3283 3285 (
3284 3286 node,
3285 3287 p1,
3286 3288 p2,
3287 3289 linknode,
3288 3290 deltabase,
3289 3291 delta,
3290 3292 flags,
3291 3293 sidedata,
3292 3294 ) = data
3293 3295 link = linkmapper(linknode)
3294 3296 flags = flags or REVIDX_DEFAULT_FLAGS
3295 3297
3296 3298 rev = self.index.get_rev(node)
3297 3299 if rev is not None:
3298 3300 # this can happen if two branches make the same change
3299 3301 self._nodeduplicatecallback(transaction, rev)
3300 3302 if duplicaterevisioncb:
3301 3303 duplicaterevisioncb(self, rev)
3302 3304 empty = False
3303 3305 continue
3304 3306
3305 3307 for p in (p1, p2):
3306 3308 if not self.index.has_node(p):
3307 3309 raise error.LookupError(
3308 3310 p, self.radix, _(b'unknown parent')
3309 3311 )
3310 3312
3311 3313 if not self.index.has_node(deltabase):
3312 3314 raise error.LookupError(
3313 3315 deltabase, self.display_id, _(b'unknown delta base')
3314 3316 )
3315 3317
3316 3318 baserev = self.rev(deltabase)
3317 3319
3318 3320 if baserev != nullrev and self.iscensored(baserev):
3319 3321 # if base is censored, delta must be full replacement in a
3320 3322 # single patch operation
3321 3323 hlen = struct.calcsize(b">lll")
3322 3324 oldlen = self.rawsize(baserev)
3323 3325 newlen = len(delta) - hlen
3324 3326 if delta[:hlen] != mdiff.replacediffheader(
3325 3327 oldlen, newlen
3326 3328 ):
3327 3329 raise error.CensoredBaseError(
3328 3330 self.display_id, self.node(baserev)
3329 3331 )
3330 3332
3331 3333 if not flags and self._peek_iscensored(baserev, delta):
3332 3334 flags |= REVIDX_ISCENSORED
3333 3335
3334 3336 # We assume consumers of addrevisioncb will want to retrieve
3335 3337 # the added revision, which will require a call to
3336 3338 # revision(). revision() will fast path if there is a cache
3337 3339 # hit. So, we tell _addrevision() to always cache in this case.
3338 3340 # We're only using addgroup() in the context of changegroup
3339 3341 # generation so the revision data can always be handled as raw
3340 3342 # by the flagprocessor.
3341 3343 rev = self._addrevision(
3342 3344 node,
3343 3345 None,
3344 3346 transaction,
3345 3347 link,
3346 3348 p1,
3347 3349 p2,
3348 3350 flags,
3349 3351 (baserev, delta, delta_base_reuse_policy),
3350 3352 alwayscache=alwayscache,
3351 3353 deltacomputer=deltacomputer,
3352 3354 sidedata=sidedata,
3353 3355 )
3354 3356
3355 3357 if addrevisioncb:
3356 3358 addrevisioncb(self, rev)
3357 3359 empty = False
3358 3360 finally:
3359 3361 self._adding_group = False
3360 3362 return not empty
3361 3363
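For reference, each element of `deltas` consumed by addgroup() is the 8-tuple unpacked in the loop above. A hedged sketch of the expected shape, with placeholder values only (real callers build these tuples from changegroup data):

# Shape of one delta entry as addgroup() unpacks it; all values here are
# hypothetical placeholders supplied by the caller.
def example_deltas(node, p1, p2, linknode, deltabase, delta):
    yield (
        node,       # node id of the revision being added
        p1, p2,     # parent node ids, which must already be in the revlog
        linknode,   # node in the linked revlog (e.g. the changelog)
        deltabase,  # node the delta applies against
        delta,      # the binary delta itself
        None,       # flags; a falsy value falls back to REVIDX_DEFAULT_FLAGS
        {},         # sidedata mapping
    )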
3362 3364 def iscensored(self, rev):
3363 3365 """Check if a file revision is censored."""
3364 3366 if not self.feature_config.censorable:
3365 3367 return False
3366 3368
3367 3369 return self.flags(rev) & REVIDX_ISCENSORED
3368 3370
3369 3371 def _peek_iscensored(self, baserev, delta):
3370 3372 """Quickly check if a delta produces a censored revision."""
3371 3373 if not self.feature_config.censorable:
3372 3374 return False
3373 3375
3374 3376 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3375 3377
3376 3378 def getstrippoint(self, minlink):
3377 3379 """find the minimum rev that must be stripped to strip the linkrev
3378 3380
3379 3381 Returns a tuple containing the minimum rev and a set of all revs that
3380 3382 have linkrevs that will be broken by this strip.
3381 3383 """
3382 3384 return storageutil.resolvestripinfo(
3383 3385 minlink,
3384 3386 len(self) - 1,
3385 3387 self.headrevs(),
3386 3388 self.linkrev,
3387 3389 self.parentrevs,
3388 3390 )
3389 3391
3390 3392 def strip(self, minlink, transaction):
3391 3393 """truncate the revlog on the first revision with a linkrev >= minlink
3392 3394
3393 3395 This function is called when we're stripping revision minlink and
3394 3396 its descendants from the repository.
3395 3397
3396 3398 We have to remove all revisions with linkrev >= minlink, because
3397 3399 the equivalent changelog revisions will be renumbered after the
3398 3400 strip.
3399 3401
3400 3402 So we truncate the revlog on the first of these revisions, and
3401 3403 trust that the caller has saved the revisions that shouldn't be
3402 3404 removed and that it'll re-add them after this truncation.
3403 3405 """
3404 3406 if len(self) == 0:
3405 3407 return
3406 3408
3407 3409 rev, _ = self.getstrippoint(minlink)
3408 3410 if rev == len(self):
3409 3411 return
3410 3412
3411 3413 # first truncate the files on disk
3412 3414 data_end = self.start(rev)
3413 3415 if not self._inline:
3414 3416 transaction.add(self._datafile, data_end)
3415 3417 end = rev * self.index.entry_size
3416 3418 else:
3417 3419 end = data_end + (rev * self.index.entry_size)
3418 3420
3419 3421 if self._sidedatafile:
3420 3422 sidedata_end = self.sidedata_cut_off(rev)
3421 3423 transaction.add(self._sidedatafile, sidedata_end)
3422 3424
3423 3425 transaction.add(self._indexfile, end)
3424 3426 if self._docket is not None:
3425 3427 # XXX we could leverage the docket while stripping. However it is
3426 3428 # not powerful enough at the time of this comment
3427 3429 self._docket.index_end = end
3428 3430 self._docket.data_end = data_end
3429 3431 self._docket.sidedata_end = sidedata_end
3430 3432 self._docket.write(transaction, stripping=True)
3431 3433
3432 3434 # then reset internal state in memory to forget those revisions
3433 3435 self._revisioncache = None
3434 3436 self._chaininfocache = util.lrucachedict(500)
3435 3437 self._inner._segmentfile.clear_cache()
3436 3438 self._inner._segmentfile_sidedata.clear_cache()
3437 3439
3438 3440 del self.index[rev:-1]
3439 3441
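The truncation offsets computed in strip() depend on whether the revlog is inline: with a separate data file the index keeps one fixed-size entry per surviving revision, while an inline revlog interleaves revision data and index entries in a single file. A worked sketch with hypothetical sizes:

rev = 10          # first revision to strip
entry_size = 64   # hypothetical self.index.entry_size
data_end = 5000   # hypothetical self.start(rev): data bytes kept

# separate index file: keep rev fixed-size entries
assert rev * entry_size == 640
# inline revlog: surviving data and index entries share one file
assert data_end + rev * entry_size == 5640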
3440 3442 def checksize(self):
3441 3443 """Check size of index and data files
3442 3444
3443 3445 return a (dd, di) tuple.
3444 3446 - dd: extra bytes for the "data" file
3445 3447 - di: extra bytes for the "index" file
3446 3448
3447 3449 A healthy revlog will return (0, 0).
3448 3450 """
3449 3451 expected = 0
3450 3452 if len(self):
3451 3453 expected = max(0, self.end(len(self) - 1))
3452 3454
3453 3455 try:
3454 3456 with self._datafp() as f:
3455 3457 f.seek(0, io.SEEK_END)
3456 3458 actual = f.tell()
3457 3459 dd = actual - expected
3458 3460 except FileNotFoundError:
3459 3461 dd = 0
3460 3462
3461 3463 try:
3462 3464 f = self.opener(self._indexfile)
3463 3465 f.seek(0, io.SEEK_END)
3464 3466 actual = f.tell()
3465 3467 f.close()
3466 3468 s = self.index.entry_size
3467 3469 i = max(0, actual // s)
3468 3470 di = actual - (i * s)
3469 3471 if self._inline:
3470 3472 databytes = 0
3471 3473 for r in self:
3472 3474 databytes += max(0, self.length(r))
3473 3475 dd = 0
3474 3476 di = actual - len(self) * s - databytes
3475 3477 except FileNotFoundError:
3476 3478 di = 0
3477 3479
3478 3480 return (dd, di)
3479 3481
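A hedged usage sketch for checksize(), assuming `rl` is an open revlog; a healthy revlog reports no stray bytes in either file:

def report_excess(rl):
    # (0, 0) means the data and index files match what the index records
    dd, di = rl.checksize()
    if (dd, di) != (0, 0):
        print('data file off by %d bytes, index off by %d' % (dd, di))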
3480 3482 def files(self):
3481 3483 """return list of files that compose this revlog"""
3482 3484 res = [self._indexfile]
3483 3485 if self._docket_file is None:
3484 3486 if not self._inline:
3485 3487 res.append(self._datafile)
3486 3488 else:
3487 3489 res.append(self._docket_file)
3488 3490 res.extend(self._docket.old_index_filepaths(include_empty=False))
3489 3491 if self._docket.data_end:
3490 3492 res.append(self._datafile)
3491 3493 res.extend(self._docket.old_data_filepaths(include_empty=False))
3492 3494 if self._docket.sidedata_end:
3493 3495 res.append(self._sidedatafile)
3494 3496 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3495 3497 return res
3496 3498
3497 3499 def emitrevisions(
3498 3500 self,
3499 3501 nodes,
3500 3502 nodesorder=None,
3501 3503 revisiondata=False,
3502 3504 assumehaveparentrevisions=False,
3503 3505 deltamode=repository.CG_DELTAMODE_STD,
3504 3506 sidedata_helpers=None,
3505 3507 debug_info=None,
3506 3508 ):
3507 3509 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3508 3510 raise error.ProgrammingError(
3509 3511 b'unhandled value for nodesorder: %s' % nodesorder
3510 3512 )
3511 3513
3512 3514 if nodesorder is None and not self.delta_config.general_delta:
3513 3515 nodesorder = b'storage'
3514 3516
3515 3517 if (
3516 3518 not self._storedeltachains
3517 3519 and deltamode != repository.CG_DELTAMODE_PREV
3518 3520 ):
3519 3521 deltamode = repository.CG_DELTAMODE_FULL
3520 3522
3521 3523 return storageutil.emitrevisions(
3522 3524 self,
3523 3525 nodes,
3524 3526 nodesorder,
3525 3527 revlogrevisiondelta,
3526 3528 deltaparentfn=self.deltaparent,
3527 3529 candeltafn=self._candelta,
3528 3530 rawsizefn=self.rawsize,
3529 3531 revdifffn=self.revdiff,
3530 3532 flagsfn=self.flags,
3531 3533 deltamode=deltamode,
3532 3534 revisiondata=revisiondata,
3533 3535 assumehaveparentrevisions=assumehaveparentrevisions,
3534 3536 sidedata_helpers=sidedata_helpers,
3535 3537 debug_info=debug_info,
3536 3538 )
3537 3539
3538 3540 DELTAREUSEALWAYS = b'always'
3539 3541 DELTAREUSESAMEREVS = b'samerevs'
3540 3542 DELTAREUSENEVER = b'never'
3541 3543
3542 3544 DELTAREUSEFULLADD = b'fulladd'
3543 3545
3544 3546 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3545 3547
3546 3548 def clone(
3547 3549 self,
3548 3550 tr,
3549 3551 destrevlog,
3550 3552 addrevisioncb=None,
3551 3553 deltareuse=DELTAREUSESAMEREVS,
3552 3554 forcedeltabothparents=None,
3553 3555 sidedata_helpers=None,
3554 3556 ):
3555 3557 """Copy this revlog to another, possibly with format changes.
3556 3558
3557 3559 The destination revlog will contain the same revisions and nodes.
3558 3560 However, it may not be bit-for-bit identical due to e.g. delta encoding
3559 3561 differences.
3560 3562
3561 3563 The ``deltareuse`` argument controls how deltas from the existing revlog
3562 3564 are preserved in the destination revlog. The argument can have the
3563 3565 following values:
3564 3566
3565 3567 DELTAREUSEALWAYS
3566 3568 Deltas will always be reused (if possible), even if the destination
3567 3569 revlog would not select the same revisions for the delta. This is the
3568 3570 fastest mode of operation.
3569 3571 DELTAREUSESAMEREVS
3570 3572 Deltas will be reused if the destination revlog would pick the same
3571 3573 revisions for the delta. This mode strikes a balance between speed
3572 3574 and optimization.
3573 3575 DELTAREUSENEVER
3574 3576 Deltas will never be reused. This is the slowest mode of execution.
3575 3577 This mode can be used to recompute deltas (e.g. if the diff/delta
3576 3578 algorithm changes).
3577 3579 DELTAREUSEFULLADD
3578 3580 Revisions will be re-added as if they were new content. This is
3579 3581 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3580 3582 e.g. large file detection and handling.
3581 3583
3582 3584 Delta computation can be slow, so the choice of delta reuse policy can
3583 3585 significantly affect run time.
3584 3586
3585 3587 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3586 3588 two extremes. Deltas will be reused if they are appropriate. But if the
3587 3589 delta could choose a better revision, it will do so. This means if you
3588 3590 are converting a non-generaldelta revlog to a generaldelta revlog,
3589 3591 deltas will be recomputed if the delta's parent isn't a parent of the
3590 3592 revision.
3591 3593
3592 3594 In addition to the delta policy, the ``forcedeltabothparents``
3593 3595 argument controls whether to force computing deltas against both
3594 3596 parents for merges. When unset, the destination's current setting is used.
3595 3597
3596 3598 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3597 3599 `sidedata_helpers`.
3598 3600 """
3599 3601 if deltareuse not in self.DELTAREUSEALL:
3600 3602 raise ValueError(
3601 3603 _(b'value for deltareuse invalid: %s') % deltareuse
3602 3604 )
3603 3605
3604 3606 if len(destrevlog):
3605 3607 raise ValueError(_(b'destination revlog is not empty'))
3606 3608
3607 3609 if getattr(self, 'filteredrevs', None):
3608 3610 raise ValueError(_(b'source revlog has filtered revisions'))
3609 3611 if getattr(destrevlog, 'filteredrevs', None):
3610 3612 raise ValueError(_(b'destination revlog has filtered revisions'))
3611 3613
3612 3614 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3613 3615 # if possible.
3614 3616 old_delta_config = destrevlog.delta_config
3615 3617 destrevlog.delta_config = destrevlog.delta_config.copy()
3616 3618
3617 3619 try:
3618 3620 if deltareuse == self.DELTAREUSEALWAYS:
3619 3621 destrevlog.delta_config.lazy_delta_base = True
3620 3622 destrevlog.delta_config.lazy_delta = True
3621 3623 elif deltareuse == self.DELTAREUSESAMEREVS:
3622 3624 destrevlog.delta_config.lazy_delta_base = False
3623 3625 destrevlog.delta_config.lazy_delta = True
3624 3626 elif deltareuse == self.DELTAREUSENEVER:
3625 3627 destrevlog.delta_config.lazy_delta_base = False
3626 3628 destrevlog.delta_config.lazy_delta = False
3627 3629
3628 3630 delta_both_parents = (
3629 3631 forcedeltabothparents or old_delta_config.delta_both_parents
3630 3632 )
3631 3633 destrevlog.delta_config.delta_both_parents = delta_both_parents
3632 3634
3633 3635 with self.reading(), destrevlog._writing(tr):
3634 3636 self._clone(
3635 3637 tr,
3636 3638 destrevlog,
3637 3639 addrevisioncb,
3638 3640 deltareuse,
3639 3641 forcedeltabothparents,
3640 3642 sidedata_helpers,
3641 3643 )
3642 3644
3643 3645 finally:
3644 3646 destrevlog.delta_config = old_delta_config
3645 3647
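A hedged sketch of one typical use of clone(): forcing every delta to be recomputed, e.g. after a delta algorithm change. `tr`, `src` and `dst` are assumed to be an open transaction, the source revlog, and an empty destination revlog:

def recompute_deltas(tr, src, dst):
    # DELTAREUSENEVER is the slowest policy but rebuilds every delta
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)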
3646 3648 def _clone(
3647 3649 self,
3648 3650 tr,
3649 3651 destrevlog,
3650 3652 addrevisioncb,
3651 3653 deltareuse,
3652 3654 forcedeltabothparents,
3653 3655 sidedata_helpers,
3654 3656 ):
3655 3657 """perform the core duty of `revlog.clone` after parameter processing"""
3656 3658 write_debug = None
3657 3659 if self.delta_config.debug_delta:
3658 3660 write_debug = tr._report
3659 3661 deltacomputer = deltautil.deltacomputer(
3660 3662 destrevlog,
3661 3663 write_debug=write_debug,
3662 3664 )
3663 3665 index = self.index
3664 3666 for rev in self:
3665 3667 entry = index[rev]
3666 3668
3667 3669 # Some classes override linkrev to take filtered revs into
3668 3670 # account. Use raw entry from index.
3669 3671 flags = entry[0] & 0xFFFF
3670 3672 linkrev = entry[4]
3671 3673 p1 = index[entry[5]][7]
3672 3674 p2 = index[entry[6]][7]
3673 3675 node = entry[7]
3674 3676
3675 3677 # (Possibly) reuse the delta from the revlog if allowed and
3676 3678 # the revlog chunk is a delta.
3677 3679 cachedelta = None
3678 3680 rawtext = None
3679 3681 if deltareuse == self.DELTAREUSEFULLADD:
3680 3682 text = self._revisiondata(rev)
3681 3683 sidedata = self.sidedata(rev)
3682 3684
3683 3685 if sidedata_helpers is not None:
3684 3686 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3685 3687 self, sidedata_helpers, sidedata, rev
3686 3688 )
3687 3689 flags = flags | new_flags[0] & ~new_flags[1]
3688 3690
3689 3691 destrevlog.addrevision(
3690 3692 text,
3691 3693 tr,
3692 3694 linkrev,
3693 3695 p1,
3694 3696 p2,
3695 3697 cachedelta=cachedelta,
3696 3698 node=node,
3697 3699 flags=flags,
3698 3700 deltacomputer=deltacomputer,
3699 3701 sidedata=sidedata,
3700 3702 )
3701 3703 else:
3702 3704 if destrevlog.delta_config.lazy_delta:
3703 3705 dp = self.deltaparent(rev)
3704 3706 if dp != nullrev:
3705 3707 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3706 3708
3707 3709 sidedata = None
3708 3710 if not cachedelta:
3709 3711 try:
3710 3712 rawtext = self._revisiondata(rev)
3711 3713 except error.CensoredNodeError as censored:
3712 3714 assert flags & REVIDX_ISCENSORED
3713 3715 rawtext = censored.tombstone
3714 3716 sidedata = self.sidedata(rev)
3715 3717 if sidedata is None:
3716 3718 sidedata = self.sidedata(rev)
3717 3719
3718 3720 if sidedata_helpers is not None:
3719 3721 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3720 3722 self, sidedata_helpers, sidedata, rev
3721 3723 )
3722 3724 flags = flags | new_flags[0] & ~new_flags[1]
3723 3725
3724 3726 destrevlog._addrevision(
3725 3727 node,
3726 3728 rawtext,
3727 3729 tr,
3728 3730 linkrev,
3729 3731 p1,
3730 3732 p2,
3731 3733 flags,
3732 3734 cachedelta,
3733 3735 deltacomputer=deltacomputer,
3734 3736 sidedata=sidedata,
3735 3737 )
3736 3738
3737 3739 if addrevisioncb:
3738 3740 addrevisioncb(self, rev, node)
3739 3741
3740 3742 def censorrevision(self, tr, censornode, tombstone=b''):
3741 3743 if self._format_version == REVLOGV0:
3742 3744 raise error.RevlogError(
3743 3745 _(b'cannot censor with version %d revlogs')
3744 3746 % self._format_version
3745 3747 )
3746 3748 elif self._format_version == REVLOGV1:
3747 3749 rewrite.v1_censor(self, tr, censornode, tombstone)
3748 3750 else:
3749 3751 rewrite.v2_censor(self, tr, censornode, tombstone)
3750 3752
3751 3753 def verifyintegrity(self, state):
3752 3754 """Verifies the integrity of the revlog.
3753 3755
3754 3756 Yields ``revlogproblem`` instances describing problems that are
3755 3757 found.
3756 3758 """
3757 3759 dd, di = self.checksize()
3758 3760 if dd:
3759 3761 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3760 3762 if di:
3761 3763 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3762 3764
3763 3765 version = self._format_version
3764 3766
3765 3767 # The verifier tells us what version revlog we should be.
3766 3768 if version != state[b'expectedversion']:
3767 3769 yield revlogproblem(
3768 3770 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3769 3771 % (self.display_id, version, state[b'expectedversion'])
3770 3772 )
3771 3773
3772 3774 state[b'skipread'] = set()
3773 3775 state[b'safe_renamed'] = set()
3774 3776
3775 3777 for rev in self:
3776 3778 node = self.node(rev)
3777 3779
3778 3780 # Verify contents. 4 cases to care about:
3779 3781 #
3780 3782 # common: the most common case
3781 3783 # rename: with a rename
3782 3784 # meta: file content starts with b'\1\n', the metadata
3783 3785 # header defined in filelog.py, but without a rename
3784 3786 # ext: content stored externally
3785 3787 #
3786 3788 # More formally, their differences are shown below:
3787 3789 #
3788 3790 # | common | rename | meta | ext
3789 3791 # -------------------------------------------------------
3790 3792 # flags() | 0 | 0 | 0 | not 0
3791 3793 # renamed() | False | True | False | ?
3792 3794 # rawtext[0:2]=='\1\n'| False | True | True | ?
3793 3795 #
3794 3796 # "rawtext" means the raw text stored in revlog data, which
3795 3797 # could be retrieved by "rawdata(rev)". "text"
3796 3798 # mentioned below is "revision(rev)".
3797 3799 #
3798 3800 # There are 3 different lengths stored physically:
3799 3801 # 1. L1: rawsize, stored in revlog index
3800 3802 # 2. L2: len(rawtext), stored in revlog data
3801 3803 # 3. L3: len(text), stored in revlog data if flags==0, or
3802 3804 # possibly somewhere else if flags!=0
3803 3805 #
3804 3806 # L1 should be equal to L2. L3 could be different from them.
3805 3807 # "text" may or may not affect commit hash depending on flag
3806 3808 # processors (see flagutil.addflagprocessor).
3807 3809 #
3808 3810 # | common | rename | meta | ext
3809 3811 # -------------------------------------------------
3810 3812 # rawsize() | L1 | L1 | L1 | L1
3811 3813 # size() | L1 | L2-LM | L1(*) | L1 (?)
3812 3814 # len(rawtext) | L2 | L2 | L2 | L2
3813 3815 # len(text) | L2 | L2 | L2 | L3
3814 3816 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3815 3817 #
3816 3818 # LM: length of metadata, depending on rawtext
3817 3819 # (*): not ideal, see comment in filelog.size
3818 3820 # (?): could be "- len(meta)" if the resolved content has
3819 3821 # rename metadata
3820 3822 #
3821 3823 # Checks needed to be done:
3822 3824 # 1. length check: L1 == L2, in all cases.
3823 3825 # 2. hash check: depending on flag processor, we may need to
3824 3826 # use either "text" (external), or "rawtext" (in revlog).
3825 3827
3826 3828 try:
3827 3829 skipflags = state.get(b'skipflags', 0)
3828 3830 if skipflags:
3829 3831 skipflags &= self.flags(rev)
3830 3832
3831 3833 _verify_revision(self, skipflags, state, node)
3832 3834
3833 3835 l1 = self.rawsize(rev)
3834 3836 l2 = len(self.rawdata(node))
3835 3837
3836 3838 if l1 != l2:
3837 3839 yield revlogproblem(
3838 3840 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3839 3841 node=node,
3840 3842 )
3841 3843
3842 3844 except error.CensoredNodeError:
3843 3845 if state[b'erroroncensored']:
3844 3846 yield revlogproblem(
3845 3847 error=_(b'censored file data'), node=node
3846 3848 )
3847 3849 state[b'skipread'].add(node)
3848 3850 except Exception as e:
3849 3851 yield revlogproblem(
3850 3852 error=_(b'unpacking %s: %s')
3851 3853 % (short(node), stringutil.forcebytestr(e)),
3852 3854 node=node,
3853 3855 )
3854 3856 state[b'skipread'].add(node)
3855 3857
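A hedged usage sketch for verifyintegrity(), assuming `rl` is an open revlog; the keys shown are the ones read above, and the verification helpers may consult more:

def collect_problems(rl, expected_version):
    state = {
        b'expectedversion': expected_version,
        b'erroroncensored': True,  # report censored data as an error
    }
    return list(rl.verifyintegrity(state))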
3856 3858 def storageinfo(
3857 3859 self,
3858 3860 exclusivefiles=False,
3859 3861 sharedfiles=False,
3860 3862 revisionscount=False,
3861 3863 trackedsize=False,
3862 3864 storedsize=False,
3863 3865 ):
3864 3866 d = {}
3865 3867
3866 3868 if exclusivefiles:
3867 3869 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3868 3870 if not self._inline:
3869 3871 d[b'exclusivefiles'].append((self.opener, self._datafile))
3870 3872
3871 3873 if sharedfiles:
3872 3874 d[b'sharedfiles'] = []
3873 3875
3874 3876 if revisionscount:
3875 3877 d[b'revisionscount'] = len(self)
3876 3878
3877 3879 if trackedsize:
3878 3880 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3879 3881
3880 3882 if storedsize:
3881 3883 d[b'storedsize'] = sum(
3882 3884 self.opener.stat(path).st_size for path in self.files()
3883 3885 )
3884 3886
3885 3887 return d
3886 3888
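A hedged sketch querying storageinfo(), assuming `rl` is an open revlog; only the requested keys appear in the result:

def print_storage_summary(rl):
    info = rl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    print(
        '%d revisions, %d tracked bytes, %d stored bytes'
        % (info[b'revisionscount'], info[b'trackedsize'], info[b'storedsize'])
    )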
3887 3889 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3888 3890 if not self.feature_config.has_side_data:
3889 3891 return
3890 3892 # revlog formats with sidedata support do not support inline
3891 3893 assert not self._inline
3892 3894 if not helpers[1] and not helpers[2]:
3893 3895 # Nothing to generate or remove
3894 3896 return
3895 3897
3896 3898 new_entries = []
3897 3899 # append the new sidedata
3898 3900 with self._writing(transaction):
3899 3901 ifh, dfh, sdfh = self._inner._writinghandles
3900 3902 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3901 3903
3902 3904 current_offset = sdfh.tell()
3903 3905 for rev in range(startrev, endrev + 1):
3904 3906 entry = self.index[rev]
3905 3907 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3906 3908 store=self,
3907 3909 sidedata_helpers=helpers,
3908 3910 sidedata={},
3909 3911 rev=rev,
3910 3912 )
3911 3913
3912 3914 serialized_sidedata = sidedatautil.serialize_sidedata(
3913 3915 new_sidedata
3914 3916 )
3915 3917
3916 3918 sidedata_compression_mode = COMP_MODE_INLINE
3917 3919 if serialized_sidedata and self.feature_config.has_side_data:
3918 3920 sidedata_compression_mode = COMP_MODE_PLAIN
3919 3921 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3920 3922 if (
3921 3923 h != b'u'
3922 3924 and comp_sidedata[0:1] != b'\0'
3923 3925 and len(comp_sidedata) < len(serialized_sidedata)
3924 3926 ):
3925 3927 assert not h
3926 3928 if (
3927 3929 comp_sidedata[0:1]
3928 3930 == self._docket.default_compression_header
3929 3931 ):
3930 3932 sidedata_compression_mode = COMP_MODE_DEFAULT
3931 3933 serialized_sidedata = comp_sidedata
3932 3934 else:
3933 3935 sidedata_compression_mode = COMP_MODE_INLINE
3934 3936 serialized_sidedata = comp_sidedata
3935 3937 if entry[8] != 0 or entry[9] != 0:
3936 3938 # rewriting entries that already have sidedata is not
3937 3939 # supported yet, because it introduces garbage data in the
3938 3940 # revlog.
3939 3941 msg = b"rewriting existing sidedata is not supported yet"
3940 3942 raise error.Abort(msg)
3941 3943
3942 3944 # Apply (potential) flags to add and to remove after running
3943 3945 # the sidedata helpers
3944 3946 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3945 3947 entry_update = (
3946 3948 current_offset,
3947 3949 len(serialized_sidedata),
3948 3950 new_offset_flags,
3949 3951 sidedata_compression_mode,
3950 3952 )
3951 3953
3952 3954 # the sidedata computation might have moved the file cursors around
3953 3955 sdfh.seek(current_offset, os.SEEK_SET)
3954 3956 sdfh.write(serialized_sidedata)
3955 3957 new_entries.append(entry_update)
3956 3958 current_offset += len(serialized_sidedata)
3957 3959 self._docket.sidedata_end = sdfh.tell()
3958 3960
3959 3961 # rewrite the new index entries
3960 3962 ifh.seek(startrev * self.index.entry_size)
3961 3963 for i, e in enumerate(new_entries):
3962 3964 rev = startrev + i
3963 3965 self.index.replace_sidedata_info(rev, *e)
3964 3966 packed = self.index.entry_binary(rev)
3965 3967 if rev == 0 and self._docket is None:
3966 3968 header = self._format_flags | self._format_version
3967 3969 header = self.index.pack_header(header)
3968 3970 packed = header + packed
3969 3971 ifh.write(packed)
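Both _addrevision() and rewrite_sidedata() above choose a sidedata compression mode with the same rule. A distilled sketch of that decision, using the COMP_MODE_* constants this module imports from revlogutils.constants:

def pick_sidedata_mode(h, comp, raw, default_header):
    # h == b'u' means the compressor chose to store the data uncompressed;
    # a payload starting with b'\0' would be ambiguous to decode later.
    if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(raw):
        if comp[0:1] == default_header:
            # the header matches the docket default and can stay implicit
            return COMP_MODE_DEFAULT, comp
        # compressed, with the compression header stored inline
        return COMP_MODE_INLINE, comp
    # compression did not help: store the serialized sidedata as-is
    return COMP_MODE_PLAIN, raw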