rust-index: renamed `MixedIndex` as `Index`...
Georges Racinet on incendie.racinet.fr - r52147:f94c1033 default
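The change renames the Rust-exposed index class from `MixedIndex` to `Index`. A minimal sketch of the resulting call site, mirroring the hunk further down (the surrounding setup is illustrative only, assuming the Rust extension is importable through the policy module):

    from mercurial import policy

    rustrevlog = policy.importrust('revlog')

    def parse_index_v1_rust(data, inline, default_header):
        cache = (0, data) if inline else None
        # previously returned rustrevlog.MixedIndex(data, default_header)
        return rustrevlog.Index(data, default_header), cache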
@@ -1,4062 +1,4062 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import functools
20 20 import io
21 21 import os
22 22 import struct
23 23 import weakref
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # blanket usage of all the names to prevent pyflakes complaints
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider that we have a "fast" implementation in "pure" python,
168 168 # because people using pure don't really have performance considerations
169 169 # (and a wheelbarrow of other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if hasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 def parse_index_v1_mixed(data, inline, default_header):
228 def parse_index_v1_rust(data, inline, default_header):
229 229 cache = (0, data) if inline else None
230 return rustrevlog.MixedIndex(data, default_header), cache
230 return rustrevlog.Index(data, default_header), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class _Config:
246 246 def copy(self):
247 247 return self.__class__(**self.__dict__)
248 248
249 249
250 250 @attr.s()
251 251 class FeatureConfig(_Config):
252 252 """Hold configuration values about the available revlog features"""
253 253
254 254 # the default compression engine
255 255 compression_engine = attr.ib(default=b'zlib')
256 256 # compression engines options
257 257 compression_engine_options = attr.ib(default=attr.Factory(dict))
258 258
259 259 # can we use censor on this revlog
260 260 censorable = attr.ib(default=False)
261 261 # does this revlog use the "side data" feature
262 262 has_side_data = attr.ib(default=False)
263 263 # might remove rank configuration once the computation has no impact
264 264 compute_rank = attr.ib(default=False)
265 265 # parent order is supposed to be semantically irrelevant, so we
266 266 # normally resort parents to ensure that the first parent is non-null,
267 267 # if there is a non-null parent at all.
268 268 # filelog abuses the parent order as a flag to mark some instances of
269 269 # meta-encoded files, so allow it to disable this behavior.
270 270 canonical_parent_order = attr.ib(default=False)
271 271 # can ellipsis commit be used
272 272 enable_ellipsis = attr.ib(default=False)
273 273
274 274 def copy(self):
275 275 new = super().copy()
276 276 new.compression_engine_options = self.compression_engine_options.copy()
277 277 return new
278 278
279 279
280 280 @attr.s()
281 281 class DataConfig(_Config):
282 282 """Hold configuration value about how the revlog data are read"""
283 283
284 284 # should we try to open the "pending" version of the revlog
285 285 try_pending = attr.ib(default=False)
287 287 # should we try to open the "split" version of the revlog
287 287 try_split = attr.ib(default=False)
288 288 # When True, indexfile should be opened with checkambig=True at writing,
289 289 # to avoid file stat ambiguity.
290 290 check_ambig = attr.ib(default=False)
291 291
292 292 # If true, use mmap instead of reading to deal with large index
293 293 mmap_large_index = attr.ib(default=False)
294 294 # how much data counts as "large"
295 295 mmap_index_threshold = attr.ib(default=None)
296 296 # How much data to read and cache into the raw revlog data cache.
297 297 chunk_cache_size = attr.ib(default=65536)
298 298
299 299 # The size of the uncompressed cache compared to the largest revision seen.
300 300 uncompressed_cache_factor = attr.ib(default=None)
301 301
303 303 # The number of chunks cached
303 303 uncompressed_cache_count = attr.ib(default=None)
304 304
305 305 # Allow sparse reading of the revlog data
306 306 with_sparse_read = attr.ib(default=False)
307 307 # minimal density of a sparse read chunk
308 308 sr_density_threshold = attr.ib(default=0.50)
309 309 # minimal size of data we skip when performing sparse read
310 310 sr_min_gap_size = attr.ib(default=262144)
311 311
312 312 # are deltas encoded against arbitrary bases.
313 313 generaldelta = attr.ib(default=False)
314 314
315 315
316 316 @attr.s()
317 317 class DeltaConfig(_Config):
318 318 """Hold configuration value about how new delta are computed
319 319
320 320 Some attributes are duplicated from DataConfig to help keep each object
321 321 self-contained.
322 322 """
323 323
324 324 # can delta be encoded against arbitrary bases.
325 325 general_delta = attr.ib(default=False)
326 326 # Allow sparse writing of the revlog data
327 327 sparse_revlog = attr.ib(default=False)
328 328 # maximum length of a delta chain
329 329 max_chain_len = attr.ib(default=None)
330 330 # Maximum distance between delta chain base start and end
331 331 max_deltachain_span = attr.ib(default=-1)
332 332 # If `upper_bound_comp` is not None, this is the expected maximal gain from
333 333 # compression for the data content.
334 334 upper_bound_comp = attr.ib(default=None)
335 335 # Should we try a delta against both parents
336 336 delta_both_parents = attr.ib(default=True)
337 337 # Test delta base candidate group by chunk of this maximal size.
338 338 candidate_group_chunk_size = attr.ib(default=0)
339 339 # Should we display debug information about delta computation
340 340 debug_delta = attr.ib(default=False)
341 341 # trust incoming delta by default
342 342 lazy_delta = attr.ib(default=True)
343 343 # trust the base of incoming delta by default
344 344 lazy_delta_base = attr.ib(default=False)
345 345
346 346
347 347 class _InnerRevlog:
348 348 """An inner layer of the revlog object
349 349
350 350 That layer exists to be able to delegate some operations to Rust; its
351 351 boundaries are arbitrary and based on what we can delegate to Rust.
352 352 """
353 353
354 354 def __init__(
355 355 self,
356 356 opener,
357 357 index,
358 358 index_file,
359 359 data_file,
360 360 sidedata_file,
361 361 inline,
362 362 data_config,
363 363 delta_config,
364 364 feature_config,
365 365 chunk_cache,
366 366 default_compression_header,
367 367 ):
368 368 self.opener = opener
369 369 self.index = index
370 370
371 371 self.index_file = index_file
372 372 self.data_file = data_file
373 373 self.sidedata_file = sidedata_file
374 374 self.inline = inline
375 375 self.data_config = data_config
376 376 self.delta_config = delta_config
377 377 self.feature_config = feature_config
378 378
379 379 # used during diverted write.
380 380 self._orig_index_file = None
381 381
382 382 self._default_compression_header = default_compression_header
383 383
384 384 # index
385 385
386 386 # 3-tuple of file handles being used for active writing.
387 387 self._writinghandles = None
388 388
389 389 self._segmentfile = randomaccessfile.randomaccessfile(
390 390 self.opener,
391 391 (self.index_file if self.inline else self.data_file),
392 392 self.data_config.chunk_cache_size,
393 393 chunk_cache,
394 394 )
395 395 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
396 396 self.opener,
397 397 self.sidedata_file,
398 398 self.data_config.chunk_cache_size,
399 399 )
400 400
401 401 # revlog header -> revlog compressor
402 402 self._decompressors = {}
403 403 # 3-tuple of (node, rev, text) for a raw revision.
404 404 self._revisioncache = None
405 405
406 406 # cache some uncompressed chunks
407 407 # rev → uncompressed_chunk
408 408 #
409 409 # the max cost is dynamically updated to be proportional to the
410 410 # size of the revisions we actually encounter.
411 411 self._uncompressed_chunk_cache = None
412 412 if self.data_config.uncompressed_cache_factor is not None:
413 413 self._uncompressed_chunk_cache = util.lrucachedict(
414 414 self.data_config.uncompressed_cache_count,
415 415 maxcost=65536, # some arbitrary initial value
416 416 )
417 417
418 418 self._delay_buffer = None
419 419
420 420 def __len__(self):
421 421 return len(self.index)
422 422
423 423 def clear_cache(self):
424 424 assert not self.is_delaying
425 425 self._revisioncache = None
426 426 if self._uncompressed_chunk_cache is not None:
427 427 self._uncompressed_chunk_cache.clear()
428 428 self._segmentfile.clear_cache()
429 429 self._segmentfile_sidedata.clear_cache()
430 430
431 431 @property
432 432 def canonical_index_file(self):
433 433 if self._orig_index_file is not None:
434 434 return self._orig_index_file
435 435 return self.index_file
436 436
437 437 @property
438 438 def is_delaying(self):
439 439 """is the revlog is currently delaying the visibility of written data?
440 440
441 441 The delaying mechanism can be either in-memory or written on disk in a
442 442 side-file."""
443 443 return (self._delay_buffer is not None) or (
444 444 self._orig_index_file is not None
445 445 )
446 446
447 447 # Derived from index values.
448 448
449 449 def start(self, rev):
450 450 """the offset of the data chunk for this revision"""
451 451 return int(self.index[rev][0] >> 16)
452 452
453 453 def length(self, rev):
454 454 """the length of the data chunk for this revision"""
455 455 return self.index[rev][1]
456 456
457 457 def end(self, rev):
458 458 """the end of the data chunk for this revision"""
459 459 return self.start(rev) + self.length(rev)
460 460
461 461 def deltaparent(self, rev):
462 462 """return deltaparent of the given revision"""
463 463 base = self.index[rev][3]
464 464 if base == rev:
465 465 return nullrev
466 466 elif self.delta_config.general_delta:
467 467 return base
468 468 else:
469 469 return rev - 1
470 470
471 471 def issnapshot(self, rev):
472 472 """tells whether rev is a snapshot"""
473 473 if not self.delta_config.sparse_revlog:
474 474 return self.deltaparent(rev) == nullrev
475 475 elif hasattr(self.index, 'issnapshot'):
476 476 # directly assign the method to cache the testing and access
477 477 self.issnapshot = self.index.issnapshot
478 478 return self.issnapshot(rev)
479 479 if rev == nullrev:
480 480 return True
481 481 entry = self.index[rev]
482 482 base = entry[3]
483 483 if base == rev:
484 484 return True
485 485 if base == nullrev:
486 486 return True
487 487 p1 = entry[5]
488 488 while self.length(p1) == 0:
489 489 b = self.deltaparent(p1)
490 490 if b == p1:
491 491 break
492 492 p1 = b
493 493 p2 = entry[6]
494 494 while self.length(p2) == 0:
495 495 b = self.deltaparent(p2)
496 496 if b == p2:
497 497 break
498 498 p2 = b
499 499 if base == p1 or base == p2:
500 500 return False
501 501 return self.issnapshot(base)
502 502
503 503 def _deltachain(self, rev, stoprev=None):
504 504 """Obtain the delta chain for a revision.
505 505
506 506 ``stoprev`` specifies a revision to stop at. If not specified, we
507 507 stop at the base of the chain.
508 508
509 509 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
510 510 revs in ascending order and ``stopped`` is a bool indicating whether
511 511 ``stoprev`` was hit.
512 512 """
513 513 generaldelta = self.delta_config.general_delta
514 514 # Try C implementation.
515 515 try:
516 516 return self.index.deltachain(rev, stoprev, generaldelta)
517 517 except AttributeError:
518 518 pass
519 519
520 520 chain = []
521 521
522 522 # Alias to prevent attribute lookup in tight loop.
523 523 index = self.index
524 524
525 525 iterrev = rev
526 526 e = index[iterrev]
527 527 while iterrev != e[3] and iterrev != stoprev:
528 528 chain.append(iterrev)
529 529 if generaldelta:
530 530 iterrev = e[3]
531 531 else:
532 532 iterrev -= 1
533 533 e = index[iterrev]
534 534
535 535 if iterrev == stoprev:
536 536 stopped = True
537 537 else:
538 538 chain.append(iterrev)
539 539 stopped = False
540 540
541 541 chain.reverse()
542 542 return chain, stopped
543 543
544 544 @util.propertycache
545 545 def _compressor(self):
546 546 engine = util.compengines[self.feature_config.compression_engine]
547 547 return engine.revlogcompressor(
548 548 self.feature_config.compression_engine_options
549 549 )
550 550
551 551 @util.propertycache
552 552 def _decompressor(self):
553 553 """the default decompressor"""
554 554 if self._default_compression_header is None:
555 555 return None
556 556 t = self._default_compression_header
557 557 c = self._get_decompressor(t)
558 558 return c.decompress
559 559
560 560 def _get_decompressor(self, t):
561 561 try:
562 562 compressor = self._decompressors[t]
563 563 except KeyError:
564 564 try:
565 565 engine = util.compengines.forrevlogheader(t)
566 566 compressor = engine.revlogcompressor(
567 567 self.feature_config.compression_engine_options
568 568 )
569 569 self._decompressors[t] = compressor
570 570 except KeyError:
571 571 raise error.RevlogError(
572 572 _(b'unknown compression type %s') % binascii.hexlify(t)
573 573 )
574 574 return compressor
575 575
576 576 def compress(self, data):
577 577 """Generate a possibly-compressed representation of data."""
578 578 if not data:
579 579 return b'', data
580 580
581 581 compressed = self._compressor.compress(data)
582 582
583 583 if compressed:
584 584 # The revlog compressor added the header in the returned data.
585 585 return b'', compressed
586 586
587 587 if data[0:1] == b'\0':
588 588 return b'', data
589 589 return b'u', data
590 590
591 591 def decompress(self, data):
592 592 """Decompress a revlog chunk.
593 593
594 594 The chunk is expected to begin with a header identifying the
595 595 format type so it can be routed to an appropriate decompressor.
596 596 """
597 597 if not data:
598 598 return data
599 599
600 600 # Revlogs are read much more frequently than they are written and many
601 601 # chunks only take microseconds to decompress, so performance is
602 602 # important here.
603 603 #
604 604 # We can make a few assumptions about revlogs:
605 605 #
606 606 # 1) the majority of chunks will be compressed (as opposed to inline
607 607 # raw data).
608 608 # 2) decompressing *any* data will likely be at least 10x slower than
609 609 # returning raw inline data.
610 610 # 3) we want to prioritize common and officially supported compression
611 611 # engines
612 612 #
613 613 # It follows that we want to optimize for "decompress compressed data
614 614 # when encoded with common and officially supported compression engines"
615 615 # case over "raw data" and "data encoded by less common or non-official
616 616 # compression engines." That is why we have the inline lookup first
617 617 # followed by the compengines lookup.
618 618 #
619 619 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
620 620 # compressed chunks. And this matters for changelog and manifest reads.
621 621 t = data[0:1]
622 622
623 623 if t == b'x':
624 624 try:
625 625 return _zlibdecompress(data)
626 626 except zlib.error as e:
627 627 raise error.RevlogError(
628 628 _(b'revlog decompress error: %s')
629 629 % stringutil.forcebytestr(e)
630 630 )
631 631 # '\0' is more common than 'u' so it goes first.
632 632 elif t == b'\0':
633 633 return data
634 634 elif t == b'u':
635 635 return util.buffer(data, 1)
636 636
637 637 compressor = self._get_decompressor(t)
638 638
639 639 return compressor.decompress(data)
640 640
641 641 @contextlib.contextmanager
642 642 def reading(self):
643 643 """Context manager that keeps data and sidedata files open for reading"""
644 644 if len(self.index) == 0:
645 645 yield # nothing to be read
646 646 elif self._delay_buffer is not None and self.inline:
647 647 msg = "revlog with delayed write should not be inline"
648 648 raise error.ProgrammingError(msg)
649 649 else:
650 650 with self._segmentfile.reading():
651 651 with self._segmentfile_sidedata.reading():
652 652 yield
653 653
654 654 @property
655 655 def is_writing(self):
656 656 """True is a writing context is open"""
657 657 return self._writinghandles is not None
658 658
659 659 @property
660 660 def is_open(self):
661 661 """True if any file handle is being held
662 662
663 663 Used for assert and debug in the python code"""
664 664 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
665 665
666 666 @contextlib.contextmanager
667 667 def writing(self, transaction, data_end=None, sidedata_end=None):
668 668 """Open the revlog files for writing
669 669
670 670 Adding content to a revlog should be done within such a context.
671 671 """
672 672 if self.is_writing:
673 673 yield
674 674 else:
675 675 ifh = dfh = sdfh = None
676 676 try:
677 677 r = len(self.index)
678 678 # opening the data file.
679 679 dsize = 0
680 680 if r:
681 681 dsize = self.end(r - 1)
682 682 dfh = None
683 683 if not self.inline:
684 684 try:
685 685 dfh = self.opener(self.data_file, mode=b"r+")
686 686 if data_end is None:
687 687 dfh.seek(0, os.SEEK_END)
688 688 else:
689 689 dfh.seek(data_end, os.SEEK_SET)
690 690 except FileNotFoundError:
691 691 dfh = self.opener(self.data_file, mode=b"w+")
692 692 transaction.add(self.data_file, dsize)
693 693 if self.sidedata_file is not None:
694 694 assert sidedata_end is not None
695 695 # revlog-v2 does not inline, help Pytype
696 696 assert dfh is not None
697 697 try:
698 698 sdfh = self.opener(self.sidedata_file, mode=b"r+")
699 699 dfh.seek(sidedata_end, os.SEEK_SET)
700 700 except FileNotFoundError:
701 701 sdfh = self.opener(self.sidedata_file, mode=b"w+")
702 702 transaction.add(self.sidedata_file, sidedata_end)
703 703
704 704 # opening the index file.
705 705 isize = r * self.index.entry_size
706 706 ifh = self.__index_write_fp()
707 707 if self.inline:
708 708 transaction.add(self.index_file, dsize + isize)
709 709 else:
710 710 transaction.add(self.index_file, isize)
711 711 # exposing all file handle for writing.
712 712 self._writinghandles = (ifh, dfh, sdfh)
713 713 self._segmentfile.writing_handle = ifh if self.inline else dfh
714 714 self._segmentfile_sidedata.writing_handle = sdfh
715 715 yield
716 716 finally:
717 717 self._writinghandles = None
718 718 self._segmentfile.writing_handle = None
719 719 self._segmentfile_sidedata.writing_handle = None
720 720 if dfh is not None:
721 721 dfh.close()
722 722 if sdfh is not None:
723 723 sdfh.close()
724 724 # closing the index file last to avoid exposing references to
725 725 # potentially unflushed data content.
726 726 if ifh is not None:
727 727 ifh.close()
728 728
729 729 def __index_write_fp(self, index_end=None):
730 730 """internal method to open the index file for writing
731 731
732 732 You should not use this directly and use `_writing` instead
733 733 """
734 734 try:
735 735 if self._delay_buffer is None:
736 736 f = self.opener(
737 737 self.index_file,
738 738 mode=b"r+",
739 739 checkambig=self.data_config.check_ambig,
740 740 )
741 741 else:
742 742 # check_ambig affects the way we open the file for writing; however,
743 743 # here we do not actually open a file for writing, as writes
744 744 # will be appended to a delay_buffer. So check_ambig is not
745 745 # meaningful and is unneeded here.
746 746 f = randomaccessfile.appender(
747 747 self.opener, self.index_file, b"r+", self._delay_buffer
748 748 )
749 749 if index_end is None:
750 750 f.seek(0, os.SEEK_END)
751 751 else:
752 752 f.seek(index_end, os.SEEK_SET)
753 753 return f
754 754 except FileNotFoundError:
755 755 if self._delay_buffer is None:
756 756 return self.opener(
757 757 self.index_file,
758 758 mode=b"w+",
759 759 checkambig=self.data_config.check_ambig,
760 760 )
761 761 else:
762 762 return randomaccessfile.appender(
763 763 self.opener, self.index_file, b"w+", self._delay_buffer
764 764 )
765 765
766 766 def __index_new_fp(self):
767 767 """internal method to create a new index file for writing
768 768
769 769 You should not use this unless you are upgrading from inline revlog
770 770 """
771 771 return self.opener(
772 772 self.index_file,
773 773 mode=b"w",
774 774 checkambig=self.data_config.check_ambig,
775 775 atomictemp=True,
776 776 )
777 777
778 778 def split_inline(self, tr, header, new_index_file_path=None):
779 779 """split the data of an inline revlog into an index and a data file"""
780 780 assert self._delay_buffer is None
781 781 existing_handles = False
782 782 if self._writinghandles is not None:
783 783 existing_handles = True
784 784 fp = self._writinghandles[0]
785 785 fp.flush()
786 786 fp.close()
787 787 # We can't use the cached file handle after close(). So prevent
788 788 # its usage.
789 789 self._writinghandles = None
790 790 self._segmentfile.writing_handle = None
791 791 # No need to deal with sidedata writing handle as it is only
792 792 # relevant with revlog-v2 which is never inline, not reaching
793 793 # this code
794 794
795 795 new_dfh = self.opener(self.data_file, mode=b"w+")
796 796 new_dfh.truncate(0) # drop any potentially existing data
797 797 try:
798 798 with self.reading():
799 799 for r in range(len(self.index)):
800 800 new_dfh.write(self.get_segment_for_revs(r, r)[1])
801 801 new_dfh.flush()
802 802
803 803 if new_index_file_path is not None:
804 804 self.index_file = new_index_file_path
805 805 with self.__index_new_fp() as fp:
806 806 self.inline = False
807 807 for i in range(len(self.index)):
808 808 e = self.index.entry_binary(i)
809 809 if i == 0:
810 810 packed_header = self.index.pack_header(header)
811 811 e = packed_header + e
812 812 fp.write(e)
813 813
814 814 # If we don't use side-write, the temp file replaces the real
815 815 # index when we exit the context manager
816 816
817 817 self._segmentfile = randomaccessfile.randomaccessfile(
818 818 self.opener,
819 819 self.data_file,
820 820 self.data_config.chunk_cache_size,
821 821 )
822 822
823 823 if existing_handles:
824 824 # switched from inline to conventional; reopen the index
825 825 ifh = self.__index_write_fp()
826 826 self._writinghandles = (ifh, new_dfh, None)
827 827 self._segmentfile.writing_handle = new_dfh
828 828 new_dfh = None
829 829 # No need to deal with sidedata writing handle as it is only
830 830 # relevant with revlog-v2 which is never inline, not reaching
831 831 # this code
832 832 finally:
833 833 if new_dfh is not None:
834 834 new_dfh.close()
835 835 return self.index_file
836 836
837 837 def get_segment_for_revs(self, startrev, endrev):
838 838 """Obtain a segment of raw data corresponding to a range of revisions.
839 839
840 840 Accepts the start and end revisions and an optional already-open
841 841 file handle to be used for reading. If the file handle is read, its
842 842 seek position will not be preserved.
843 843
844 844 Requests for data may be satisfied by a cache.
845 845
846 846 Returns a 2-tuple of (offset, data) for the requested range of
847 847 revisions. Offset is the integer offset from the beginning of the
848 848 revlog and data is a str or buffer of the raw byte data.
849 849
850 850 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
851 851 to determine where each revision's data begins and ends.
852 852
853 853 API: we should consider making this a private part of the InnerRevlog
854 854 at some point.
855 855 """
856 856 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
857 857 # (functions are expensive).
858 858 index = self.index
859 859 istart = index[startrev]
860 860 start = int(istart[0] >> 16)
861 861 if startrev == endrev:
862 862 end = start + istart[1]
863 863 else:
864 864 iend = index[endrev]
865 865 end = int(iend[0] >> 16) + iend[1]
866 866
867 867 if self.inline:
868 868 start += (startrev + 1) * self.index.entry_size
869 869 end += (endrev + 1) * self.index.entry_size
870 870 length = end - start
871 871
872 872 return start, self._segmentfile.read_chunk(start, length)
873 873
874 874 def _chunk(self, rev):
875 875 """Obtain a single decompressed chunk for a revision.
876 876
877 877 Accepts an integer revision and an optional already-open file handle
878 878 to be used for reading. If used, the seek position of the file will not
879 879 be preserved.
880 880
881 881 Returns a str holding uncompressed data for the requested revision.
882 882 """
883 883 if self._uncompressed_chunk_cache is not None:
884 884 uncomp = self._uncompressed_chunk_cache.get(rev)
885 885 if uncomp is not None:
886 886 return uncomp
887 887
888 888 compression_mode = self.index[rev][10]
889 889 data = self.get_segment_for_revs(rev, rev)[1]
890 890 if compression_mode == COMP_MODE_PLAIN:
891 891 uncomp = data
892 892 elif compression_mode == COMP_MODE_DEFAULT:
893 893 uncomp = self._decompressor(data)
894 894 elif compression_mode == COMP_MODE_INLINE:
895 895 uncomp = self.decompress(data)
896 896 else:
897 897 msg = b'unknown compression mode %d'
898 898 msg %= compression_mode
899 899 raise error.RevlogError(msg)
900 900 if self._uncompressed_chunk_cache is not None:
901 901 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
902 902 return uncomp
903 903
904 904 def _chunks(self, revs, targetsize=None):
905 905 """Obtain decompressed chunks for the specified revisions.
906 906
907 907 Accepts an iterable of numeric revisions that are assumed to be in
908 908 ascending order. Also accepts an optional already-open file handle
909 909 to be used for reading. If used, the seek position of the file will
910 910 not be preserved.
911 911
912 912 This function is similar to calling ``self._chunk()`` multiple times,
913 913 but is faster.
914 914
915 915 Returns a list with decompressed data for each requested revision.
916 916 """
917 917 if not revs:
918 918 return []
919 919 start = self.start
920 920 length = self.length
921 921 inline = self.inline
922 922 iosize = self.index.entry_size
923 923 buffer = util.buffer
924 924
925 925 fetched_revs = []
926 926 fadd = fetched_revs.append
927 927
928 928 chunks = []
929 929 ladd = chunks.append
930 930
931 931 if self._uncompressed_chunk_cache is None:
932 932 fetched_revs = revs
933 933 else:
934 934 for rev in revs:
935 935 cached_value = self._uncompressed_chunk_cache.get(rev)
936 936 if cached_value is None:
937 937 fadd(rev)
938 938 else:
939 939 ladd((rev, cached_value))
940 940
941 941 if not fetched_revs:
942 942 slicedchunks = ()
943 943 elif not self.data_config.with_sparse_read:
944 944 slicedchunks = (fetched_revs,)
945 945 else:
946 946 slicedchunks = deltautil.slicechunk(
947 947 self,
948 948 fetched_revs,
949 949 targetsize=targetsize,
950 950 )
951 951
952 952 for revschunk in slicedchunks:
953 953 firstrev = revschunk[0]
954 954 # Skip trailing revisions with empty diff
955 955 for lastrev in revschunk[::-1]:
956 956 if length(lastrev) != 0:
957 957 break
958 958
959 959 try:
960 960 offset, data = self.get_segment_for_revs(firstrev, lastrev)
961 961 except OverflowError:
962 962 # issue4215 - we can't cache a run of chunks greater than
963 963 # 2G on Windows
964 964 for rev in revschunk:
965 965 ladd((rev, self._chunk(rev)))
966 966
967 967 decomp = self.decompress
968 968 # self._decompressor might be None, but will not be used in that case
969 969 def_decomp = self._decompressor
970 970 for rev in revschunk:
971 971 chunkstart = start(rev)
972 972 if inline:
973 973 chunkstart += (rev + 1) * iosize
974 974 chunklength = length(rev)
975 975 comp_mode = self.index[rev][10]
976 976 c = buffer(data, chunkstart - offset, chunklength)
977 977 if comp_mode == COMP_MODE_PLAIN:
978 978 c = c
979 979 elif comp_mode == COMP_MODE_INLINE:
980 980 c = decomp(c)
981 981 elif comp_mode == COMP_MODE_DEFAULT:
982 982 c = def_decomp(c)
983 983 else:
984 984 msg = b'unknown compression mode %d'
985 985 msg %= comp_mode
986 986 raise error.RevlogError(msg)
987 987 ladd((rev, c))
988 988 if self._uncompressed_chunk_cache is not None:
989 989 self._uncompressed_chunk_cache.insert(rev, c, len(c))
990 990
991 991 chunks.sort()
992 992 return [x[1] for x in chunks]
993 993
994 994 def raw_text(self, node, rev):
995 995 """return the possibly unvalidated rawtext for a revision
996 996
997 997 returns (rev, rawtext, validated)
998 998 """
999 999
1000 1000 # revision in the cache (could be useful to apply delta)
1001 1001 cachedrev = None
1002 1002 # An intermediate text to apply deltas to
1003 1003 basetext = None
1004 1004
1005 1005 # Check if we have the entry in cache
1006 1006 # The cache entry looks like (node, rev, rawtext)
1007 1007 if self._revisioncache:
1008 1008 cachedrev = self._revisioncache[1]
1009 1009
1010 1010 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1011 1011 if stopped:
1012 1012 basetext = self._revisioncache[2]
1013 1013
1014 1014 # drop cache to save memory, the caller is expected to
1015 1015 # update self._inner._revisioncache after validating the text
1016 1016 self._revisioncache = None
1017 1017
1018 1018 targetsize = None
1019 1019 rawsize = self.index[rev][2]
1020 1020 if 0 <= rawsize:
1021 1021 targetsize = 4 * rawsize
1022 1022
1023 1023 if self._uncompressed_chunk_cache is not None:
1024 1024 # dynamically update the uncompressed_chunk_cache size to the
1025 1025 # largest revision we saw in this revlog.
1026 1026 factor = self.data_config.uncompressed_cache_factor
1027 1027 candidate_size = rawsize * factor
1028 1028 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1029 1029 self._uncompressed_chunk_cache.maxcost = candidate_size
1030 1030
1031 1031 bins = self._chunks(chain, targetsize=targetsize)
1032 1032 if basetext is None:
1033 1033 basetext = bytes(bins[0])
1034 1034 bins = bins[1:]
1035 1035
1036 1036 rawtext = mdiff.patches(basetext, bins)
1037 1037 del basetext # let us have a chance to free memory early
1038 1038 return (rev, rawtext, False)
1039 1039
1040 1040 def sidedata(self, rev, sidedata_end):
1041 1041 """Return the sidedata for a given revision number."""
1042 1042 index_entry = self.index[rev]
1043 1043 sidedata_offset = index_entry[8]
1044 1044 sidedata_size = index_entry[9]
1045 1045
1046 1046 if self.inline:
1047 1047 sidedata_offset += self.index.entry_size * (1 + rev)
1048 1048 if sidedata_size == 0:
1049 1049 return {}
1050 1050
1051 1051 if sidedata_end < sidedata_offset + sidedata_size:
1052 1052 filename = self.sidedata_file
1053 1053 end = sidedata_end
1054 1054 offset = sidedata_offset
1055 1055 length = sidedata_size
1056 1056 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1057 1057 raise error.RevlogError(m)
1058 1058
1059 1059 comp_segment = self._segmentfile_sidedata.read_chunk(
1060 1060 sidedata_offset, sidedata_size
1061 1061 )
1062 1062
1063 1063 comp = self.index[rev][11]
1064 1064 if comp == COMP_MODE_PLAIN:
1065 1065 segment = comp_segment
1066 1066 elif comp == COMP_MODE_DEFAULT:
1067 1067 segment = self._decompressor(comp_segment)
1068 1068 elif comp == COMP_MODE_INLINE:
1069 1069 segment = self.decompress(comp_segment)
1070 1070 else:
1071 1071 msg = b'unknown compression mode %d'
1072 1072 msg %= comp
1073 1073 raise error.RevlogError(msg)
1074 1074
1075 1075 sidedata = sidedatautil.deserialize_sidedata(segment)
1076 1076 return sidedata
1077 1077
1078 1078 def write_entry(
1079 1079 self,
1080 1080 transaction,
1081 1081 entry,
1082 1082 data,
1083 1083 link,
1084 1084 offset,
1085 1085 sidedata,
1086 1086 sidedata_offset,
1087 1087 index_end,
1088 1088 data_end,
1089 1089 sidedata_end,
1090 1090 ):
1091 1091 # Files opened in a+ mode have inconsistent behavior on various
1092 1092 # platforms. Windows requires that a file positioning call be made
1093 1093 # when the file handle transitions between reads and writes. See
1094 1094 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1095 1095 # platforms, Python or the platform itself can be buggy. Some versions
1096 1096 # of Solaris have been observed to not append at the end of the file
1097 1097 # if the file was seeked to before the end. See issue4943 for more.
1098 1098 #
1099 1099 # We work around this issue by inserting a seek() before writing.
1100 1100 # Note: This is likely not necessary on Python 3. However, because
1101 1101 # the file handle is reused for reads and may be seeked there, we need
1102 1102 # to be careful before changing this.
1103 1103 if self._writinghandles is None:
1104 1104 msg = b'adding revision outside `revlog._writing` context'
1105 1105 raise error.ProgrammingError(msg)
1106 1106 ifh, dfh, sdfh = self._writinghandles
1107 1107 if index_end is None:
1108 1108 ifh.seek(0, os.SEEK_END)
1109 1109 else:
1110 1110 ifh.seek(index_end, os.SEEK_SET)
1111 1111 if dfh:
1112 1112 if data_end is None:
1113 1113 dfh.seek(0, os.SEEK_END)
1114 1114 else:
1115 1115 dfh.seek(data_end, os.SEEK_SET)
1116 1116 if sdfh:
1117 1117 sdfh.seek(sidedata_end, os.SEEK_SET)
1118 1118
1119 1119 curr = len(self.index) - 1
1120 1120 if not self.inline:
1121 1121 transaction.add(self.data_file, offset)
1122 1122 if self.sidedata_file:
1123 1123 transaction.add(self.sidedata_file, sidedata_offset)
1124 1124 transaction.add(self.canonical_index_file, curr * len(entry))
1125 1125 if data[0]:
1126 1126 dfh.write(data[0])
1127 1127 dfh.write(data[1])
1128 1128 if sidedata:
1129 1129 sdfh.write(sidedata)
1130 1130 if self._delay_buffer is None:
1131 1131 ifh.write(entry)
1132 1132 else:
1133 1133 self._delay_buffer.append(entry)
1134 1134 elif self._delay_buffer is not None:
1135 1135 msg = b'invalid delayed write on inline revlog'
1136 1136 raise error.ProgrammingError(msg)
1137 1137 else:
1138 1138 offset += curr * self.index.entry_size
1139 1139 transaction.add(self.canonical_index_file, offset)
1140 1140 assert not sidedata
1141 1141 ifh.write(entry)
1142 1142 ifh.write(data[0])
1143 1143 ifh.write(data[1])
1144 1144 return (
1145 1145 ifh.tell(),
1146 1146 dfh.tell() if dfh else None,
1147 1147 sdfh.tell() if sdfh else None,
1148 1148 )
1149 1149
1150 1150 def _divert_index(self):
1151 1151 return self.index_file + b'.a'
1152 1152
1153 1153 def delay(self):
1154 1154 assert not self.is_open
1155 1155 if self.inline:
1156 1156 msg = "revlog with delayed write should not be inline"
1157 1157 raise error.ProgrammingError(msg)
1158 1158 if self._delay_buffer is not None or self._orig_index_file is not None:
1159 1159 # delay or divert already in place
1160 1160 return None
1161 1161 elif len(self.index) == 0:
1162 1162 self._orig_index_file = self.index_file
1163 1163 self.index_file = self._divert_index()
1164 1164 assert self._orig_index_file is not None
1165 1165 assert self.index_file is not None
1166 1166 if self.opener.exists(self.index_file):
1167 1167 self.opener.unlink(self.index_file)
1168 1168 return self.index_file
1169 1169 else:
1170 1170 self._delay_buffer = []
1171 1171 return None
1172 1172
1173 1173 def write_pending(self):
1174 1174 assert not self.is_open
1175 1175 if self.inline:
1176 1176 msg = "revlog with delayed write should not be inline"
1177 1177 raise error.ProgrammingError(msg)
1178 1178 if self._orig_index_file is not None:
1179 1179 return None, True
1180 1180 any_pending = False
1181 1181 pending_index_file = self._divert_index()
1182 1182 if self.opener.exists(pending_index_file):
1183 1183 self.opener.unlink(pending_index_file)
1184 1184 util.copyfile(
1185 1185 self.opener.join(self.index_file),
1186 1186 self.opener.join(pending_index_file),
1187 1187 )
1188 1188 if self._delay_buffer:
1189 1189 with self.opener(pending_index_file, b'r+') as ifh:
1190 1190 ifh.seek(0, os.SEEK_END)
1191 1191 ifh.write(b"".join(self._delay_buffer))
1192 1192 any_pending = True
1193 1193 self._delay_buffer = None
1194 1194 self._orig_index_file = self.index_file
1195 1195 self.index_file = pending_index_file
1196 1196 return self.index_file, any_pending
1197 1197
1198 1198 def finalize_pending(self):
1199 1199 assert not self.is_open
1200 1200 if self.inline:
1201 1201 msg = "revlog with delayed write should not be inline"
1202 1202 raise error.ProgrammingError(msg)
1203 1203
1204 1204 delay = self._delay_buffer is not None
1205 1205 divert = self._orig_index_file is not None
1206 1206
1207 1207 if delay and divert:
1208 1208 assert False, "unreachable"
1209 1209 elif delay:
1210 1210 if self._delay_buffer:
1211 1211 with self.opener(self.index_file, b'r+') as ifh:
1212 1212 ifh.seek(0, os.SEEK_END)
1213 1213 ifh.write(b"".join(self._delay_buffer))
1214 1214 self._delay_buffer = None
1215 1215 elif divert:
1216 1216 if self.opener.exists(self.index_file):
1217 1217 self.opener.rename(
1218 1218 self.index_file,
1219 1219 self._orig_index_file,
1220 1220 checkambig=True,
1221 1221 )
1222 1222 self.index_file = self._orig_index_file
1223 1223 self._orig_index_file = None
1224 1224 else:
1225 1225 msg = b"not delay or divert found on this revlog"
1226 1226 raise error.ProgrammingError(msg)
1227 1227 return self.canonical_index_file
1228 1228
1229 1229
1230 1230 class revlog:
1231 1231 """
1232 1232 the underlying revision storage object
1233 1233
1234 1234 A revlog consists of two parts, an index and the revision data.
1235 1235
1236 1236 The index is a file with a fixed record size containing
1237 1237 information on each revision, including its nodeid (hash), the
1238 1238 nodeids of its parents, the position and offset of its data within
1239 1239 the data file, and the revision it's based on. Finally, each entry
1240 1240 contains a linkrev entry that can serve as a pointer to external
1241 1241 data.
1242 1242
1243 1243 The revision data itself is a linear collection of data chunks.
1244 1244 Each chunk represents a revision and is usually represented as a
1245 1245 delta against the previous chunk. To bound lookup time, runs of
1246 1246 deltas are limited to about 2 times the length of the original
1247 1247 version data. This makes retrieval of a version proportional to
1248 1248 its size, or O(1) relative to the number of revisions.
1249 1249
1250 1250 Both pieces of the revlog are written to in an append-only
1251 1251 fashion, which means we never need to rewrite a file to insert or
1252 1252 remove data, and can use some simple techniques to avoid the need
1253 1253 for locking while reading.
1254 1254
1255 1255 If checkambig, indexfile is opened with checkambig=True at
1256 1256 writing, to avoid file stat ambiguity.
1257 1257
1258 1258 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1259 1259 index will be mmapped rather than read if it is larger than the
1260 1260 configured threshold.
1261 1261
1262 1262 If censorable is True, the revlog can have censored revisions.
1263 1263
1264 1264 If `upperboundcomp` is not None, this is the expected maximal gain from
1265 1265 compression for the data content.
1266 1266
1267 1267 `concurrencychecker` is an optional function that receives 3 arguments: a
1268 1268 file handle, a filename, and an expected position. It should check whether
1269 1269 the current position in the file handle is valid, and log/warn/fail (by
1270 1270 raising).
1271 1271
1272 1272 See mercurial/revlogutils/constants.py for details about the content of an
1273 1273 index entry.
1274 1274 """
1275 1275
1276 1276 _flagserrorclass = error.RevlogError
1277 1277
1278 1278 @staticmethod
1279 1279 def is_inline_index(header_bytes):
1280 1280 """Determine if a revlog is inline from the initial bytes of the index"""
1281 1281 if len(header_bytes) == 0:
1282 1282 return True
1283 1283
1284 1284 header = INDEX_HEADER.unpack(header_bytes)[0]
1285 1285
1286 1286 _format_flags = header & ~0xFFFF
1287 1287 _format_version = header & 0xFFFF
1288 1288
1289 1289 features = FEATURES_BY_VERSION[_format_version]
1290 1290 return features[b'inline'](_format_flags)
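# A minimal usage sketch (illustrative, not from the original file), assuming
# `opener` and `radix` are set up as in __init__ below and the index file
# already exists:
#
#   with opener(b'%s.i' % radix) as fp:
#       header_bytes = fp.read(4)  # the packed INDEX_HEADER word
#   if revlog.is_inline_index(header_bytes):
#       # revision data is interleaved with the index entries in the ".i" file
#       ...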
1291 1291
1292 1292 def __init__(
1293 1293 self,
1294 1294 opener,
1295 1295 target,
1296 1296 radix,
1297 1297 postfix=None, # only exist for `tmpcensored` now
1298 1298 checkambig=False,
1299 1299 mmaplargeindex=False,
1300 1300 censorable=False,
1301 1301 upperboundcomp=None,
1302 1302 persistentnodemap=False,
1303 1303 concurrencychecker=None,
1304 1304 trypending=False,
1305 1305 try_split=False,
1306 1306 canonical_parent_order=True,
1307 1307 data_config=None,
1308 1308 delta_config=None,
1309 1309 feature_config=None,
1310 1310 may_inline=True, # may inline new revlog
1311 1311 ):
1312 1312 """
1313 1313 create a revlog object
1314 1314
1315 1315 opener is a function that abstracts the file opening operation
1316 1316 and can be used to implement COW semantics or the like.
1317 1317
1318 1318 `target`: a (KIND, ID) tuple that identify the content stored in
1319 1319 this revlog. It helps the rest of the code understand what the revlog
1320 1320 is about without having to resort to heuristics and index filename
1321 1321 analysis. Note that this must reliably be set by normal code, but
1322 1322 test, debug, or performance measurement code might not set this to an
1323 1323 accurate value.
1324 1324 """
1325 1325
1326 1326 self.radix = radix
1327 1327
1328 1328 self._docket_file = None
1329 1329 self._indexfile = None
1330 1330 self._datafile = None
1331 1331 self._sidedatafile = None
1332 1332 self._nodemap_file = None
1333 1333 self.postfix = postfix
1334 1334 self._trypending = trypending
1335 1335 self._try_split = try_split
1336 1336 self._may_inline = may_inline
1337 1337 self.opener = opener
1338 1338 if persistentnodemap:
1339 1339 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1340 1340
1341 1341 assert target[0] in ALL_KINDS
1342 1342 assert len(target) == 2
1343 1343 self.target = target
1344 1344 if feature_config is not None:
1345 1345 self.feature_config = feature_config.copy()
1346 1346 elif b'feature-config' in self.opener.options:
1347 1347 self.feature_config = self.opener.options[b'feature-config'].copy()
1348 1348 else:
1349 1349 self.feature_config = FeatureConfig()
1350 1350 self.feature_config.censorable = censorable
1351 1351 self.feature_config.canonical_parent_order = canonical_parent_order
1352 1352 if data_config is not None:
1353 1353 self.data_config = data_config.copy()
1354 1354 elif b'data-config' in self.opener.options:
1355 1355 self.data_config = self.opener.options[b'data-config'].copy()
1356 1356 else:
1357 1357 self.data_config = DataConfig()
1358 1358 self.data_config.check_ambig = checkambig
1359 1359 self.data_config.mmap_large_index = mmaplargeindex
1360 1360 if delta_config is not None:
1361 1361 self.delta_config = delta_config.copy()
1362 1362 elif b'delta-config' in self.opener.options:
1363 1363 self.delta_config = self.opener.options[b'delta-config'].copy()
1364 1364 else:
1365 1365 self.delta_config = DeltaConfig()
1366 1366 self.delta_config.upper_bound_comp = upperboundcomp
1367 1367
1368 1368 # Maps rev to chain base rev.
1369 1369 self._chainbasecache = util.lrucachedict(100)
1370 1370
1371 1371 self.index = None
1372 1372 self._docket = None
1373 1373 self._nodemap_docket = None
1374 1374 # Mapping of partial identifiers to full nodes.
1375 1375 self._pcache = {}
1376 1376
1377 1377 # other optional features
1378 1378
1379 1379 # Make copy of flag processors so each revlog instance can support
1380 1380 # custom flags.
1381 1381 self._flagprocessors = dict(flagutil.flagprocessors)
1382 1382 # prevent nesting of addgroup
1383 1383 self._adding_group = None
1384 1384
1385 1385 chunk_cache = self._loadindex()
1386 1386 self._load_inner(chunk_cache)
1387 1387 self._concurrencychecker = concurrencychecker
1388 1388
1389 1389 def _init_opts(self):
1390 1390 """process options (from above/config) to setup associated default revlog mode
1391 1391
1392 1392 These values might be affected when actually reading on disk information.
1393 1393
1394 1394 The relevant values are returned for use in _loadindex().
1395 1395
1396 1396 * newversionflags:
1397 1397 version header to use if we need to create a new revlog
1398 1398
1399 1399 * mmapindexthreshold:
1400 1400 minimal index size at which we start to use mmap
1401 1401
1402 1402 * force_nodemap:
1403 1403 force the usage of a "development" version of the nodemap code
1404 1404 """
1405 1405 opts = self.opener.options
1406 1406
1407 1407 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1408 1408 new_header = CHANGELOGV2
1409 1409 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1410 1410 self.feature_config.compute_rank = compute_rank
1411 1411 elif b'revlogv2' in opts:
1412 1412 new_header = REVLOGV2
1413 1413 elif b'revlogv1' in opts:
1414 1414 new_header = REVLOGV1
1415 1415 if self._may_inline:
1416 1416 new_header |= FLAG_INLINE_DATA
1417 1417 if b'generaldelta' in opts:
1418 1418 new_header |= FLAG_GENERALDELTA
1419 1419 elif b'revlogv0' in self.opener.options:
1420 1420 new_header = REVLOGV0
1421 1421 else:
1422 1422 new_header = REVLOG_DEFAULT_VERSION
1423 1423
1424 1424 mmapindexthreshold = None
1425 1425 if self.data_config.mmap_large_index:
1426 1426 mmapindexthreshold = self.data_config.mmap_index_threshold
1427 1427 if self.feature_config.enable_ellipsis:
1428 1428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1429 1429
1430 1430 # revlog v0 doesn't have flag processors
1431 1431 for flag, processor in opts.get(b'flagprocessors', {}).items():
1432 1432 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1433 1433
1434 1434 chunk_cache_size = self.data_config.chunk_cache_size
1435 1435 if chunk_cache_size <= 0:
1436 1436 raise error.RevlogError(
1437 1437 _(b'revlog chunk cache size %r is not greater than 0')
1438 1438 % chunk_cache_size
1439 1439 )
1440 1440 elif chunk_cache_size & (chunk_cache_size - 1):
1441 1441 raise error.RevlogError(
1442 1442 _(b'revlog chunk cache size %r is not a power of 2')
1443 1443 % chunk_cache_size
1444 1444 )
1445 1445 force_nodemap = opts.get(b'devel-force-nodemap', False)
1446 1446 return new_header, mmapindexthreshold, force_nodemap
1447 1447
1448 1448 def _get_data(self, filepath, mmap_threshold, size=None):
1449 1449 """return a file content with or without mmap
1450 1450
1451 1451 If the file is missing return the empty string"""
1452 1452 try:
1453 1453 with self.opener(filepath) as fp:
1454 1454 if mmap_threshold is not None:
1455 1455 file_size = self.opener.fstat(fp).st_size
1456 1456 if file_size >= mmap_threshold:
1457 1457 if size is not None:
1458 1458 # avoid potential mmap crash
1459 1459 size = min(file_size, size)
1460 1460 # TODO: should .close() to release resources without
1461 1461 # relying on Python GC
1462 1462 if size is None:
1463 1463 return util.buffer(util.mmapread(fp))
1464 1464 else:
1465 1465 return util.buffer(util.mmapread(fp, size))
1466 1466 if size is None:
1467 1467 return fp.read()
1468 1468 else:
1469 1469 return fp.read(size)
1470 1470 except FileNotFoundError:
1471 1471 return b''
1472 1472
1473 1473 def get_streams(self, max_linkrev, force_inline=False):
1474 1474 """return a list of streams that represent this revlog
1475 1475
1476 1476 This is used by stream-clone to do bytes to bytes copies of a repository.
1477 1477
1478 1478 This streams data for all revisions that refer to a changelog revision up
1479 1479 to `max_linkrev`.
1480 1480
1481 1481 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1482 1482
1483 1483 It returns a list of three-tuples:
1484 1484
1485 1485 [
1486 1486 (filename, bytes_stream, stream_size),
1487 1487 …
1488 1488 ]
1489 1489 """
1490 1490 n = len(self)
1491 1491 index = self.index
1492 1492 while n > 0:
1493 1493 linkrev = index[n - 1][4]
1494 1494 if linkrev < max_linkrev:
1495 1495 break
1496 1496 # note: this loop will rarely go through multiple iterations, since
1497 1497 # it only traverses commits created during the current streaming
1498 1498 # pull operation.
1499 1499 #
1500 1500 # If this becomes a problem, using a binary search should cap the
1501 1501 # runtime of this.
1502 1502 n = n - 1
1503 1503 if n == 0:
1504 1504 # no data to send
1505 1505 return []
1506 1506 index_size = n * index.entry_size
1507 1507 data_size = self.end(n - 1)
1508 1508
1509 1509 # XXX we might have been split (or stripped) since the object
1510 1510 # initialization. We need to close this race too, by having a way to
1511 1511 # pre-open the files we feed to the revlog and never closing them before
1512 1512 # we are done streaming.
1513 1513
1514 1514 if self._inline:
1515 1515
1516 1516 def get_stream():
1517 1517 with self.opener(self._indexfile, mode=b"r") as fp:
1518 1518 yield None
1519 1519 size = index_size + data_size
1520 1520 if size <= 65536:
1521 1521 yield fp.read(size)
1522 1522 else:
1523 1523 yield from util.filechunkiter(fp, limit=size)
1524 1524
1525 1525 inline_stream = get_stream()
1526 1526 next(inline_stream)
1527 1527 return [
1528 1528 (self._indexfile, inline_stream, index_size + data_size),
1529 1529 ]
1530 1530 elif force_inline:
1531 1531
1532 1532 def get_stream():
1533 1533 with self.reading():
1534 1534 yield None
1535 1535
1536 1536 for rev in range(n):
1537 1537 idx = self.index.entry_binary(rev)
1538 1538 if rev == 0 and self._docket is None:
1539 1539 # re-inject the inline flag
1540 1540 header = self._format_flags
1541 1541 header |= self._format_version
1542 1542 header |= FLAG_INLINE_DATA
1543 1543 header = self.index.pack_header(header)
1544 1544 idx = header + idx
1545 1545 yield idx
1546 1546 yield self._inner.get_segment_for_revs(rev, rev)[1]
1547 1547
1548 1548 inline_stream = get_stream()
1549 1549 next(inline_stream)
1550 1550 return [
1551 1551 (self._indexfile, inline_stream, index_size + data_size),
1552 1552 ]
1553 1553 else:
1554 1554
1555 1555 def get_index_stream():
1556 1556 with self.opener(self._indexfile, mode=b"r") as fp:
1557 1557 yield None
1558 1558 if index_size <= 65536:
1559 1559 yield fp.read(index_size)
1560 1560 else:
1561 1561 yield from util.filechunkiter(fp, limit=index_size)
1562 1562
1563 1563 def get_data_stream():
1564 1564 with self._datafp() as fp:
1565 1565 yield None
1566 1566 if data_size <= 65536:
1567 1567 yield fp.read(data_size)
1568 1568 else:
1569 1569 yield from util.filechunkiter(fp, limit=data_size)
1570 1570
1571 1571 index_stream = get_index_stream()
1572 1572 next(index_stream)
1573 1573 data_stream = get_data_stream()
1574 1574 next(data_stream)
1575 1575 return [
1576 1576 (self._datafile, data_stream, data_size),
1577 1577 (self._indexfile, index_stream, index_size),
1578 1578 ]
1579 1579
1580 1580 def _loadindex(self, docket=None):
1581 1581
1582 1582 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1583 1583
1584 1584 if self.postfix is not None:
1585 1585 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1586 1586 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1587 1587 entry_point = b'%s.i.a' % self.radix
1588 1588 elif self._try_split and self.opener.exists(self._split_index_file):
1589 1589 entry_point = self._split_index_file
1590 1590 else:
1591 1591 entry_point = b'%s.i' % self.radix
1592 1592
1593 1593 if docket is not None:
1594 1594 self._docket = docket
1595 1595 self._docket_file = entry_point
1596 1596 else:
1597 1597 self._initempty = True
1598 1598 entry_data = self._get_data(entry_point, mmapindexthreshold)
1599 1599 if len(entry_data) > 0:
1600 1600 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1601 1601 self._initempty = False
1602 1602 else:
1603 1603 header = new_header
1604 1604
1605 1605 self._format_flags = header & ~0xFFFF
1606 1606 self._format_version = header & 0xFFFF
1607 1607
1608 1608 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1609 1609 if supported_flags is None:
1610 1610 msg = _(b'unknown version (%d) in revlog %s')
1611 1611 msg %= (self._format_version, self.display_id)
1612 1612 raise error.RevlogError(msg)
1613 1613 elif self._format_flags & ~supported_flags:
1614 1614 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1615 1615 display_flag = self._format_flags >> 16
1616 1616 msg %= (display_flag, self._format_version, self.display_id)
1617 1617 raise error.RevlogError(msg)
1618 1618
1619 1619 features = FEATURES_BY_VERSION[self._format_version]
1620 1620 self._inline = features[b'inline'](self._format_flags)
1621 1621 self.delta_config.general_delta = features[b'generaldelta'](
1622 1622 self._format_flags
1623 1623 )
1624 1624 self.feature_config.has_side_data = features[b'sidedata']
1625 1625
1626 1626 if not features[b'docket']:
1627 1627 self._indexfile = entry_point
1628 1628 index_data = entry_data
1629 1629 else:
1630 1630 self._docket_file = entry_point
1631 1631 if self._initempty:
1632 1632 self._docket = docketutil.default_docket(self, header)
1633 1633 else:
1634 1634 self._docket = docketutil.parse_docket(
1635 1635 self, entry_data, use_pending=self._trypending
1636 1636 )
1637 1637
1638 1638 if self._docket is not None:
1639 1639 self._indexfile = self._docket.index_filepath()
1640 1640 index_data = b''
1641 1641 index_size = self._docket.index_end
1642 1642 if index_size > 0:
1643 1643 index_data = self._get_data(
1644 1644 self._indexfile, mmapindexthreshold, size=index_size
1645 1645 )
1646 1646 if len(index_data) < index_size:
1647 1647 msg = _(b'too few index data for %s: got %d, expected %d')
1648 1648 msg %= (self.display_id, len(index_data), index_size)
1649 1649 raise error.RevlogError(msg)
1650 1650
1651 1651 self._inline = False
1652 1652 # generaldelta implied by version 2 revlogs.
1653 1653 self.delta_config.general_delta = True
1654 1654 # the logic for persistent nodemap will be dealt with within the
1655 1655 # main docket, so disable it for now.
1656 1656 self._nodemap_file = None
1657 1657
1658 1658 if self._docket is not None:
1659 1659 self._datafile = self._docket.data_filepath()
1660 1660 self._sidedatafile = self._docket.sidedata_filepath()
1661 1661 elif self.postfix is None:
1662 1662 self._datafile = b'%s.d' % self.radix
1663 1663 else:
1664 1664 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1665 1665
1666 1666 self.nodeconstants = sha1nodeconstants
1667 1667 self.nullid = self.nodeconstants.nullid
1668 1668
1669 1669 # sparse-revlog can't be on without general-delta (issue6056)
1670 1670 if not self.delta_config.general_delta:
1671 1671 self.delta_config.sparse_revlog = False
1672 1672
1673 1673 self._storedeltachains = True
1674 1674
1675 1675 devel_nodemap = (
1676 1676 self._nodemap_file
1677 1677 and force_nodemap
1678 1678 and parse_index_v1_nodemap is not None
1679 1679 )
1680 1680
1681 1681 use_rust_index = False
1682 1682 if rustrevlog is not None:
1683 1683 if self._nodemap_file is not None:
1684 1684 use_rust_index = True
1685 1685 else:
1686 1686 # Using the CIndex is no longer possible, as the
1687 1687 # `AncestorsIterator` and `LazyAncestors` classes now require
1688 1688 # a Rust index for instantiation.
1689 1689 use_rust_index = True
1690 1690
1691 1691 self._parse_index = parse_index_v1
1692 1692 if self._format_version == REVLOGV0:
1693 1693 self._parse_index = revlogv0.parse_index_v0
1694 1694 elif self._format_version == REVLOGV2:
1695 1695 self._parse_index = parse_index_v2
1696 1696 elif self._format_version == CHANGELOGV2:
1697 1697 self._parse_index = parse_index_cl_v2
1698 1698 elif devel_nodemap:
1699 1699 self._parse_index = parse_index_v1_nodemap
1700 1700 elif use_rust_index:
1701 1701 self._parse_index = functools.partial(
1702 parse_index_v1_mixed, default_header=new_header
1702 parse_index_v1_rust, default_header=new_header
1703 1703 )
1704 1704 try:
1705 1705 d = self._parse_index(index_data, self._inline)
1706 1706 index, chunkcache = d
1707 1707 use_nodemap = (
1708 1708 not self._inline
1709 1709 and self._nodemap_file is not None
1710 1710 and hasattr(index, 'update_nodemap_data')
1711 1711 )
1712 1712 if use_nodemap:
1713 1713 nodemap_data = nodemaputil.persisted_data(self)
1714 1714 if nodemap_data is not None:
1715 1715 docket = nodemap_data[0]
1716 1716 if (
1717 1717 len(d[0]) > docket.tip_rev
1718 1718 and d[0][docket.tip_rev][7] == docket.tip_node
1719 1719 ):
1720 1720 # no changelog tampering
1721 1721 self._nodemap_docket = docket
1722 1722 index.update_nodemap_data(*nodemap_data)
1723 1723 except (ValueError, IndexError):
1724 1724 raise error.RevlogError(
1725 1725 _(b"index %s is corrupted") % self.display_id
1726 1726 )
1727 1727 self.index = index
1728 1728 # revnum -> (chain-length, sum-delta-length)
1729 1729 self._chaininfocache = util.lrucachedict(500)
1730 1730
1731 1731 return chunkcache
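# Worked example (a sketch, not part of the module): how the 4-byte header
# unpacked above splits into format flags (high 16 bits) and version (low
# 16 bits). Assuming the usual v1 constants (REVLOGV1 == 1,
# FLAG_INLINE_DATA == 1 << 16, FLAG_GENERALDELTA == 2 << 16), an inline,
# general-delta revlog starts with 0x00030001:
#
#   import struct
#   header = struct.unpack(">I", b'\x00\x03\x00\x01')[0]
#   assert header & 0xFFFF == 1            # format version (REVLOGV1)
#   assert header & ~0xFFFF == 0x00030000  # inline + generaldelta flags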
1732 1732
1733 1733 def _load_inner(self, chunk_cache):
1734 1734 if self._docket is None:
1735 1735 default_compression_header = None
1736 1736 else:
1737 1737 default_compression_header = self._docket.default_compression_header
1738 1738
1739 1739 self._inner = _InnerRevlog(
1740 1740 opener=self.opener,
1741 1741 index=self.index,
1742 1742 index_file=self._indexfile,
1743 1743 data_file=self._datafile,
1744 1744 sidedata_file=self._sidedatafile,
1745 1745 inline=self._inline,
1746 1746 data_config=self.data_config,
1747 1747 delta_config=self.delta_config,
1748 1748 feature_config=self.feature_config,
1749 1749 chunk_cache=chunk_cache,
1750 1750 default_compression_header=default_compression_header,
1751 1751 )
1752 1752
1753 1753 def get_revlog(self):
1754 1754 """simple function to mirror API of other not-really-revlog API"""
1755 1755 return self
1756 1756
1757 1757 @util.propertycache
1758 1758 def revlog_kind(self):
1759 1759 return self.target[0]
1760 1760
1761 1761 @util.propertycache
1762 1762 def display_id(self):
1763 1763 """The public facing "ID" of the revlog that we use in message"""
1764 1764 if self.revlog_kind == KIND_FILELOG:
1765 1765 # Reference the file without the "data/" prefix, so it is familiar
1766 1766 # to the user.
1767 1767 return self.target[1]
1768 1768 else:
1769 1769 return self.radix
1770 1770
1771 1771 def _datafp(self, mode=b'r'):
1772 1772 """file object for the revlog's data file"""
1773 1773 return self.opener(self._datafile, mode=mode)
1774 1774
1775 1775 def tiprev(self):
1776 1776 return len(self.index) - 1
1777 1777
1778 1778 def tip(self):
1779 1779 return self.node(self.tiprev())
1780 1780
1781 1781 def __contains__(self, rev):
1782 1782 return 0 <= rev < len(self)
1783 1783
1784 1784 def __len__(self):
1785 1785 return len(self.index)
1786 1786
1787 1787 def __iter__(self):
1788 1788 return iter(range(len(self)))
1789 1789
1790 1790 def revs(self, start=0, stop=None):
1791 1791 """iterate over all rev in this revlog (from start to stop)"""
1792 1792 return storageutil.iterrevs(len(self), start=start, stop=stop)
1793 1793
1794 1794 def hasnode(self, node):
1795 1795 try:
1796 1796 self.rev(node)
1797 1797 return True
1798 1798 except KeyError:
1799 1799 return False
1800 1800
1801 1801 def _candelta(self, baserev, rev):
1802 1802 """whether two revisions (baserev, rev) can be delta-ed or not"""
1803 1803 # Disable delta if either rev requires a content-changing flag
1804 1804 # processor (ex. LFS). This is because such a flag processor can alter
1805 1805 # the rawtext content that the delta will be based on, and two clients
1806 1806 # could have the same revlog node with different flags (i.e. different
1807 1807 # rawtext contents) and the delta could be incompatible.
1808 1808 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1809 1809 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1810 1810 ):
1811 1811 return False
1812 1812 return True
1813 1813
1814 1814 def update_caches(self, transaction):
1815 1815 """update on disk cache
1816 1816
1817 1817 If a transaction is passed, the update may be delayed to transaction
1818 1818 commit."""
1819 1819 if self._nodemap_file is not None:
1820 1820 if transaction is None:
1821 1821 nodemaputil.update_persistent_nodemap(self)
1822 1822 else:
1823 1823 nodemaputil.setup_persistent_nodemap(transaction, self)
1824 1824
1825 1825 def clearcaches(self):
1826 1826 """Clear in-memory caches"""
1827 1827 self._chainbasecache.clear()
1828 1828 self._inner.clear_cache()
1829 1829 self._pcache = {}
1830 1830 self._nodemap_docket = None
1831 1831 self.index.clearcaches()
1832 1832 # The python code is the one responsible for validating the docket, so
1833 1833 # we end up having to refresh it here.
1834 1834 use_nodemap = (
1835 1835 not self._inline
1836 1836 and self._nodemap_file is not None
1837 1837 and hasattr(self.index, 'update_nodemap_data')
1838 1838 )
1839 1839 if use_nodemap:
1840 1840 nodemap_data = nodemaputil.persisted_data(self)
1841 1841 if nodemap_data is not None:
1842 1842 self._nodemap_docket = nodemap_data[0]
1843 1843 self.index.update_nodemap_data(*nodemap_data)
1844 1844
1845 1845 def rev(self, node):
1846 1846 """return the revision number associated with a <nodeid>"""
1847 1847 try:
1848 1848 return self.index.rev(node)
1849 1849 except TypeError:
1850 1850 raise
1851 1851 except error.RevlogError:
1852 1852 # parsers.c radix tree lookup failed
1853 1853 if (
1854 1854 node == self.nodeconstants.wdirid
1855 1855 or node in self.nodeconstants.wdirfilenodeids
1856 1856 ):
1857 1857 raise error.WdirUnsupported
1858 1858 raise error.LookupError(node, self.display_id, _(b'no node'))
1859 1859
1860 1860 # Accessors for index entries.
1861 1861
1862 1862 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1863 1863 # are flags.
1864 1864 def start(self, rev):
1865 1865 return int(self.index[rev][0] >> 16)
1866 1866
1867 1867 def sidedata_cut_off(self, rev):
1868 1868 sd_cut_off = self.index[rev][8]
1869 1869 if sd_cut_off != 0:
1870 1870 return sd_cut_off
1871 1871 # This is some annoying dance, because entries without sidedata
1872 1872 # currently use 0 as their offset. (instead of previous-offset +
1873 1873 # previous-size)
1874 1874 #
1875 1875 # We should reconsider this sidedata → 0 sidedata_offset policy.
1876 1876 # In the meantime, we need this.
1877 1877 while 0 <= rev:
1878 1878 e = self.index[rev]
1879 1879 if e[9] != 0:
1880 1880 return e[8] + e[9]
1881 1881 rev -= 1
1882 1882 return 0
1883 1883
1884 1884 def flags(self, rev):
1885 1885 return self.index[rev][0] & 0xFFFF
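# Sketch of the packing the two accessors above rely on (spelled out for
# illustration, not new behaviour): the first index field stores the data
# offset in its upper 48 bits and the flags in its lower 16 bits, i.e.
# (offset << 16) | flags.
#
#   offset_flags = (1234 << 16) | 0x0002
#   assert offset_flags >> 16 == 1234       # what start() returns
#   assert offset_flags & 0xFFFF == 0x0002  # what flags() returns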
1886 1886
1887 1887 def length(self, rev):
1888 1888 return self.index[rev][1]
1889 1889
1890 1890 def sidedata_length(self, rev):
1891 1891 if not self.feature_config.has_side_data:
1892 1892 return 0
1893 1893 return self.index[rev][9]
1894 1894
1895 1895 def rawsize(self, rev):
1896 1896 """return the length of the uncompressed text for a given revision"""
1897 1897 l = self.index[rev][2]
1898 1898 if l >= 0:
1899 1899 return l
1900 1900
1901 1901 t = self.rawdata(rev)
1902 1902 return len(t)
1903 1903
1904 1904 def size(self, rev):
1905 1905 """length of non-raw text (processed by a "read" flag processor)"""
1906 1906 # fast path: if no "read" flag processor could change the content,
1907 1907 # size is rawsize. note: ELLIPSIS is known to not change the content.
1908 1908 flags = self.flags(rev)
1909 1909 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1910 1910 return self.rawsize(rev)
1911 1911
1912 1912 return len(self.revision(rev))
1913 1913
1914 1914 def fast_rank(self, rev):
1915 1915 """Return the rank of a revision if already known, or None otherwise.
1916 1916
1917 1917 The rank of a revision is the size of the sub-graph it defines as a
1918 1918 head. Equivalently, the rank of a revision `r` is the size of the set
1919 1919 `ancestors(r)`, `r` included.
1920 1920
1921 1921 This method returns the rank retrieved from the revlog in constant
1922 1922 time. It makes no attempt at computing unknown values for versions of
1923 1923 the revlog which do not persist the rank.
1924 1924 """
1925 1925 rank = self.index[rev][ENTRY_RANK]
1926 1926 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1927 1927 return None
1928 1928 if rev == nullrev:
1929 1929 return 0 # convention
1930 1930 return rank
1931 1931
1932 1932 def chainbase(self, rev):
1933 1933 base = self._chainbasecache.get(rev)
1934 1934 if base is not None:
1935 1935 return base
1936 1936
1937 1937 index = self.index
1938 1938 iterrev = rev
1939 1939 base = index[iterrev][3]
1940 1940 while base != iterrev:
1941 1941 iterrev = base
1942 1942 base = index[iterrev][3]
1943 1943
1944 1944 self._chainbasecache[rev] = base
1945 1945 return base
1946 1946
1947 1947 def linkrev(self, rev):
1948 1948 return self.index[rev][4]
1949 1949
1950 1950 def parentrevs(self, rev):
1951 1951 try:
1952 1952 entry = self.index[rev]
1953 1953 except IndexError:
1954 1954 if rev == wdirrev:
1955 1955 raise error.WdirUnsupported
1956 1956 raise
1957 1957
1958 1958 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1959 1959 return entry[6], entry[5]
1960 1960 else:
1961 1961 return entry[5], entry[6]
1962 1962
1963 1963 # fast parentrevs(rev) where rev isn't filtered
1964 1964 _uncheckedparentrevs = parentrevs
1965 1965
1966 1966 def node(self, rev):
1967 1967 try:
1968 1968 return self.index[rev][7]
1969 1969 except IndexError:
1970 1970 if rev == wdirrev:
1971 1971 raise error.WdirUnsupported
1972 1972 raise
1973 1973
1974 1974 # Derived from index values.
1975 1975
1976 1976 def end(self, rev):
1977 1977 return self.start(rev) + self.length(rev)
1978 1978
1979 1979 def parents(self, node):
1980 1980 i = self.index
1981 1981 d = i[self.rev(node)]
1982 1982 # inline node() to avoid function call overhead
1983 1983 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1984 1984 return i[d[6]][7], i[d[5]][7]
1985 1985 else:
1986 1986 return i[d[5]][7], i[d[6]][7]
1987 1987
1988 1988 def chainlen(self, rev):
1989 1989 return self._chaininfo(rev)[0]
1990 1990
1991 1991 def _chaininfo(self, rev):
1992 1992 chaininfocache = self._chaininfocache
1993 1993 if rev in chaininfocache:
1994 1994 return chaininfocache[rev]
1995 1995 index = self.index
1996 1996 generaldelta = self.delta_config.general_delta
1997 1997 iterrev = rev
1998 1998 e = index[iterrev]
1999 1999 clen = 0
2000 2000 compresseddeltalen = 0
2001 2001 while iterrev != e[3]:
2002 2002 clen += 1
2003 2003 compresseddeltalen += e[1]
2004 2004 if generaldelta:
2005 2005 iterrev = e[3]
2006 2006 else:
2007 2007 iterrev -= 1
2008 2008 if iterrev in chaininfocache:
2009 2009 t = chaininfocache[iterrev]
2010 2010 clen += t[0]
2011 2011 compresseddeltalen += t[1]
2012 2012 break
2013 2013 e = index[iterrev]
2014 2014 else:
2015 2015 # Add text length of base since decompressing that also takes
2016 2016 # work. For cache hits the length is already included.
2017 2017 compresseddeltalen += e[1]
2018 2018 r = (clen, compresseddeltalen)
2019 2019 chaininfocache[rev] = r
2020 2020 return r
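# Tiny worked example restating the loop above (derived from the code, not
# an addition to it): a revision that is its own delta base is a full
# snapshot, so its chain length is 0; every delta stacked on top adds one
# step and its compressed length to the running totals.
#
#   # hypothetical revs: 0 is a snapshot, 1 and 2 are deltas on top of it
#   # chainlen(0) == 0, chainlen(1) == 1, chainlen(2) == 2
#   # _chaininfo(2)[1] == compressed lengths of delta 2 + delta 1 + rev 0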
2021 2021
2022 2022 def _deltachain(self, rev, stoprev=None):
2023 2023 return self._inner._deltachain(rev, stoprev=stoprev)
2024 2024
2025 2025 def ancestors(self, revs, stoprev=0, inclusive=False):
2026 2026 """Generate the ancestors of 'revs' in reverse revision order.
2027 2027 Does not generate revs lower than stoprev.
2028 2028
2029 2029 See the documentation for ancestor.lazyancestors for more details."""
2030 2030
2031 2031 # first, make sure start revisions aren't filtered
2032 2032 revs = list(revs)
2033 2033 checkrev = self.node
2034 2034 for r in revs:
2035 2035 checkrev(r)
2036 2036 # and we're sure ancestors aren't filtered as well
2037 2037
2038 2038 if rustancestor is not None and self.index.rust_ext_compat:
2039 2039 lazyancestors = rustancestor.LazyAncestors
2040 2040 arg = self.index
2041 2041 else:
2042 2042 lazyancestors = ancestor.lazyancestors
2043 2043 arg = self._uncheckedparentrevs
2044 2044 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
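# Minimal usage sketch (hypothetical revision numbers, not from the source):
#
#   for r in rl.ancestors([30, 32], stoprev=10, inclusive=True):
#       ...  # yields 32, 30 and their ancestors in reverse revision
#            # order, stopping before anything below rev 10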
2045 2045
2046 2046 def descendants(self, revs):
2047 2047 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2048 2048
2049 2049 def findcommonmissing(self, common=None, heads=None):
2050 2050 """Return a tuple of the ancestors of common and the ancestors of heads
2051 2051 that are not ancestors of common. In revset terminology, we return the
2052 2052 tuple:
2053 2053
2054 2054 ::common, (::heads) - (::common)
2055 2055
2056 2056 The list is sorted by revision number, meaning it is
2057 2057 topologically sorted.
2058 2058
2059 2059 'heads' and 'common' are both lists of node IDs. If heads is
2060 2060 not supplied, uses all of the revlog's heads. If common is not
2061 2061 supplied, uses nullid."""
2062 2062 if common is None:
2063 2063 common = [self.nullid]
2064 2064 if heads is None:
2065 2065 heads = self.heads()
2066 2066
2067 2067 common = [self.rev(n) for n in common]
2068 2068 heads = [self.rev(n) for n in heads]
2069 2069
2070 2070 # we want the ancestors, but inclusive
2071 2071 class lazyset:
2072 2072 def __init__(self, lazyvalues):
2073 2073 self.addedvalues = set()
2074 2074 self.lazyvalues = lazyvalues
2075 2075
2076 2076 def __contains__(self, value):
2077 2077 return value in self.addedvalues or value in self.lazyvalues
2078 2078
2079 2079 def __iter__(self):
2080 2080 added = self.addedvalues
2081 2081 for r in added:
2082 2082 yield r
2083 2083 for r in self.lazyvalues:
2084 2084 if not r in added:
2085 2085 yield r
2086 2086
2087 2087 def add(self, value):
2088 2088 self.addedvalues.add(value)
2089 2089
2090 2090 def update(self, values):
2091 2091 self.addedvalues.update(values)
2092 2092
2093 2093 has = lazyset(self.ancestors(common))
2094 2094 has.add(nullrev)
2095 2095 has.update(common)
2096 2096
2097 2097 # take all ancestors from heads that aren't in has
2098 2098 missing = set()
2099 2099 visit = collections.deque(r for r in heads if r not in has)
2100 2100 while visit:
2101 2101 r = visit.popleft()
2102 2102 if r in missing:
2103 2103 continue
2104 2104 else:
2105 2105 missing.add(r)
2106 2106 for p in self.parentrevs(r):
2107 2107 if p not in has:
2108 2108 visit.append(p)
2109 2109 missing = list(missing)
2110 2110 missing.sort()
2111 2111 return has, [self.node(miss) for miss in missing]
2112 2112
2113 2113 def incrementalmissingrevs(self, common=None):
2114 2114 """Return an object that can be used to incrementally compute the
2115 2115 revision numbers of the ancestors of arbitrary sets that are not
2116 2116 ancestors of common. This is an ancestor.incrementalmissingancestors
2117 2117 object.
2118 2118
2119 2119 'common' is a list of revision numbers. If common is not supplied, uses
2120 2120 nullrev.
2121 2121 """
2122 2122 if common is None:
2123 2123 common = [nullrev]
2124 2124
2125 2125 if rustancestor is not None and self.index.rust_ext_compat:
2126 2126 return rustancestor.MissingAncestors(self.index, common)
2127 2127 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2128 2128
2129 2129 def findmissingrevs(self, common=None, heads=None):
2130 2130 """Return the revision numbers of the ancestors of heads that
2131 2131 are not ancestors of common.
2132 2132
2133 2133 More specifically, return a list of revision numbers corresponding to
2134 2134 nodes N such that every N satisfies the following constraints:
2135 2135
2136 2136 1. N is an ancestor of some node in 'heads'
2137 2137 2. N is not an ancestor of any node in 'common'
2138 2138
2139 2139 The list is sorted by revision number, meaning it is
2140 2140 topologically sorted.
2141 2141
2142 2142 'heads' and 'common' are both lists of revision numbers. If heads is
2143 2143 not supplied, uses all of the revlog's heads. If common is not
2144 2144 supplied, uses nullid."""
2145 2145 if common is None:
2146 2146 common = [nullrev]
2147 2147 if heads is None:
2148 2148 heads = self.headrevs()
2149 2149
2150 2150 inc = self.incrementalmissingrevs(common=common)
2151 2151 return inc.missingancestors(heads)
2152 2152
2153 2153 def findmissing(self, common=None, heads=None):
2154 2154 """Return the ancestors of heads that are not ancestors of common.
2155 2155
2156 2156 More specifically, return a list of nodes N such that every N
2157 2157 satisfies the following constraints:
2158 2158
2159 2159 1. N is an ancestor of some node in 'heads'
2160 2160 2. N is not an ancestor of any node in 'common'
2161 2161
2162 2162 The list is sorted by revision number, meaning it is
2163 2163 topologically sorted.
2164 2164
2165 2165 'heads' and 'common' are both lists of node IDs. If heads is
2166 2166 not supplied, uses all of the revlog's heads. If common is not
2167 2167 supplied, uses nullid."""
2168 2168 if common is None:
2169 2169 common = [self.nullid]
2170 2170 if heads is None:
2171 2171 heads = self.heads()
2172 2172
2173 2173 common = [self.rev(n) for n in common]
2174 2174 heads = [self.rev(n) for n in heads]
2175 2175
2176 2176 inc = self.incrementalmissingrevs(common=common)
2177 2177 return [self.node(r) for r in inc.missingancestors(heads)]
2178 2178
2179 2179 def nodesbetween(self, roots=None, heads=None):
2180 2180 """Return a topological path from 'roots' to 'heads'.
2181 2181
2182 2182 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2183 2183 topologically sorted list of all nodes N that satisfy both of
2184 2184 these constraints:
2185 2185
2186 2186 1. N is a descendant of some node in 'roots'
2187 2187 2. N is an ancestor of some node in 'heads'
2188 2188
2189 2189 Every node is considered to be both a descendant and an ancestor
2190 2190 of itself, so every reachable node in 'roots' and 'heads' will be
2191 2191 included in 'nodes'.
2192 2192
2193 2193 'outroots' is the list of reachable nodes in 'roots', i.e., the
2194 2194 subset of 'roots' that is returned in 'nodes'. Likewise,
2195 2195 'outheads' is the subset of 'heads' that is also in 'nodes'.
2196 2196
2197 2197 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2198 2198 unspecified, uses nullid as the only root. If 'heads' is
2199 2199 unspecified, uses list of all of the revlog's heads."""
2200 2200 nonodes = ([], [], [])
2201 2201 if roots is not None:
2202 2202 roots = list(roots)
2203 2203 if not roots:
2204 2204 return nonodes
2205 2205 lowestrev = min([self.rev(n) for n in roots])
2206 2206 else:
2207 2207 roots = [self.nullid] # Everybody's a descendant of nullid
2208 2208 lowestrev = nullrev
2209 2209 if (lowestrev == nullrev) and (heads is None):
2210 2210 # We want _all_ the nodes!
2211 2211 return (
2212 2212 [self.node(r) for r in self],
2213 2213 [self.nullid],
2214 2214 list(self.heads()),
2215 2215 )
2216 2216 if heads is None:
2217 2217 # All nodes are ancestors, so the latest ancestor is the last
2218 2218 # node.
2219 2219 highestrev = len(self) - 1
2220 2220 # Set ancestors to None to signal that every node is an ancestor.
2221 2221 ancestors = None
2222 2222 # Set heads to an empty dictionary for later discovery of heads
2223 2223 heads = {}
2224 2224 else:
2225 2225 heads = list(heads)
2226 2226 if not heads:
2227 2227 return nonodes
2228 2228 ancestors = set()
2229 2229 # Turn heads into a dictionary so we can remove 'fake' heads.
2230 2230 # Also, later we will be using it to filter out the heads we can't
2231 2231 # find from roots.
2232 2232 heads = dict.fromkeys(heads, False)
2233 2233 # Start at the top and keep marking parents until we're done.
2234 2234 nodestotag = set(heads)
2235 2235 # Remember where the top was so we can use it as a limit later.
2236 2236 highestrev = max([self.rev(n) for n in nodestotag])
2237 2237 while nodestotag:
2238 2238 # grab a node to tag
2239 2239 n = nodestotag.pop()
2240 2240 # Never tag nullid
2241 2241 if n == self.nullid:
2242 2242 continue
2243 2243 # A node's revision number represents its place in a
2244 2244 # topologically sorted list of nodes.
2245 2245 r = self.rev(n)
2246 2246 if r >= lowestrev:
2247 2247 if n not in ancestors:
2248 2248 # If we are possibly a descendant of one of the roots
2249 2249 # and we haven't already been marked as an ancestor
2250 2250 ancestors.add(n) # Mark as ancestor
2251 2251 # Add non-nullid parents to list of nodes to tag.
2252 2252 nodestotag.update(
2253 2253 [p for p in self.parents(n) if p != self.nullid]
2254 2254 )
2255 2255 elif n in heads: # We've seen it before, is it a fake head?
2256 2256 # So it is, real heads should not be the ancestors of
2257 2257 # any other heads.
2258 2258 heads.pop(n)
2259 2259 if not ancestors:
2260 2260 return nonodes
2261 2261 # Now that we have our set of ancestors, we want to remove any
2262 2262 # roots that are not ancestors.
2263 2263
2264 2264 # If one of the roots was nullid, everything is included anyway.
2265 2265 if lowestrev > nullrev:
2266 2266 # But, since we weren't, let's recompute the lowest rev to not
2267 2267 # include roots that aren't ancestors.
2268 2268
2269 2269 # Filter out roots that aren't ancestors of heads
2270 2270 roots = [root for root in roots if root in ancestors]
2271 2271 # Recompute the lowest revision
2272 2272 if roots:
2273 2273 lowestrev = min([self.rev(root) for root in roots])
2274 2274 else:
2275 2275 # No more roots? Return empty list
2276 2276 return nonodes
2277 2277 else:
2278 2278 # We are descending from nullid, and don't need to care about
2279 2279 # any other roots.
2280 2280 lowestrev = nullrev
2281 2281 roots = [self.nullid]
2282 2282 # Transform our roots list into a set.
2283 2283 descendants = set(roots)
2284 2284 # Also, keep the original roots so we can filter out roots that aren't
2285 2285 # 'real' roots (i.e. are descended from other roots).
2286 2286 roots = descendants.copy()
2287 2287 # Our topologically sorted list of output nodes.
2288 2288 orderedout = []
2289 2289 # Don't start at nullid since we don't want nullid in our output list,
2290 2290 # and if nullid shows up in descendants, empty parents will look like
2291 2291 # they're descendants.
2292 2292 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2293 2293 n = self.node(r)
2294 2294 isdescendant = False
2295 2295 if lowestrev == nullrev: # Everybody is a descendant of nullid
2296 2296 isdescendant = True
2297 2297 elif n in descendants:
2298 2298 # n is already a descendant
2299 2299 isdescendant = True
2300 2300 # This check only needs to be done here because all the roots
2301 2301 # will start being marked as descendants before the loop.
2302 2302 if n in roots:
2303 2303 # If n was a root, check if it's a 'real' root.
2304 2304 p = tuple(self.parents(n))
2305 2305 # If any of its parents are descendants, it's not a root.
2306 2306 if (p[0] in descendants) or (p[1] in descendants):
2307 2307 roots.remove(n)
2308 2308 else:
2309 2309 p = tuple(self.parents(n))
2310 2310 # A node is a descendant if either of its parents are
2311 2311 # descendants. (We seeded the descendants list with the roots
2312 2312 # up there, remember?)
2313 2313 if (p[0] in descendants) or (p[1] in descendants):
2314 2314 descendants.add(n)
2315 2315 isdescendant = True
2316 2316 if isdescendant and ((ancestors is None) or (n in ancestors)):
2317 2317 # Only include nodes that are both descendants and ancestors.
2318 2318 orderedout.append(n)
2319 2319 if (ancestors is not None) and (n in heads):
2320 2320 # We're trying to figure out which heads are reachable
2321 2321 # from roots.
2322 2322 # Mark this head as having been reached
2323 2323 heads[n] = True
2324 2324 elif ancestors is None:
2325 2325 # Otherwise, we're trying to discover the heads.
2326 2326 # Assume this is a head because if it isn't, the next step
2327 2327 # will eventually remove it.
2328 2328 heads[n] = True
2329 2329 # But, obviously its parents aren't.
2330 2330 for p in self.parents(n):
2331 2331 heads.pop(p, None)
2332 2332 heads = [head for head, flag in heads.items() if flag]
2333 2333 roots = list(roots)
2334 2334 assert orderedout
2335 2335 assert roots
2336 2336 assert heads
2337 2337 return (orderedout, roots, heads)
2338 2338
2339 2339 def headrevs(self, revs=None):
2340 2340 if revs is None:
2341 2341 try:
2342 2342 return self.index.headrevs()
2343 2343 except AttributeError:
2344 2344 return self._headrevs()
2345 2345 if rustdagop is not None and self.index.rust_ext_compat:
2346 2346 return rustdagop.headrevs(self.index, revs)
2347 2347 return dagop.headrevs(revs, self._uncheckedparentrevs)
2348 2348
2349 2349 def computephases(self, roots):
2350 2350 return self.index.computephasesmapsets(roots)
2351 2351
2352 2352 def _headrevs(self):
2353 2353 count = len(self)
2354 2354 if not count:
2355 2355 return [nullrev]
2356 2356 # we won't iter over filtered rev so nobody is a head at start
2357 2357 ishead = [0] * (count + 1)
2358 2358 index = self.index
2359 2359 for r in self:
2360 2360 ishead[r] = 1 # I may be a head
2361 2361 e = index[r]
2362 2362 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2363 2363 return [r for r, val in enumerate(ishead) if val]
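# Worked example for the pure-python fallback above (hypothetical graph):
# with revisions 0 <- 1, 1 <- 2 and 1 <- 3 (a fork at rev 1), every rev is
# first assumed to be a head, then 0 and 1 are cleared because they appear
# as parents:
#
#   # ishead == [0, 0, 1, 1, ...]  ->  _headrevs() == [2, 3]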
2364 2364
2365 2365 def heads(self, start=None, stop=None):
2366 2366 """return the list of all nodes that have no children
2367 2367
2368 2368 if start is specified, only heads that are descendants of
2369 2369 start will be returned
2370 2370 if stop is specified, it will consider all the revs from stop
2371 2371 as if they had no children
2372 2372 """
2373 2373 if start is None and stop is None:
2374 2374 if not len(self):
2375 2375 return [self.nullid]
2376 2376 return [self.node(r) for r in self.headrevs()]
2377 2377
2378 2378 if start is None:
2379 2379 start = nullrev
2380 2380 else:
2381 2381 start = self.rev(start)
2382 2382
2383 2383 stoprevs = {self.rev(n) for n in stop or []}
2384 2384
2385 2385 revs = dagop.headrevssubset(
2386 2386 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2387 2387 )
2388 2388
2389 2389 return [self.node(rev) for rev in revs]
2390 2390
2391 2391 def children(self, node):
2392 2392 """find the children of a given node"""
2393 2393 c = []
2394 2394 p = self.rev(node)
2395 2395 for r in self.revs(start=p + 1):
2396 2396 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2397 2397 if prevs:
2398 2398 for pr in prevs:
2399 2399 if pr == p:
2400 2400 c.append(self.node(r))
2401 2401 elif p == nullrev:
2402 2402 c.append(self.node(r))
2403 2403 return c
2404 2404
2405 2405 def commonancestorsheads(self, a, b):
2406 2406 """calculate all the heads of the common ancestors of nodes a and b"""
2407 2407 a, b = self.rev(a), self.rev(b)
2408 2408 ancs = self._commonancestorsheads(a, b)
2409 2409 return pycompat.maplist(self.node, ancs)
2410 2410
2411 2411 def _commonancestorsheads(self, *revs):
2412 2412 """calculate all the heads of the common ancestors of revs"""
2413 2413 try:
2414 2414 ancs = self.index.commonancestorsheads(*revs)
2415 2415 except (AttributeError, OverflowError): # C implementation failed
2416 2416 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2417 2417 return ancs
2418 2418
2419 2419 def isancestor(self, a, b):
2420 2420 """return True if node a is an ancestor of node b
2421 2421
2422 2422 A revision is considered an ancestor of itself."""
2423 2423 a, b = self.rev(a), self.rev(b)
2424 2424 return self.isancestorrev(a, b)
2425 2425
2426 2426 def isancestorrev(self, a, b):
2427 2427 """return True if revision a is an ancestor of revision b
2428 2428
2429 2429 A revision is considered an ancestor of itself.
2430 2430
2431 2431 The implementation of this is trivial but the use of
2432 2432 reachableroots is not."""
2433 2433 if a == nullrev:
2434 2434 return True
2435 2435 elif a == b:
2436 2436 return True
2437 2437 elif a > b:
2438 2438 return False
2439 2439 return bool(self.reachableroots(a, [b], [a], includepath=False))
2440 2440
2441 2441 def reachableroots(self, minroot, heads, roots, includepath=False):
2442 2442 """return (heads(::(<roots> and <roots>::<heads>)))
2443 2443
2444 2444 If includepath is True, return (<roots>::<heads>)."""
2445 2445 try:
2446 2446 return self.index.reachableroots2(
2447 2447 minroot, heads, roots, includepath
2448 2448 )
2449 2449 except AttributeError:
2450 2450 return dagop._reachablerootspure(
2451 2451 self.parentrevs, minroot, roots, heads, includepath
2452 2452 )
2453 2453
2454 2454 def ancestor(self, a, b):
2455 2455 """calculate the "best" common ancestor of nodes a and b"""
2456 2456
2457 2457 a, b = self.rev(a), self.rev(b)
2458 2458 try:
2459 2459 ancs = self.index.ancestors(a, b)
2460 2460 except (AttributeError, OverflowError):
2461 2461 ancs = ancestor.ancestors(self.parentrevs, a, b)
2462 2462 if ancs:
2463 2463 # choose a consistent winner when there's a tie
2464 2464 return min(map(self.node, ancs))
2465 2465 return self.nullid
2466 2466
2467 2467 def _match(self, id):
2468 2468 if isinstance(id, int):
2469 2469 # rev
2470 2470 return self.node(id)
2471 2471 if len(id) == self.nodeconstants.nodelen:
2472 2472 # possibly a binary node
2473 2473 # odds of a binary node being all hex in ASCII are 1 in 10**25
2474 2474 try:
2475 2475 node = id
2476 2476 self.rev(node) # quick search the index
2477 2477 return node
2478 2478 except error.LookupError:
2479 2479 pass # may be partial hex id
2480 2480 try:
2481 2481 # str(rev)
2482 2482 rev = int(id)
2483 2483 if b"%d" % rev != id:
2484 2484 raise ValueError
2485 2485 if rev < 0:
2486 2486 rev = len(self) + rev
2487 2487 if rev < 0 or rev >= len(self):
2488 2488 raise ValueError
2489 2489 return self.node(rev)
2490 2490 except (ValueError, OverflowError):
2491 2491 pass
2492 2492 if len(id) == 2 * self.nodeconstants.nodelen:
2493 2493 try:
2494 2494 # a full hex nodeid?
2495 2495 node = bin(id)
2496 2496 self.rev(node)
2497 2497 return node
2498 2498 except (binascii.Error, error.LookupError):
2499 2499 pass
2500 2500
2501 2501 def _partialmatch(self, id):
2502 2502 # we don't care about wdirfilenodeids as they should always be full hashes
2503 2503 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2504 2504 ambiguous = False
2505 2505 try:
2506 2506 partial = self.index.partialmatch(id)
2507 2507 if partial and self.hasnode(partial):
2508 2508 if maybewdir:
2509 2509 # single 'ff...' match in radix tree, ambiguous with wdir
2510 2510 ambiguous = True
2511 2511 else:
2512 2512 return partial
2513 2513 elif maybewdir:
2514 2514 # no 'ff...' match in radix tree, wdir identified
2515 2515 raise error.WdirUnsupported
2516 2516 else:
2517 2517 return None
2518 2518 except error.RevlogError:
2519 2519 # parsers.c radix tree lookup gave multiple matches
2520 2520 # fast path: for unfiltered changelog, radix tree is accurate
2521 2521 if not getattr(self, 'filteredrevs', None):
2522 2522 ambiguous = True
2523 2523 # fall through to slow path that filters hidden revisions
2524 2524 except (AttributeError, ValueError):
2525 2525 # we are pure python, or key is not hex
2526 2526 pass
2527 2527 if ambiguous:
2528 2528 raise error.AmbiguousPrefixLookupError(
2529 2529 id, self.display_id, _(b'ambiguous identifier')
2530 2530 )
2531 2531
2532 2532 if id in self._pcache:
2533 2533 return self._pcache[id]
2534 2534
2535 2535 if len(id) <= 40:
2536 2536 # hex(node)[:...]
2537 2537 l = len(id) // 2 * 2 # grab an even number of digits
2538 2538 try:
2539 2539 # we're dropping the last digit, so let's check that it's hex,
2540 2540 # to avoid the expensive computation below if it's not
2541 2541 if len(id) % 2 > 0:
2542 2542 if not (id[-1] in hexdigits):
2543 2543 return None
2544 2544 prefix = bin(id[:l])
2545 2545 except binascii.Error:
2546 2546 pass
2547 2547 else:
2548 2548 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2549 2549 nl = [
2550 2550 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2551 2551 ]
2552 2552 if self.nodeconstants.nullhex.startswith(id):
2553 2553 nl.append(self.nullid)
2554 2554 if len(nl) > 0:
2555 2555 if len(nl) == 1 and not maybewdir:
2556 2556 self._pcache[id] = nl[0]
2557 2557 return nl[0]
2558 2558 raise error.AmbiguousPrefixLookupError(
2559 2559 id, self.display_id, _(b'ambiguous identifier')
2560 2560 )
2561 2561 if maybewdir:
2562 2562 raise error.WdirUnsupported
2563 2563 return None
2564 2564
2565 2565 def lookup(self, id):
2566 2566 """locate a node based on:
2567 2567 - revision number or str(revision number)
2568 2568 - nodeid or subset of hex nodeid
2569 2569 """
2570 2570 n = self._match(id)
2571 2571 if n is not None:
2572 2572 return n
2573 2573 n = self._partialmatch(id)
2574 2574 if n:
2575 2575 return n
2576 2576
2577 2577 raise error.LookupError(id, self.display_id, _(b'no match found'))
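# Usage sketch of the resolution order implemented by _match() and
# _partialmatch() above (values are hypothetical):
#
#   rl.lookup(0)           # integer revision       -> node of rev 0
#   rl.lookup(b'0')        # str(revision number)   -> node of rev 0
#   rl.lookup(some_node)   # 20-byte binary nodeid  -> returned as-is
#   rl.lookup(b'1f0c3a')   # unambiguous hex prefix -> full binary node
#   # an ambiguous prefix raises error.AmbiguousPrefixLookupError,
#   # and no match at all raises error.LookupError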
2578 2578
2579 2579 def shortest(self, node, minlength=1):
2580 2580 """Find the shortest unambiguous prefix that matches node."""
2581 2581
2582 2582 def isvalid(prefix):
2583 2583 try:
2584 2584 matchednode = self._partialmatch(prefix)
2585 2585 except error.AmbiguousPrefixLookupError:
2586 2586 return False
2587 2587 except error.WdirUnsupported:
2588 2588 # single 'ff...' match
2589 2589 return True
2590 2590 if matchednode is None:
2591 2591 raise error.LookupError(node, self.display_id, _(b'no node'))
2592 2592 return True
2593 2593
2594 2594 def maybewdir(prefix):
2595 2595 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2596 2596
2597 2597 hexnode = hex(node)
2598 2598
2599 2599 def disambiguate(hexnode, minlength):
2600 2600 """Disambiguate against wdirid."""
2601 2601 for length in range(minlength, len(hexnode) + 1):
2602 2602 prefix = hexnode[:length]
2603 2603 if not maybewdir(prefix):
2604 2604 return prefix
2605 2605
2606 2606 if not getattr(self, 'filteredrevs', None):
2607 2607 try:
2608 2608 length = max(self.index.shortest(node), minlength)
2609 2609 return disambiguate(hexnode, length)
2610 2610 except error.RevlogError:
2611 2611 if node != self.nodeconstants.wdirid:
2612 2612 raise error.LookupError(
2613 2613 node, self.display_id, _(b'no node')
2614 2614 )
2615 2615 except AttributeError:
2616 2616 # Fall through to pure code
2617 2617 pass
2618 2618
2619 2619 if node == self.nodeconstants.wdirid:
2620 2620 for length in range(minlength, len(hexnode) + 1):
2621 2621 prefix = hexnode[:length]
2622 2622 if isvalid(prefix):
2623 2623 return prefix
2624 2624
2625 2625 for length in range(minlength, len(hexnode) + 1):
2626 2626 prefix = hexnode[:length]
2627 2627 if isvalid(prefix):
2628 2628 return disambiguate(hexnode, length)
2629 2629
2630 2630 def cmp(self, node, text):
2631 2631 """compare text with a given file revision
2632 2632
2633 2633 returns True if text is different than what is stored.
2634 2634 """
2635 2635 p1, p2 = self.parents(node)
2636 2636 return storageutil.hashrevisionsha1(text, p1, p2) != node
2637 2637
2638 2638 def deltaparent(self, rev):
2639 2639 """return deltaparent of the given revision"""
2640 2640 base = self.index[rev][3]
2641 2641 if base == rev:
2642 2642 return nullrev
2643 2643 elif self.delta_config.general_delta:
2644 2644 return base
2645 2645 else:
2646 2646 return rev - 1
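# Behaviour sketch (restating the branches above, not adding to them): with
# general delta, the delta parent is whatever base the index recorded; in
# the legacy layout every delta is against the previous revision; and a
# revision that is its own base has no delta parent at all.
#
#   # generaldelta:     deltaparent(rev) == index[rev][3]
#   # legacy (pre-GD):  deltaparent(rev) == rev - 1
#   # full snapshot:    deltaparent(rev) == nullrev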
2647 2647
2648 2648 def issnapshot(self, rev):
2649 2649 """tells whether rev is a snapshot"""
2650 2650 ret = self._inner.issnapshot(rev)
2651 2651 self.issnapshot = self._inner.issnapshot
2652 2652 return ret
2653 2653
2654 2654 def snapshotdepth(self, rev):
2655 2655 """number of snapshot in the chain before this one"""
2656 2656 if not self.issnapshot(rev):
2657 2657 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2658 2658 return len(self._inner._deltachain(rev)[0]) - 1
2659 2659
2660 2660 def revdiff(self, rev1, rev2):
2661 2661 """return or calculate a delta between two revisions
2662 2662
2663 2663 The delta calculated is in binary form and is intended to be written to
2664 2664 revlog data directly. So this function needs raw revision data.
2665 2665 """
2666 2666 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2667 2667 return bytes(self._inner._chunk(rev2))
2668 2668
2669 2669 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2670 2670
2671 2671 def revision(self, nodeorrev):
2672 2672 """return an uncompressed revision of a given node or revision
2673 2673 number.
2674 2674 """
2675 2675 return self._revisiondata(nodeorrev)
2676 2676
2677 2677 def sidedata(self, nodeorrev):
2678 2678 """a map of extra data related to the changeset but not part of the hash
2679 2679
2680 2680 This function currently returns a dictionary. However, a more advanced
2681 2681 mapping object will likely be used in the future for more
2682 2682 efficient/lazy code.
2683 2683 """
2684 2684 # deal with <nodeorrev> argument type
2685 2685 if isinstance(nodeorrev, int):
2686 2686 rev = nodeorrev
2687 2687 else:
2688 2688 rev = self.rev(nodeorrev)
2689 2689 return self._sidedata(rev)
2690 2690
2691 2691 def _rawtext(self, node, rev):
2692 2692 """return the possibly unvalidated rawtext for a revision
2693 2693
2694 2694 returns (rev, rawtext, validated)
2695 2695 """
2696 2696 # Check if we have the entry in cache
2697 2697 # The cache entry looks like (node, rev, rawtext)
2698 2698 if self._inner._revisioncache:
2699 2699 if self._inner._revisioncache[0] == node:
2700 2700 return (rev, self._inner._revisioncache[2], True)
2701 2701
2702 2702 if rev is None:
2703 2703 rev = self.rev(node)
2704 2704
2705 2705 return self._inner.raw_text(node, rev)
2706 2706
2707 2707 def _revisiondata(self, nodeorrev, raw=False):
2708 2708 # deal with <nodeorrev> argument type
2709 2709 if isinstance(nodeorrev, int):
2710 2710 rev = nodeorrev
2711 2711 node = self.node(rev)
2712 2712 else:
2713 2713 node = nodeorrev
2714 2714 rev = None
2715 2715
2716 2716 # fast path the special `nullid` rev
2717 2717 if node == self.nullid:
2718 2718 return b""
2719 2719
2720 2720 # ``rawtext`` is the text as stored inside the revlog. Might be the
2721 2721 # revision or might need to be processed to retrieve the revision.
2722 2722 rev, rawtext, validated = self._rawtext(node, rev)
2723 2723
2724 2724 if raw and validated:
2725 2725 # if we don't want to process the raw text and that raw
2726 2726 # text is cached, we can exit early.
2727 2727 return rawtext
2728 2728 if rev is None:
2729 2729 rev = self.rev(node)
2730 2730 # the revlog's flag for this revision
2731 2731 # (usually alter its state or content)
2732 2732 flags = self.flags(rev)
2733 2733
2734 2734 if validated and flags == REVIDX_DEFAULT_FLAGS:
2735 2735 # no extra flags set, no flag processor runs, text = rawtext
2736 2736 return rawtext
2737 2737
2738 2738 if raw:
2739 2739 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2740 2740 text = rawtext
2741 2741 else:
2742 2742 r = flagutil.processflagsread(self, rawtext, flags)
2743 2743 text, validatehash = r
2744 2744 if validatehash:
2745 2745 self.checkhash(text, node, rev=rev)
2746 2746 if not validated:
2747 2747 self._inner._revisioncache = (node, rev, rawtext)
2748 2748
2749 2749 return text
2750 2750
2751 2751 def _sidedata(self, rev):
2752 2752 """Return the sidedata for a given revision number."""
2753 2753 sidedata_end = None
2754 2754 if self._docket is not None:
2755 2755 sidedata_end = self._docket.sidedata_end
2756 2756 return self._inner.sidedata(rev, sidedata_end)
2757 2757
2758 2758 def rawdata(self, nodeorrev):
2759 2759 """return an uncompressed raw data of a given node or revision number."""
2760 2760 return self._revisiondata(nodeorrev, raw=True)
2761 2761
2762 2762 def hash(self, text, p1, p2):
2763 2763 """Compute a node hash.
2764 2764
2765 2765 Available as a function so that subclasses can replace the hash
2766 2766 as needed.
2767 2767 """
2768 2768 return storageutil.hashrevisionsha1(text, p1, p2)
2769 2769
2770 2770 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2771 2771 """Check node hash integrity.
2772 2772
2773 2773 Available as a function so that subclasses can extend hash mismatch
2774 2774 behaviors as needed.
2775 2775 """
2776 2776 try:
2777 2777 if p1 is None and p2 is None:
2778 2778 p1, p2 = self.parents(node)
2779 2779 if node != self.hash(text, p1, p2):
2780 2780 # Clear the revision cache on hash failure. The revision cache
2781 2781 # only stores the raw revision and clearing the cache does have
2782 2782 # the side-effect that we won't have a cache hit when the raw
2783 2783 # revision data is accessed. But this case should be rare and
2784 2784 # it is extra work to teach the cache about the hash
2785 2785 # verification state.
2786 2786 if (
2787 2787 self._inner._revisioncache
2788 2788 and self._inner._revisioncache[0] == node
2789 2789 ):
2790 2790 self._inner._revisioncache = None
2791 2791
2792 2792 revornode = rev
2793 2793 if revornode is None:
2794 2794 revornode = templatefilters.short(hex(node))
2795 2795 raise error.RevlogError(
2796 2796 _(b"integrity check failed on %s:%s")
2797 2797 % (self.display_id, pycompat.bytestr(revornode))
2798 2798 )
2799 2799 except error.RevlogError:
2800 2800 if self.feature_config.censorable and storageutil.iscensoredtext(
2801 2801 text
2802 2802 ):
2803 2803 raise error.CensoredNodeError(self.display_id, node, text)
2804 2804 raise
2805 2805
2806 2806 @property
2807 2807 def _split_index_file(self):
2808 2808 """the path where to expect the index of an ongoing splitting operation
2809 2809
2810 2810 The file will only exist if a splitting operation is in progress, but
2811 2811 it is always expected at the same location."""
2812 2812 parts = self.radix.split(b'/')
2813 2813 if len(parts) > 1:
2814 2814 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2815 2815 head = parts[0] + b'-s'
2816 2816 mids = parts[1:-1]
2817 2817 tail = parts[-1] + b'.i'
2818 2818 pieces = [head] + mids + [tail]
2819 2819 return b'/'.join(pieces)
2820 2820 else:
2821 2821 # the revlog is stored at the root of the store (changelog or
2822 2822 # manifest), no risk of collision.
2823 2823 return self.radix + b'.i.s'
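# Worked example of the property above (paths are hypothetical):
#
#   radix = b'data/some/dir/file.txt'  ->  b'data-s/some/dir/file.txt.i'
#   radix = b'00manifest'              ->  b'00manifest.i.s'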
2824 2824
2825 2825 def _enforceinlinesize(self, tr, side_write=True):
2826 2826 """Check if the revlog is too big for inline and convert if so.
2827 2827
2828 2828 This should be called after revisions are added to the revlog. If the
2829 2829 revlog has grown too large to be an inline revlog, it will convert it
2830 2830 to use multiple index and data files.
2831 2831 """
2832 2832 tiprev = len(self) - 1
2833 2833 total_size = self.start(tiprev) + self.length(tiprev)
2834 2834 if not self._inline or (self._may_inline and total_size < _maxinline):
2835 2835 return
2836 2836
2837 2837 if self._docket is not None:
2838 2838 msg = b"inline revlog should not have a docket"
2839 2839 raise error.ProgrammingError(msg)
2840 2840
2841 2841 # In the common case, we enforce the inline size because the revlog has
2842 2842 # been appended to. In that case, it must have an initial offset
2843 2843 # recorded in the transaction.
2844 2844 troffset = tr.findoffset(self._inner.canonical_index_file)
2845 2845 pre_touched = troffset is not None
2846 2846 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2847 2847 raise error.RevlogError(
2848 2848 _(b"%s not found in the transaction") % self._indexfile
2849 2849 )
2850 2850
2851 2851 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2852 2852 tr.add(self._datafile, 0)
2853 2853
2854 2854 new_index_file_path = None
2855 2855 if side_write:
2856 2856 old_index_file_path = self._indexfile
2857 2857 new_index_file_path = self._split_index_file
2858 2858 opener = self.opener
2859 2859 weak_self = weakref.ref(self)
2860 2860
2861 2861 # the "split" index replaces the real index when the transaction is
2862 2862 # finalized
2863 2863 def finalize_callback(tr):
2864 2864 opener.rename(
2865 2865 new_index_file_path,
2866 2866 old_index_file_path,
2867 2867 checkambig=True,
2868 2868 )
2869 2869 maybe_self = weak_self()
2870 2870 if maybe_self is not None:
2871 2871 maybe_self._indexfile = old_index_file_path
2872 2872 maybe_self._inner.index_file = maybe_self._indexfile
2873 2873
2874 2874 def abort_callback(tr):
2875 2875 maybe_self = weak_self()
2876 2876 if maybe_self is not None:
2877 2877 maybe_self._indexfile = old_index_file_path
2878 2878 maybe_self._inner.inline = True
2879 2879 maybe_self._inner.index_file = old_index_file_path
2880 2880
2881 2881 tr.registertmp(new_index_file_path)
2882 2882 if self.target[1] is not None:
2883 2883 callback_id = b'000-revlog-split-%d-%s' % self.target
2884 2884 else:
2885 2885 callback_id = b'000-revlog-split-%d' % self.target[0]
2886 2886 tr.addfinalize(callback_id, finalize_callback)
2887 2887 tr.addabort(callback_id, abort_callback)
2888 2888
2889 2889 self._format_flags &= ~FLAG_INLINE_DATA
2890 2890 self._inner.split_inline(
2891 2891 tr,
2892 2892 self._format_flags | self._format_version,
2893 2893 new_index_file_path=new_index_file_path,
2894 2894 )
2895 2895
2896 2896 self._inline = False
2897 2897 if new_index_file_path is not None:
2898 2898 self._indexfile = new_index_file_path
2899 2899
2900 2900 nodemaputil.setup_persistent_nodemap(tr, self)
2901 2901
2902 2902 def _nodeduplicatecallback(self, transaction, node):
2903 2903 """called when trying to add a node already stored."""
2904 2904
2905 2905 @contextlib.contextmanager
2906 2906 def reading(self):
2907 2907 with self._inner.reading():
2908 2908 yield
2909 2909
2910 2910 @contextlib.contextmanager
2911 2911 def _writing(self, transaction):
2912 2912 if self._trypending:
2913 2913 msg = b'try to write in a `trypending` revlog: %s'
2914 2914 msg %= self.display_id
2915 2915 raise error.ProgrammingError(msg)
2916 2916 if self._inner.is_writing:
2917 2917 yield
2918 2918 else:
2919 2919 data_end = None
2920 2920 sidedata_end = None
2921 2921 if self._docket is not None:
2922 2922 data_end = self._docket.data_end
2923 2923 sidedata_end = self._docket.sidedata_end
2924 2924 with self._inner.writing(
2925 2925 transaction,
2926 2926 data_end=data_end,
2927 2927 sidedata_end=sidedata_end,
2928 2928 ):
2929 2929 yield
2930 2930 if self._docket is not None:
2931 2931 self._write_docket(transaction)
2932 2932
2933 2933 @property
2934 2934 def is_delaying(self):
2935 2935 return self._inner.is_delaying
2936 2936
2937 2937 def _write_docket(self, transaction):
2938 2938 """write the current docket on disk
2939 2939
2940 2940 Exists as a method to help the changelog implement transaction logic
2941 2941
2942 2942 We could also imagine using the same transaction logic for all revlogs
2943 2943 since dockets are cheap."""
2944 2944 self._docket.write(transaction)
2945 2945
2946 2946 def addrevision(
2947 2947 self,
2948 2948 text,
2949 2949 transaction,
2950 2950 link,
2951 2951 p1,
2952 2952 p2,
2953 2953 cachedelta=None,
2954 2954 node=None,
2955 2955 flags=REVIDX_DEFAULT_FLAGS,
2956 2956 deltacomputer=None,
2957 2957 sidedata=None,
2958 2958 ):
2959 2959 """add a revision to the log
2960 2960
2961 2961 text - the revision data to add
2962 2962 transaction - the transaction object used for rollback
2963 2963 link - the linkrev data to add
2964 2964 p1, p2 - the parent nodeids of the revision
2965 2965 cachedelta - an optional precomputed delta
2966 2966 node - nodeid of revision; typically node is not specified, and it is
2967 2967 computed by default as hash(text, p1, p2), however subclasses might
2968 2968 use different hashing method (and override checkhash() in such case)
2969 2969 flags - the known flags to set on the revision
2970 2970 deltacomputer - an optional deltacomputer instance shared between
2971 2971 multiple calls
2972 2972 """
2973 2973 if link == nullrev:
2974 2974 raise error.RevlogError(
2975 2975 _(b"attempted to add linkrev -1 to %s") % self.display_id
2976 2976 )
2977 2977
2978 2978 if sidedata is None:
2979 2979 sidedata = {}
2980 2980 elif sidedata and not self.feature_config.has_side_data:
2981 2981 raise error.ProgrammingError(
2982 2982 _(b"trying to add sidedata to a revlog who don't support them")
2983 2983 )
2984 2984
2985 2985 if flags:
2986 2986 node = node or self.hash(text, p1, p2)
2987 2987
2988 2988 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2989 2989
2990 2990 # If the flag processor modifies the revision data, ignore any provided
2991 2991 # cachedelta.
2992 2992 if rawtext != text:
2993 2993 cachedelta = None
2994 2994
2995 2995 if len(rawtext) > _maxentrysize:
2996 2996 raise error.RevlogError(
2997 2997 _(
2998 2998 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2999 2999 )
3000 3000 % (self.display_id, len(rawtext))
3001 3001 )
3002 3002
3003 3003 node = node or self.hash(rawtext, p1, p2)
3004 3004 rev = self.index.get_rev(node)
3005 3005 if rev is not None:
3006 3006 return rev
3007 3007
3008 3008 if validatehash:
3009 3009 self.checkhash(rawtext, node, p1=p1, p2=p2)
3010 3010
3011 3011 return self.addrawrevision(
3012 3012 rawtext,
3013 3013 transaction,
3014 3014 link,
3015 3015 p1,
3016 3016 p2,
3017 3017 node,
3018 3018 flags,
3019 3019 cachedelta=cachedelta,
3020 3020 deltacomputer=deltacomputer,
3021 3021 sidedata=sidedata,
3022 3022 )
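# Minimal usage sketch (hypothetical transaction and parents; not part of
# the module): adding one revision with the default flags and no sidedata.
#
#   with repo.transaction(b'example') as tr:   # hypothetical repo/tr
#       rev = rl.addrevision(b'new file content\n', tr, linkrev, p1, p2)
#   # yields the revision number of the (possibly pre-existing) node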
3023 3023
3024 3024 def addrawrevision(
3025 3025 self,
3026 3026 rawtext,
3027 3027 transaction,
3028 3028 link,
3029 3029 p1,
3030 3030 p2,
3031 3031 node,
3032 3032 flags,
3033 3033 cachedelta=None,
3034 3034 deltacomputer=None,
3035 3035 sidedata=None,
3036 3036 ):
3037 3037 """add a raw revision with known flags, node and parents
3038 3038 useful when reusing a revision not stored in this revlog (ex: received
3039 3039 over wire, or read from an external bundle).
3040 3040 """
3041 3041 with self._writing(transaction):
3042 3042 return self._addrevision(
3043 3043 node,
3044 3044 rawtext,
3045 3045 transaction,
3046 3046 link,
3047 3047 p1,
3048 3048 p2,
3049 3049 flags,
3050 3050 cachedelta,
3051 3051 deltacomputer=deltacomputer,
3052 3052 sidedata=sidedata,
3053 3053 )
3054 3054
3055 3055 def compress(self, data):
3056 3056 return self._inner.compress(data)
3057 3057
3058 3058 def decompress(self, data):
3059 3059 return self._inner.decompress(data)
3060 3060
3061 3061 def _addrevision(
3062 3062 self,
3063 3063 node,
3064 3064 rawtext,
3065 3065 transaction,
3066 3066 link,
3067 3067 p1,
3068 3068 p2,
3069 3069 flags,
3070 3070 cachedelta,
3071 3071 alwayscache=False,
3072 3072 deltacomputer=None,
3073 3073 sidedata=None,
3074 3074 ):
3075 3075 """internal function to add revisions to the log
3076 3076
3077 3077 see addrevision for argument descriptions.
3078 3078
3079 3079 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3080 3080
3081 3081 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3082 3082 be used.
3083 3083
3084 3084 invariants:
3085 3085 - rawtext is optional (can be None); if not set, cachedelta must be set.
3086 3086 if both are set, they must correspond to each other.
3087 3087 """
3088 3088 if node == self.nullid:
3089 3089 raise error.RevlogError(
3090 3090 _(b"%s: attempt to add null revision") % self.display_id
3091 3091 )
3092 3092 if (
3093 3093 node == self.nodeconstants.wdirid
3094 3094 or node in self.nodeconstants.wdirfilenodeids
3095 3095 ):
3096 3096 raise error.RevlogError(
3097 3097 _(b"%s: attempt to add wdir revision") % self.display_id
3098 3098 )
3099 3099 if self._inner._writinghandles is None:
3100 3100 msg = b'adding revision outside `revlog._writing` context'
3101 3101 raise error.ProgrammingError(msg)
3102 3102
3103 3103 btext = [rawtext]
3104 3104
3105 3105 curr = len(self)
3106 3106 prev = curr - 1
3107 3107
3108 3108 offset = self._get_data_offset(prev)
3109 3109
3110 3110 if self._concurrencychecker:
3111 3111 ifh, dfh, sdfh = self._inner._writinghandles
3112 3112 # XXX no checking for the sidedata file
3113 3113 if self._inline:
3114 3114 # offset is "as if" it were in the .d file, so we need to add on
3115 3115 # the size of the entry metadata.
3116 3116 self._concurrencychecker(
3117 3117 ifh, self._indexfile, offset + curr * self.index.entry_size
3118 3118 )
3119 3119 else:
3120 3120 # Entries in the .i are a consistent size.
3121 3121 self._concurrencychecker(
3122 3122 ifh, self._indexfile, curr * self.index.entry_size
3123 3123 )
3124 3124 self._concurrencychecker(dfh, self._datafile, offset)
3125 3125
3126 3126 p1r, p2r = self.rev(p1), self.rev(p2)
3127 3127
3128 3128 # full versions are inserted when the needed deltas
3129 3129 # become comparable to the uncompressed text
3130 3130 if rawtext is None:
3131 3131 # need the rawtext size, before it is changed by flag processors, which is
3132 3132 # the non-raw size. use revlog explicitly to avoid filelog's extra
3133 3133 # logic that might remove metadata size.
3134 3134 textlen = mdiff.patchedsize(
3135 3135 revlog.size(self, cachedelta[0]), cachedelta[1]
3136 3136 )
3137 3137 else:
3138 3138 textlen = len(rawtext)
3139 3139
3140 3140 if deltacomputer is None:
3141 3141 write_debug = None
3142 3142 if self.delta_config.debug_delta:
3143 3143 write_debug = transaction._report
3144 3144 deltacomputer = deltautil.deltacomputer(
3145 3145 self, write_debug=write_debug
3146 3146 )
3147 3147
3148 3148 if cachedelta is not None and len(cachedelta) == 2:
3149 3149 # If the cached delta has no information about how it should be
3150 3150 # reused, add the default reuse instruction according to the
3151 3151 # revlog's configuration.
3152 3152 if (
3153 3153 self.delta_config.general_delta
3154 3154 and self.delta_config.lazy_delta_base
3155 3155 ):
3156 3156 delta_base_reuse = DELTA_BASE_REUSE_TRY
3157 3157 else:
3158 3158 delta_base_reuse = DELTA_BASE_REUSE_NO
3159 3159 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3160 3160
3161 3161 revinfo = revlogutils.revisioninfo(
3162 3162 node,
3163 3163 p1,
3164 3164 p2,
3165 3165 btext,
3166 3166 textlen,
3167 3167 cachedelta,
3168 3168 flags,
3169 3169 )
3170 3170
3171 3171 deltainfo = deltacomputer.finddeltainfo(revinfo)
3172 3172
3173 3173 compression_mode = COMP_MODE_INLINE
3174 3174 if self._docket is not None:
3175 3175 default_comp = self._docket.default_compression_header
3176 3176 r = deltautil.delta_compression(default_comp, deltainfo)
3177 3177 compression_mode, deltainfo = r
3178 3178
3179 3179 sidedata_compression_mode = COMP_MODE_INLINE
3180 3180 if sidedata and self.feature_config.has_side_data:
3181 3181 sidedata_compression_mode = COMP_MODE_PLAIN
3182 3182 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3183 3183 sidedata_offset = self._docket.sidedata_end
3184 3184 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3185 3185 if (
3186 3186 h != b'u'
3187 3187 and comp_sidedata[0:1] != b'\0'
3188 3188 and len(comp_sidedata) < len(serialized_sidedata)
3189 3189 ):
3190 3190 assert not h
3191 3191 if (
3192 3192 comp_sidedata[0:1]
3193 3193 == self._docket.default_compression_header
3194 3194 ):
3195 3195 sidedata_compression_mode = COMP_MODE_DEFAULT
3196 3196 serialized_sidedata = comp_sidedata
3197 3197 else:
3198 3198 sidedata_compression_mode = COMP_MODE_INLINE
3199 3199 serialized_sidedata = comp_sidedata
3200 3200 else:
3201 3201 serialized_sidedata = b""
3202 3202 # Don't store the offset if the sidedata is empty; that way
3203 3203 # we can easily detect empty sidedata and it will be no different
3204 3204 # from sidedata we add manually.
3205 3205 sidedata_offset = 0
3206 3206
3207 3207 rank = RANK_UNKNOWN
3208 3208 if self.feature_config.compute_rank:
3209 3209 if (p1r, p2r) == (nullrev, nullrev):
3210 3210 rank = 1
3211 3211 elif p1r != nullrev and p2r == nullrev:
3212 3212 rank = 1 + self.fast_rank(p1r)
3213 3213 elif p1r == nullrev and p2r != nullrev:
3214 3214 rank = 1 + self.fast_rank(p2r)
3215 3215 else: # merge node
3216 3216 if rustdagop is not None and self.index.rust_ext_compat:
3217 3217 rank = rustdagop.rank(self.index, p1r, p2r)
3218 3218 else:
3219 3219 pmin, pmax = sorted((p1r, p2r))
3220 3220 rank = 1 + self.fast_rank(pmax)
3221 3221 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3222 3222
3223 3223 e = revlogutils.entry(
3224 3224 flags=flags,
3225 3225 data_offset=offset,
3226 3226 data_compressed_length=deltainfo.deltalen,
3227 3227 data_uncompressed_length=textlen,
3228 3228 data_compression_mode=compression_mode,
3229 3229 data_delta_base=deltainfo.base,
3230 3230 link_rev=link,
3231 3231 parent_rev_1=p1r,
3232 3232 parent_rev_2=p2r,
3233 3233 node_id=node,
3234 3234 sidedata_offset=sidedata_offset,
3235 3235 sidedata_compressed_length=len(serialized_sidedata),
3236 3236 sidedata_compression_mode=sidedata_compression_mode,
3237 3237 rank=rank,
3238 3238 )
3239 3239
3240 3240 self.index.append(e)
3241 3241 entry = self.index.entry_binary(curr)
3242 3242 if curr == 0 and self._docket is None:
3243 3243 header = self._format_flags | self._format_version
3244 3244 header = self.index.pack_header(header)
3245 3245 entry = header + entry
3246 3246 self._writeentry(
3247 3247 transaction,
3248 3248 entry,
3249 3249 deltainfo.data,
3250 3250 link,
3251 3251 offset,
3252 3252 serialized_sidedata,
3253 3253 sidedata_offset,
3254 3254 )
3255 3255
3256 3256 rawtext = btext[0]
3257 3257
3258 3258 if alwayscache and rawtext is None:
3259 3259 rawtext = deltacomputer.buildtext(revinfo)
3260 3260
3261 3261 if type(rawtext) == bytes: # only accept immutable objects
3262 3262 self._inner._revisioncache = (node, curr, rawtext)
3263 3263 self._chainbasecache[curr] = deltainfo.chainbase
3264 3264 return curr
3265 3265
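The rank computed above is the size of a revision's ancestor set, the revision itself included: 1 for a root, the parent's rank plus one for a linear revision, and the size of the union of both parents' ancestor sets plus one for a merge. As a point of reference only, a minimal revlog-independent sketch of the same quantity; the `parents` mapping is hypothetical and not part of this module:

    def naive_rank(parents, rev):
        # parents: dict mapping rev -> (p1, p2), with -1 for a null parent
        seen = set()
        stack = [rev]
        while stack:
            r = stack.pop()
            if r == -1 or r in seen:
                continue
            seen.add(r)
            stack.extend(parents[r])
        return len(seen)
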
3266 3266 def _get_data_offset(self, prev):
3267 3267 """Returns the current offset in the (in-transaction) data file.
3268 3268 Versions < 2 of the revlog can compute this in O(1); revlog v2 needs a docket
3269 3269 file to store that information: since sidedata can be rewritten to the
3270 3270 end of the data file within a transaction, you can have cases where, for
3271 3271 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3272 3272 to `n - 1`'s sidedata being written after `n`'s data.
3273 3273
3274 3274 TODO cache this in a docket file before getting out of experimental."""
3275 3275 if self._docket is None:
3276 3276 return self.end(prev)
3277 3277 else:
3278 3278 return self._docket.data_end
3279 3279
3280 3280 def _writeentry(
3281 3281 self,
3282 3282 transaction,
3283 3283 entry,
3284 3284 data,
3285 3285 link,
3286 3286 offset,
3287 3287 sidedata,
3288 3288 sidedata_offset,
3289 3289 ):
3290 3290 # Files opened in a+ mode have inconsistent behavior on various
3291 3291 # platforms. Windows requires that a file positioning call be made
3292 3292 # when the file handle transitions between reads and writes. See
3293 3293 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3294 3294 # platforms, Python or the platform itself can be buggy. Some versions
3295 3295 # of Solaris have been observed to not append at the end of the file
3296 3296 # if the file was seeked to before the end. See issue4943 for more.
3297 3297 #
3298 3298 # We work around this issue by inserting a seek() before writing.
3299 3299 # Note: This is likely not necessary on Python 3. However, because
3300 3300 # the file handle is reused for reads and may be seeked there, we need
3301 3301 # to be careful before changing this.
3302 3302 index_end = data_end = sidedata_end = None
3303 3303 if self._docket is not None:
3304 3304 index_end = self._docket.index_end
3305 3305 data_end = self._docket.data_end
3306 3306 sidedata_end = self._docket.sidedata_end
3307 3307
3308 3308 files_end = self._inner.write_entry(
3309 3309 transaction,
3310 3310 entry,
3311 3311 data,
3312 3312 link,
3313 3313 offset,
3314 3314 sidedata,
3315 3315 sidedata_offset,
3316 3316 index_end,
3317 3317 data_end,
3318 3318 sidedata_end,
3319 3319 )
3320 3320 self._enforceinlinesize(transaction)
3321 3321 if self._docket is not None:
3322 3322 self._docket.index_end = files_end[0]
3323 3323 self._docket.data_end = files_end[1]
3324 3324 self._docket.sidedata_end = files_end[2]
3325 3325
3326 3326 nodemaputil.setup_persistent_nodemap(transaction, self)
3327 3327
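The defensive seek described in the comment above can be illustrated outside of the revlog classes; a minimal sketch, assuming a file handle shared between reads and appends (names are illustrative):

    import os

    def append_record(fh, payload):
        # reposition explicitly: a prior read may have moved the cursor, and
        # some platforms do not reliably append otherwise
        fh.seek(0, os.SEEK_END)
        offset = fh.tell()
        fh.write(payload)
        return offset
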
3328 3328 def addgroup(
3329 3329 self,
3330 3330 deltas,
3331 3331 linkmapper,
3332 3332 transaction,
3333 3333 alwayscache=False,
3334 3334 addrevisioncb=None,
3335 3335 duplicaterevisioncb=None,
3336 3336 debug_info=None,
3337 3337 delta_base_reuse_policy=None,
3338 3338 ):
3339 3339 """
3340 3340 add a delta group
3341 3341
3342 3342 Given a set of deltas, add them to the revision log. The
3343 3343 first delta is against its parent, which should be in our
3344 3344 log; the rest are against the previous delta.
3345 3345
3346 3346 If ``addrevisioncb`` is defined, it will be called with arguments of
3347 3347 this revlog and the node that was added.
3348 3348 """
3349 3349
3350 3350 if self._adding_group:
3351 3351 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3352 3352
3353 3353 # read the default delta-base reuse policy from revlog config if the
3354 3354 # group did not specify one.
3355 3355 if delta_base_reuse_policy is None:
3356 3356 if (
3357 3357 self.delta_config.general_delta
3358 3358 and self.delta_config.lazy_delta_base
3359 3359 ):
3360 3360 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3361 3361 else:
3362 3362 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3363 3363
3364 3364 self._adding_group = True
3365 3365 empty = True
3366 3366 try:
3367 3367 with self._writing(transaction):
3368 3368 write_debug = None
3369 3369 if self.delta_config.debug_delta:
3370 3370 write_debug = transaction._report
3371 3371 deltacomputer = deltautil.deltacomputer(
3372 3372 self,
3373 3373 write_debug=write_debug,
3374 3374 debug_info=debug_info,
3375 3375 )
3376 3376 # loop through our set of deltas
3377 3377 for data in deltas:
3378 3378 (
3379 3379 node,
3380 3380 p1,
3381 3381 p2,
3382 3382 linknode,
3383 3383 deltabase,
3384 3384 delta,
3385 3385 flags,
3386 3386 sidedata,
3387 3387 ) = data
3388 3388 link = linkmapper(linknode)
3389 3389 flags = flags or REVIDX_DEFAULT_FLAGS
3390 3390
3391 3391 rev = self.index.get_rev(node)
3392 3392 if rev is not None:
3393 3393 # this can happen if two branches make the same change
3394 3394 self._nodeduplicatecallback(transaction, rev)
3395 3395 if duplicaterevisioncb:
3396 3396 duplicaterevisioncb(self, rev)
3397 3397 empty = False
3398 3398 continue
3399 3399
3400 3400 for p in (p1, p2):
3401 3401 if not self.index.has_node(p):
3402 3402 raise error.LookupError(
3403 3403 p, self.radix, _(b'unknown parent')
3404 3404 )
3405 3405
3406 3406 if not self.index.has_node(deltabase):
3407 3407 raise error.LookupError(
3408 3408 deltabase, self.display_id, _(b'unknown delta base')
3409 3409 )
3410 3410
3411 3411 baserev = self.rev(deltabase)
3412 3412
3413 3413 if baserev != nullrev and self.iscensored(baserev):
3414 3414 # if base is censored, delta must be full replacement in a
3415 3415 # single patch operation
3416 3416 hlen = struct.calcsize(b">lll")
3417 3417 oldlen = self.rawsize(baserev)
3418 3418 newlen = len(delta) - hlen
3419 3419 if delta[:hlen] != mdiff.replacediffheader(
3420 3420 oldlen, newlen
3421 3421 ):
3422 3422 raise error.CensoredBaseError(
3423 3423 self.display_id, self.node(baserev)
3424 3424 )
3425 3425
3426 3426 if not flags and self._peek_iscensored(baserev, delta):
3427 3427 flags |= REVIDX_ISCENSORED
3428 3428
3429 3429 # We assume consumers of addrevisioncb will want to retrieve
3430 3430 # the added revision, which will require a call to
3431 3431 # revision(). revision() will fast path if there is a cache
3432 3432 # hit. So, we tell _addrevision() to always cache in this case.
3433 3433 # We're only using addgroup() in the context of changegroup
3434 3434 # generation so the revision data can always be handled as raw
3435 3435 # by the flagprocessor.
3436 3436 rev = self._addrevision(
3437 3437 node,
3438 3438 None,
3439 3439 transaction,
3440 3440 link,
3441 3441 p1,
3442 3442 p2,
3443 3443 flags,
3444 3444 (baserev, delta, delta_base_reuse_policy),
3445 3445 alwayscache=alwayscache,
3446 3446 deltacomputer=deltacomputer,
3447 3447 sidedata=sidedata,
3448 3448 )
3449 3449
3450 3450 if addrevisioncb:
3451 3451 addrevisioncb(self, rev)
3452 3452 empty = False
3453 3453 finally:
3454 3454 self._adding_group = False
3455 3455 return not empty
3456 3456
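For illustration, a hedged sketch of the shape addgroup() consumes; `rl`, `changelog`, `tr` and the tuple fields are placeholders, not values from this module:

    deltas = [
        # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        (node, p1, p2, linknode, deltabase, delta, 0, {}),
    ]
    new_revs = []
    processed = rl.addgroup(
        deltas,
        linkmapper=changelog.rev,  # resolves a linknode to a linkrev
        transaction=tr,
        addrevisioncb=lambda store, rev: new_revs.append(rev),
    )
    # `processed` is True if at least one delta was seen, added or duplicate
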
3457 3457 def iscensored(self, rev):
3458 3458 """Check if a file revision is censored."""
3459 3459 if not self.feature_config.censorable:
3460 3460 return False
3461 3461
3462 3462 return self.flags(rev) & REVIDX_ISCENSORED
3463 3463
3464 3464 def _peek_iscensored(self, baserev, delta):
3465 3465 """Quickly check if a delta produces a censored revision."""
3466 3466 if not self.feature_config.censorable:
3467 3467 return False
3468 3468
3469 3469 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3470 3470
3471 3471 def getstrippoint(self, minlink):
3472 3472 """find the minimum rev that must be stripped to strip the linkrev
3473 3473
3474 3474 Returns a tuple containing the minimum rev and a set of all revs that
3475 3475 have linkrevs that will be broken by this strip.
3476 3476 """
3477 3477 return storageutil.resolvestripinfo(
3478 3478 minlink,
3479 3479 len(self) - 1,
3480 3480 self.headrevs(),
3481 3481 self.linkrev,
3482 3482 self.parentrevs,
3483 3483 )
3484 3484
3485 3485 def strip(self, minlink, transaction):
3486 3486 """truncate the revlog on the first revision with a linkrev >= minlink
3487 3487
3488 3488 This function is called when we're stripping revision minlink and
3489 3489 its descendants from the repository.
3490 3490
3491 3491 We have to remove all revisions with linkrev >= minlink, because
3492 3492 the equivalent changelog revisions will be renumbered after the
3493 3493 strip.
3494 3494
3495 3495 So we truncate the revlog on the first of these revisions, and
3496 3496 trust that the caller has saved the revisions that shouldn't be
3497 3497 removed and that it'll re-add them after this truncation.
3498 3498 """
3499 3499 if len(self) == 0:
3500 3500 return
3501 3501
3502 3502 rev, _ = self.getstrippoint(minlink)
3503 3503 if rev == len(self):
3504 3504 return
3505 3505
3506 3506 # first truncate the files on disk
3507 3507 data_end = self.start(rev)
3508 3508 if not self._inline:
3509 3509 transaction.add(self._datafile, data_end)
3510 3510 end = rev * self.index.entry_size
3511 3511 else:
3512 3512 end = data_end + (rev * self.index.entry_size)
3513 3513
3514 3514 if self._sidedatafile:
3515 3515 sidedata_end = self.sidedata_cut_off(rev)
3516 3516 transaction.add(self._sidedatafile, sidedata_end)
3517 3517
3518 3518 transaction.add(self._indexfile, end)
3519 3519 if self._docket is not None:
3520 3520 # XXX we could leverage the docket while stripping. However it is
3521 3521 # not powerful enough at the time of this comment
3522 3522 self._docket.index_end = end
3523 3523 self._docket.data_end = data_end
3524 3524 self._docket.sidedata_end = sidedata_end
3525 3525 self._docket.write(transaction, stripping=True)
3526 3526
3527 3527 # then reset internal state in memory to forget those revisions
3528 3528 self._chaininfocache = util.lrucachedict(500)
3529 3529 self._inner.clear_cache()
3530 3530
3531 3531 del self.index[rev:-1]
3532 3532
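A short sketch of how getstrippoint() and strip() fit together from a caller's perspective (variable names are illustrative):

    striprev, brokenrevs = rl.getstrippoint(minlink)
    if striprev < len(rl):
        # revisions in `brokenrevs` carry linkrevs the strip will break;
        # the caller is expected to handle them separately
        rl.strip(minlink, tr)
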
3533 3533 def checksize(self):
3534 3534 """Check size of index and data files
3535 3535
3536 3536 return a (dd, di) tuple.
3537 3537 - dd: extra bytes for the "data" file
3538 3538 - di: extra bytes for the "index" file
3539 3539
3540 3540 A healthy revlog will return (0, 0).
3541 3541 """
3542 3542 expected = 0
3543 3543 if len(self):
3544 3544 expected = max(0, self.end(len(self) - 1))
3545 3545
3546 3546 try:
3547 3547 with self._datafp() as f:
3548 3548 f.seek(0, io.SEEK_END)
3549 3549 actual = f.tell()
3550 3550 dd = actual - expected
3551 3551 except FileNotFoundError:
3552 3552 dd = 0
3553 3553
3554 3554 try:
3555 3555 f = self.opener(self._indexfile)
3556 3556 f.seek(0, io.SEEK_END)
3557 3557 actual = f.tell()
3558 3558 f.close()
3559 3559 s = self.index.entry_size
3560 3560 i = max(0, actual // s)
3561 3561 di = actual - (i * s)
3562 3562 if self._inline:
3563 3563 databytes = 0
3564 3564 for r in self:
3565 3565 databytes += max(0, self.length(r))
3566 3566 dd = 0
3567 3567 di = actual - len(self) * s - databytes
3568 3568 except FileNotFoundError:
3569 3569 di = 0
3570 3570
3571 3571 return (dd, di)
3572 3572
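As a quick reference for interpreting the result of checksize(), a sketch rather than part of the verify machinery:

    dd, di = rl.checksize()
    if dd or di:
        # trailing bytes not accounted for by the index; a healthy revlog
        # reports (0, 0)
        print('data off by %d bytes, index off by %d bytes' % (dd, di))
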
3573 3573 def files(self):
3574 3574 """return list of files that compose this revlog"""
3575 3575 res = [self._indexfile]
3576 3576 if self._docket_file is None:
3577 3577 if not self._inline:
3578 3578 res.append(self._datafile)
3579 3579 else:
3580 3580 res.append(self._docket_file)
3581 3581 res.extend(self._docket.old_index_filepaths(include_empty=False))
3582 3582 if self._docket.data_end:
3583 3583 res.append(self._datafile)
3584 3584 res.extend(self._docket.old_data_filepaths(include_empty=False))
3585 3585 if self._docket.sidedata_end:
3586 3586 res.append(self._sidedatafile)
3587 3587 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3588 3588 return res
3589 3589
3590 3590 def emitrevisions(
3591 3591 self,
3592 3592 nodes,
3593 3593 nodesorder=None,
3594 3594 revisiondata=False,
3595 3595 assumehaveparentrevisions=False,
3596 3596 deltamode=repository.CG_DELTAMODE_STD,
3597 3597 sidedata_helpers=None,
3598 3598 debug_info=None,
3599 3599 ):
3600 3600 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3601 3601 raise error.ProgrammingError(
3602 3602 b'unhandled value for nodesorder: %s' % nodesorder
3603 3603 )
3604 3604
3605 3605 if nodesorder is None and not self.delta_config.general_delta:
3606 3606 nodesorder = b'storage'
3607 3607
3608 3608 if (
3609 3609 not self._storedeltachains
3610 3610 and deltamode != repository.CG_DELTAMODE_PREV
3611 3611 ):
3612 3612 deltamode = repository.CG_DELTAMODE_FULL
3613 3613
3614 3614 return storageutil.emitrevisions(
3615 3615 self,
3616 3616 nodes,
3617 3617 nodesorder,
3618 3618 revlogrevisiondelta,
3619 3619 deltaparentfn=self.deltaparent,
3620 3620 candeltafn=self._candelta,
3621 3621 rawsizefn=self.rawsize,
3622 3622 revdifffn=self.revdiff,
3623 3623 flagsfn=self.flags,
3624 3624 deltamode=deltamode,
3625 3625 revisiondata=revisiondata,
3626 3626 assumehaveparentrevisions=assumehaveparentrevisions,
3627 3627 sidedata_helpers=sidedata_helpers,
3628 3628 debug_info=debug_info,
3629 3629 )
3630 3630
3631 3631 DELTAREUSEALWAYS = b'always'
3632 3632 DELTAREUSESAMEREVS = b'samerevs'
3633 3633 DELTAREUSENEVER = b'never'
3634 3634
3635 3635 DELTAREUSEFULLADD = b'fulladd'
3636 3636
3637 3637 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3638 3638
3639 3639 def clone(
3640 3640 self,
3641 3641 tr,
3642 3642 destrevlog,
3643 3643 addrevisioncb=None,
3644 3644 deltareuse=DELTAREUSESAMEREVS,
3645 3645 forcedeltabothparents=None,
3646 3646 sidedata_helpers=None,
3647 3647 ):
3648 3648 """Copy this revlog to another, possibly with format changes.
3649 3649
3650 3650 The destination revlog will contain the same revisions and nodes.
3651 3651 However, it may not be bit-for-bit identical due to e.g. delta encoding
3652 3652 differences.
3653 3653
3654 3654 The ``deltareuse`` argument controls how deltas from the existing revlog
3655 3655 are preserved in the destination revlog. The argument can have the
3656 3656 following values:
3657 3657
3658 3658 DELTAREUSEALWAYS
3659 3659 Deltas will always be reused (if possible), even if the destination
3660 3660 revlog would not select the same revisions for the delta. This is the
3661 3661 fastest mode of operation.
3662 3662 DELTAREUSESAMEREVS
3663 3663 Deltas will be reused if the destination revlog would pick the same
3664 3664 revisions for the delta. This mode strikes a balance between speed
3665 3665 and optimization.
3666 3666 DELTAREUSENEVER
3667 3667 Deltas will never be reused. This is the slowest mode of execution.
3668 3668 This mode can be used to recompute deltas (e.g. if the diff/delta
3669 3669 algorithm changes).
3670 3670 DELTAREUSEFULLADD
3671 3671 Revisions will be re-added as if they were new content. This is
3672 3672 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3673 3673 e.g. large file detection and handling.
3674 3674
3675 3675 Delta computation can be slow, so the choice of delta reuse policy can
3676 3676 significantly affect run time.
3677 3677
3678 3678 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3679 3679 two extremes. Deltas will be reused if they are appropriate. But if the
3680 3680 delta could choose a better revision, it will do so. This means if you
3681 3681 are converting a non-generaldelta revlog to a generaldelta revlog,
3682 3682 deltas will be recomputed if the delta's parent isn't a parent of the
3683 3683 revision.
3684 3684
3685 3685 In addition to the delta policy, the ``forcedeltabothparents``
3686 3686 argument controls whether to force computing deltas against both parents
3687 3687 for merges. If unset, the destination revlog's existing setting is used.
3688 3688
3689 3689 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3690 3690 `sidedata_helpers`.
3691 3691 """
3692 3692 if deltareuse not in self.DELTAREUSEALL:
3693 3693 raise ValueError(
3694 3694 _(b'value for deltareuse invalid: %s') % deltareuse
3695 3695 )
3696 3696
3697 3697 if len(destrevlog):
3698 3698 raise ValueError(_(b'destination revlog is not empty'))
3699 3699
3700 3700 if getattr(self, 'filteredrevs', None):
3701 3701 raise ValueError(_(b'source revlog has filtered revisions'))
3702 3702 if getattr(destrevlog, 'filteredrevs', None):
3703 3703 raise ValueError(_(b'destination revlog has filtered revisions'))
3704 3704
3705 3705 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3706 3706 # if possible.
3707 3707 old_delta_config = destrevlog.delta_config
3708 3708 destrevlog.delta_config = destrevlog.delta_config.copy()
3709 3709
3710 3710 try:
3711 3711 if deltareuse == self.DELTAREUSEALWAYS:
3712 3712 destrevlog.delta_config.lazy_delta_base = True
3713 3713 destrevlog.delta_config.lazy_delta = True
3714 3714 elif deltareuse == self.DELTAREUSESAMEREVS:
3715 3715 destrevlog.delta_config.lazy_delta_base = False
3716 3716 destrevlog.delta_config.lazy_delta = True
3717 3717 elif deltareuse == self.DELTAREUSENEVER:
3718 3718 destrevlog.delta_config.lazy_delta_base = False
3719 3719 destrevlog.delta_config.lazy_delta = False
3720 3720
3721 3721 delta_both_parents = (
3722 3722 forcedeltabothparents or old_delta_config.delta_both_parents
3723 3723 )
3724 3724 destrevlog.delta_config.delta_both_parents = delta_both_parents
3725 3725
3726 3726 with self.reading(), destrevlog._writing(tr):
3727 3727 self._clone(
3728 3728 tr,
3729 3729 destrevlog,
3730 3730 addrevisioncb,
3731 3731 deltareuse,
3732 3732 forcedeltabothparents,
3733 3733 sidedata_helpers,
3734 3734 )
3735 3735
3736 3736 finally:
3737 3737 destrevlog.delta_config = old_delta_config
3738 3738
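A hedged usage sketch of clone() for re-encoding a revlog while recomputing every delta; `repo`, `src` and `dst` are placeholders:

    with repo.transaction(b'clone-revlog') as tr:
        src.clone(
            tr,
            dst,
            deltareuse=src.DELTAREUSENEVER,  # force delta recomputation
            forcedeltabothparents=None,      # keep the destination's default
        )
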
3739 3739 def _clone(
3740 3740 self,
3741 3741 tr,
3742 3742 destrevlog,
3743 3743 addrevisioncb,
3744 3744 deltareuse,
3745 3745 forcedeltabothparents,
3746 3746 sidedata_helpers,
3747 3747 ):
3748 3748 """perform the core duty of `revlog.clone` after parameter processing"""
3749 3749 write_debug = None
3750 3750 if self.delta_config.debug_delta:
3751 3751 write_debug = tr._report
3752 3752 deltacomputer = deltautil.deltacomputer(
3753 3753 destrevlog,
3754 3754 write_debug=write_debug,
3755 3755 )
3756 3756 index = self.index
3757 3757 for rev in self:
3758 3758 entry = index[rev]
3759 3759
3760 3760 # Some classes override linkrev to take filtered revs into
3761 3761 # account. Use raw entry from index.
3762 3762 flags = entry[0] & 0xFFFF
3763 3763 linkrev = entry[4]
3764 3764 p1 = index[entry[5]][7]
3765 3765 p2 = index[entry[6]][7]
3766 3766 node = entry[7]
3767 3767
3768 3768 # (Possibly) reuse the delta from the revlog if allowed and
3769 3769 # the revlog chunk is a delta.
3770 3770 cachedelta = None
3771 3771 rawtext = None
3772 3772 if deltareuse == self.DELTAREUSEFULLADD:
3773 3773 text = self._revisiondata(rev)
3774 3774 sidedata = self.sidedata(rev)
3775 3775
3776 3776 if sidedata_helpers is not None:
3777 3777 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3778 3778 self, sidedata_helpers, sidedata, rev
3779 3779 )
3780 3780 flags = flags | new_flags[0] & ~new_flags[1]
3781 3781
3782 3782 destrevlog.addrevision(
3783 3783 text,
3784 3784 tr,
3785 3785 linkrev,
3786 3786 p1,
3787 3787 p2,
3788 3788 cachedelta=cachedelta,
3789 3789 node=node,
3790 3790 flags=flags,
3791 3791 deltacomputer=deltacomputer,
3792 3792 sidedata=sidedata,
3793 3793 )
3794 3794 else:
3795 3795 if destrevlog.delta_config.lazy_delta:
3796 3796 dp = self.deltaparent(rev)
3797 3797 if dp != nullrev:
3798 3798 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3799 3799
3800 3800 sidedata = None
3801 3801 if not cachedelta:
3802 3802 try:
3803 3803 rawtext = self._revisiondata(rev)
3804 3804 except error.CensoredNodeError as censored:
3805 3805 assert flags & REVIDX_ISCENSORED
3806 3806 rawtext = censored.tombstone
3807 3807 sidedata = self.sidedata(rev)
3808 3808 if sidedata is None:
3809 3809 sidedata = self.sidedata(rev)
3810 3810
3811 3811 if sidedata_helpers is not None:
3812 3812 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3813 3813 self, sidedata_helpers, sidedata, rev
3814 3814 )
3815 3815 flags = flags | new_flags[0] & ~new_flags[1]
3816 3816
3817 3817 destrevlog._addrevision(
3818 3818 node,
3819 3819 rawtext,
3820 3820 tr,
3821 3821 linkrev,
3822 3822 p1,
3823 3823 p2,
3824 3824 flags,
3825 3825 cachedelta,
3826 3826 deltacomputer=deltacomputer,
3827 3827 sidedata=sidedata,
3828 3828 )
3829 3829
3830 3830 if addrevisioncb:
3831 3831 addrevisioncb(self, rev, node)
3832 3832
3833 3833 def censorrevision(self, tr, censornode, tombstone=b''):
3834 3834 if self._format_version == REVLOGV0:
3835 3835 raise error.RevlogError(
3836 3836 _(b'cannot censor with version %d revlogs')
3837 3837 % self._format_version
3838 3838 )
3839 3839 elif self._format_version == REVLOGV1:
3840 3840 rewrite.v1_censor(self, tr, censornode, tombstone)
3841 3841 else:
3842 3842 rewrite.v2_censor(self, tr, censornode, tombstone)
3843 3843
3844 3844 def verifyintegrity(self, state):
3845 3845 """Verifies the integrity of the revlog.
3846 3846
3847 3847 Yields ``revlogproblem`` instances describing problems that are
3848 3848 found.
3849 3849 """
3850 3850 dd, di = self.checksize()
3851 3851 if dd:
3852 3852 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3853 3853 if di:
3854 3854 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3855 3855
3856 3856 version = self._format_version
3857 3857
3858 3858 # The verifier tells us what version revlog we should be.
3859 3859 if version != state[b'expectedversion']:
3860 3860 yield revlogproblem(
3861 3861 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3862 3862 % (self.display_id, version, state[b'expectedversion'])
3863 3863 )
3864 3864
3865 3865 state[b'skipread'] = set()
3866 3866 state[b'safe_renamed'] = set()
3867 3867
3868 3868 for rev in self:
3869 3869 node = self.node(rev)
3870 3870
3871 3871 # Verify contents. 4 cases to care about:
3872 3872 #
3873 3873 # common: the most common case
3874 3874 # rename: with a rename
3875 3875 # meta: file content starts with b'\1\n', the metadata
3876 3876 # header defined in filelog.py, but without a rename
3877 3877 # ext: content stored externally
3878 3878 #
3879 3879 # More formally, their differences are shown below:
3880 3880 #
3881 3881 # | common | rename | meta | ext
3882 3882 # -------------------------------------------------------
3883 3883 # flags() | 0 | 0 | 0 | not 0
3884 3884 # renamed() | False | True | False | ?
3885 3885 # rawtext[0:2]=='\1\n'| False | True | True | ?
3886 3886 #
3887 3887 # "rawtext" means the raw text stored in revlog data, which
3888 3888 # could be retrieved by "rawdata(rev)". "text"
3889 3889 # mentioned below is "revision(rev)".
3890 3890 #
3891 3891 # There are 3 different lengths stored physically:
3892 3892 # 1. L1: rawsize, stored in revlog index
3893 3893 # 2. L2: len(rawtext), stored in revlog data
3894 3894 # 3. L3: len(text), stored in revlog data if flags==0, or
3895 3895 # possibly somewhere else if flags!=0
3896 3896 #
3897 3897 # L1 should be equal to L2. L3 could be different from them.
3898 3898 # "text" may or may not affect commit hash depending on flag
3899 3899 # processors (see flagutil.addflagprocessor).
3900 3900 #
3901 3901 # | common | rename | meta | ext
3902 3902 # -------------------------------------------------
3903 3903 # rawsize() | L1 | L1 | L1 | L1
3904 3904 # size() | L1 | L2-LM | L1(*) | L1 (?)
3905 3905 # len(rawtext) | L2 | L2 | L2 | L2
3906 3906 # len(text) | L2 | L2 | L2 | L3
3907 3907 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3908 3908 #
3909 3909 # LM: length of metadata, depending on rawtext
3910 3910 # (*): not ideal, see comment in filelog.size
3911 3911 # (?): could be "- len(meta)" if the resolved content has
3912 3912 # rename metadata
3913 3913 #
3914 3914 # Checks needed to be done:
3915 3915 # 1. length check: L1 == L2, in all cases.
3916 3916 # 2. hash check: depending on flag processor, we may need to
3917 3917 # use either "text" (external), or "rawtext" (in revlog).
3918 3918
3919 3919 try:
3920 3920 skipflags = state.get(b'skipflags', 0)
3921 3921 if skipflags:
3922 3922 skipflags &= self.flags(rev)
3923 3923
3924 3924 _verify_revision(self, skipflags, state, node)
3925 3925
3926 3926 l1 = self.rawsize(rev)
3927 3927 l2 = len(self.rawdata(node))
3928 3928
3929 3929 if l1 != l2:
3930 3930 yield revlogproblem(
3931 3931 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3932 3932 node=node,
3933 3933 )
3934 3934
3935 3935 except error.CensoredNodeError:
3936 3936 if state[b'erroroncensored']:
3937 3937 yield revlogproblem(
3938 3938 error=_(b'censored file data'), node=node
3939 3939 )
3940 3940 state[b'skipread'].add(node)
3941 3941 except Exception as e:
3942 3942 yield revlogproblem(
3943 3943 error=_(b'unpacking %s: %s')
3944 3944 % (short(node), stringutil.forcebytestr(e)),
3945 3945 node=node,
3946 3946 )
3947 3947 state[b'skipread'].add(node)
3948 3948
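A minimal sketch of consuming verifyintegrity(); the `state` keys shown are the ones read above, while `rl` and `expected_version` are placeholders:

    state = {
        b'expectedversion': expected_version,  # normally set by the verifier
        b'erroroncensored': False,
        b'skipflags': 0,
    }
    problems = list(rl.verifyintegrity(state))
    errors = [p.error for p in problems if p.error]
    warnings = [p.warning for p in problems if p.warning]
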
3949 3949 def storageinfo(
3950 3950 self,
3951 3951 exclusivefiles=False,
3952 3952 sharedfiles=False,
3953 3953 revisionscount=False,
3954 3954 trackedsize=False,
3955 3955 storedsize=False,
3956 3956 ):
3957 3957 d = {}
3958 3958
3959 3959 if exclusivefiles:
3960 3960 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3961 3961 if not self._inline:
3962 3962 d[b'exclusivefiles'].append((self.opener, self._datafile))
3963 3963
3964 3964 if sharedfiles:
3965 3965 d[b'sharedfiles'] = []
3966 3966
3967 3967 if revisionscount:
3968 3968 d[b'revisionscount'] = len(self)
3969 3969
3970 3970 if trackedsize:
3971 3971 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3972 3972
3973 3973 if storedsize:
3974 3974 d[b'storedsize'] = sum(
3975 3975 self.opener.stat(path).st_size for path in self.files()
3976 3976 )
3977 3977
3978 3978 return d
3979 3979
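For reference, a short sketch of querying storageinfo(); only the requested keys appear in the result and `rl` is a placeholder:

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    # e.g. {b'revisionscount': 42, b'trackedsize': 123456}
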
3980 3980 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3981 3981 if not self.feature_config.has_side_data:
3982 3982 return
3983 3983 # revlog formats with sidedata support do not support inline
3984 3984 assert not self._inline
3985 3985 if not helpers[1] and not helpers[2]:
3986 3986 # Nothing to generate or remove
3987 3987 return
3988 3988
3989 3989 new_entries = []
3990 3990 # append the new sidedata
3991 3991 with self._writing(transaction):
3992 3992 ifh, dfh, sdfh = self._inner._writinghandles
3993 3993 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3994 3994
3995 3995 current_offset = sdfh.tell()
3996 3996 for rev in range(startrev, endrev + 1):
3997 3997 entry = self.index[rev]
3998 3998 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3999 3999 store=self,
4000 4000 sidedata_helpers=helpers,
4001 4001 sidedata={},
4002 4002 rev=rev,
4003 4003 )
4004 4004
4005 4005 serialized_sidedata = sidedatautil.serialize_sidedata(
4006 4006 new_sidedata
4007 4007 )
4008 4008
4009 4009 sidedata_compression_mode = COMP_MODE_INLINE
4010 4010 if serialized_sidedata and self.feature_config.has_side_data:
4011 4011 sidedata_compression_mode = COMP_MODE_PLAIN
4012 4012 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4013 4013 if (
4014 4014 h != b'u'
4015 4015 and comp_sidedata[0:1] != b'\0'
4016 4016 and len(comp_sidedata) < len(serialized_sidedata)
4017 4017 ):
4018 4018 assert not h
4019 4019 if (
4020 4020 comp_sidedata[0:1]
4021 4021 == self._docket.default_compression_header
4022 4022 ):
4023 4023 sidedata_compression_mode = COMP_MODE_DEFAULT
4024 4024 serialized_sidedata = comp_sidedata
4025 4025 else:
4026 4026 sidedata_compression_mode = COMP_MODE_INLINE
4027 4027 serialized_sidedata = comp_sidedata
4028 4028 if entry[8] != 0 or entry[9] != 0:
4029 4029 # rewriting entries that already have sidedata is not
4030 4030 # supported yet, because it introduces garbage data in the
4031 4031 # revlog.
4032 4032 msg = b"rewriting existing sidedata is not supported yet"
4033 4033 raise error.Abort(msg)
4034 4034
4035 4035 # Apply (potential) flags to add and to remove after running
4036 4036 # the sidedata helpers
4037 4037 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4038 4038 entry_update = (
4039 4039 current_offset,
4040 4040 len(serialized_sidedata),
4041 4041 new_offset_flags,
4042 4042 sidedata_compression_mode,
4043 4043 )
4044 4044
4045 4045 # the sidedata computation might have moved the file cursors around
4046 4046 sdfh.seek(current_offset, os.SEEK_SET)
4047 4047 sdfh.write(serialized_sidedata)
4048 4048 new_entries.append(entry_update)
4049 4049 current_offset += len(serialized_sidedata)
4050 4050 self._docket.sidedata_end = sdfh.tell()
4051 4051
4052 4052 # rewrite the new index entries
4053 4053 ifh.seek(startrev * self.index.entry_size)
4054 4054 for i, e in enumerate(new_entries):
4055 4055 rev = startrev + i
4056 4056 self.index.replace_sidedata_info(rev, *e)
4057 4057 packed = self.index.entry_binary(rev)
4058 4058 if rev == 0 and self._docket is None:
4059 4059 header = self._format_flags | self._format_version
4060 4060 header = self.index.pack_header(header)
4061 4061 packed = header + packed
4062 4062 ifh.write(packed)
@@ -1,60 +1,62 b''
1 1 import unittest
2 2
3 3 # picked from test-parse-index2, copied rather than imported
4 4 # so that it stays stable even if test-parse-index2 changes or disappears.
5 5 data_non_inlined = (
6 6 b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19'
7 7 b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff'
8 8 b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d'
9 9 b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00'
10 10 b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00'
11 11 b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff'
12 12 b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh'
13 13 b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
14 14 b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00'
15 15 b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n'
16 16 b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00'
17 17 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F'
18 18 b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01'
19 19 b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1'
20 20 b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00'
21 21 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
22 22 )
23 23
24 24 from ..revlogutils.constants import REVLOGV1
25 25
26 26
27 27 try:
28 28 from ..cext import parsers as cparsers # pytype: disable=import-error
29 29 except ImportError:
30 30 cparsers = None
31 31
32 32 try:
33 from ..rustext.revlog import MixedIndex # pytype: disable=import-error
33 from ..rustext.revlog import ( # pytype: disable=import-error
34 Index as RustIndex,
35 )
34 36 except ImportError:
35 MixedIndex = None
37 RustIndex = None
36 38
37 39
38 40 @unittest.skipIf(
39 41 cparsers is None,
40 42 'The C version of the "parsers" module is not available. It is needed for this test.',
41 43 )
42 44 class RevlogBasedTestBase(unittest.TestCase):
43 45 def parseindex(self, data=None):
44 46 if data is None:
45 47 data = data_non_inlined
46 48 return cparsers.parse_index2(data, False)[0]
47 49
48 50
49 51 @unittest.skipIf(
50 MixedIndex is None,
52 RustIndex is None,
51 53 'The Rust index is not available. It is needed for this test.',
52 54 )
53 55 class RustRevlogBasedTestBase(unittest.TestCase):
54 56 def parserustindex(self, data=None):
55 57 if data is None:
56 58 data = data_non_inlined
57 59 # not inheriting RevlogBasedTestCase to avoid having a
58 60 # `parseindex` method that would be shadowed by future subclasses
59 61 # this duplication will soon be removed
60 return MixedIndex(data, REVLOGV1)
62 return RustIndex(data, REVLOGV1)
@@ -1,1119 +1,1119 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
17 17 PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
18 18 PythonObject, ToPyObject, UnsafePyLeaked,
19 19 };
20 20 use hg::{
21 21 errors::HgError,
22 22 index::{
23 23 IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
24 24 INDEX_ENTRY_SIZE,
25 25 },
26 26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
27 27 revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
28 28 BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
29 29 };
30 30 use std::{cell::RefCell, collections::HashMap};
31 31 use vcsgraph::graph::Graph as VCSGraph;
32 32
33 33 pub struct PySharedIndex {
34 34 /// The underlying hg-core index
35 35 pub(crate) inner: &'static hg::index::Index,
36 36 }
37 37
38 38 /// Return a Struct implementing the Graph trait
39 39 pub(crate) fn py_rust_index_to_graph(
40 40 py: Python,
41 41 index: PyObject,
42 42 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
43 let midx = index.extract::<MixedIndex>(py)?;
43 let midx = index.extract::<Index>(py)?;
44 44 let leaked = midx.index(py).leak_immutable();
45 45 Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
46 46 }
47 47
48 48 impl Clone for PySharedIndex {
49 49 fn clone(&self) -> Self {
50 50 Self { inner: self.inner }
51 51 }
52 52 }
53 53
54 54 impl Graph for PySharedIndex {
55 55 fn parents(&self, rev: Revision) -> Result<[Revision; 2], hg::GraphError> {
56 56 self.inner.parents(rev)
57 57 }
58 58 }
59 59
60 60 impl VCSGraph for PySharedIndex {
61 61 fn parents(
62 62 &self,
63 63 rev: BaseRevision,
64 64 ) -> Result<vcsgraph::graph::Parents, vcsgraph::graph::GraphReadError>
65 65 {
66 66 // FIXME This trait should be reworked to decide between Revision
67 67 // and UncheckedRevision, get better errors names, etc.
68 68 match Graph::parents(self, Revision(rev)) {
69 69 Ok(parents) => {
70 70 Ok(vcsgraph::graph::Parents([parents[0].0, parents[1].0]))
71 71 }
72 72 Err(hg::GraphError::ParentOutOfRange(rev)) => {
73 73 Err(vcsgraph::graph::GraphReadError::KeyedInvalidKey(rev.0))
74 74 }
75 75 }
76 76 }
77 77 }
78 78
79 79 impl RevlogIndex for PySharedIndex {
80 80 fn len(&self) -> usize {
81 81 self.inner.len()
82 82 }
83 83 fn node(&self, rev: Revision) -> Option<&Node> {
84 84 self.inner.node(rev)
85 85 }
86 86 }
87 87
88 py_class!(pub class MixedIndex |py| {
88 py_class!(pub class Index |py| {
89 89 @shared data index: hg::index::Index;
90 90 data nt: RefCell<Option<CoreNodeTree>>;
91 91 data docket: RefCell<Option<PyObject>>;
92 92 // Holds a reference to the mmap'ed persistent nodemap data
93 93 data nodemap_mmap: RefCell<Option<PyBuffer>>;
94 94 // Holds a reference to the mmap'ed persistent index data
95 95 data index_mmap: RefCell<Option<PyBuffer>>;
96 96
97 97 def __new__(
98 98 _cls,
99 99 data: PyObject,
100 100 default_header: u32,
101 ) -> PyResult<MixedIndex> {
101 ) -> PyResult<Self> {
102 102 Self::new(py, data, default_header)
103 103 }
104 104
105 105 /// Compatibility layer used for Python consumers needing access to the C index
106 106 ///
107 107 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
108 108 /// that may need to build a custom `nodetree`, based on a specified revset.
109 109 /// With a Rust implementation of the nodemap, we will be able to get rid of
110 110 /// this, by exposing our own standalone nodemap class,
111 111 /// ready to accept `Index`.
112 112 /* def get_cindex(&self) -> PyResult<PyObject> {
113 113 Ok(self.cindex(py).borrow().inner().clone_ref(py))
114 114 }
115 115 */
116 116 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
117 117
118 118 /// Return Revision if found, raises a bare `error.RevlogError`
119 119 /// in case of ambiguity, same as C version does
120 120 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
121 121 let opt = self.get_nodetree(py)?.borrow();
122 122 let nt = opt.as_ref().unwrap();
123 123 let ridx = &*self.index(py).borrow();
124 124 let node = node_from_py_bytes(py, &node)?;
125 125 let rust_rev =
126 126 nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
127 127 Ok(rust_rev.map(Into::into))
128 128
129 129 }
130 130
131 131 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
132 132 /// is not found.
133 133 ///
134 134 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
135 135 /// will catch and rewrap with it
136 136 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
137 137 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
138 138 }
139 139
140 140 /// return True if the node exist in the index
141 141 def has_node(&self, node: PyBytes) -> PyResult<bool> {
142 142 // TODO OPTIM we could avoid a needless conversion here,
143 143 // to do when scaffolding for pure Rust switch is removed,
144 144 // as `get_rev()` currently does the necessary assertions
145 145 self.get_rev(py, node).map(|opt| opt.is_some())
146 146 }
147 147
148 148 /// find length of shortest hex nodeid of a binary ID
149 149 def shortest(&self, node: PyBytes) -> PyResult<usize> {
150 150 let opt = self.get_nodetree(py)?.borrow();
151 151 let nt = opt.as_ref().unwrap();
152 152 let idx = &*self.index(py).borrow();
153 153 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
154 154 {
155 155 Ok(Some(l)) => Ok(l),
156 156 Ok(None) => Err(revlog_error(py)),
157 157 Err(e) => Err(nodemap_error(py, e)),
158 158 }
159 159 }
160 160
161 161 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
162 162 let opt = self.get_nodetree(py)?.borrow();
163 163 let nt = opt.as_ref().unwrap();
164 164 let idx = &*self.index(py).borrow();
165 165
166 166 let node_as_string = if cfg!(feature = "python3-sys") {
167 167 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
168 168 }
169 169 else {
170 170 let node = node.extract::<PyBytes>(py)?;
171 171 String::from_utf8_lossy(node.data(py)).to_string()
172 172 };
173 173
174 174 let prefix = NodePrefix::from_hex(&node_as_string)
175 175 .map_err(|_| PyErr::new::<ValueError, _>(
176 176 py, format!("Invalid node or prefix '{}'", node_as_string))
177 177 )?;
178 178
179 179 nt.find_bin(idx, prefix)
180 180 // TODO make an inner API returning the node directly
181 181 .map(|opt| opt.map(
182 182 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
183 183 .map_err(|e| nodemap_error(py, e))
184 184
185 185 }
186 186
187 187 /// append an index entry
188 188 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
189 189 if tup.len(py) < 8 {
190 190 // this is better than the panic promised by tup.get_item()
191 191 return Err(
192 192 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
193 193 }
194 194 let node_bytes = tup.get_item(py, 7).extract(py)?;
195 195 let node = node_from_py_object(py, &node_bytes)?;
196 196
197 197 let rev = self.len(py)? as BaseRevision;
198 198
199 199 // This is ok since we will just add the revision to the index
200 200 let rev = Revision(rev);
201 201 self.index(py)
202 202 .borrow_mut()
203 203 .append(py_tuple_to_revision_data_params(py, tup)?)
204 204 .unwrap();
205 205 let idx = &*self.index(py).borrow();
206 206 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
207 207 .insert(idx, &node, rev)
208 208 .map_err(|e| nodemap_error(py, e))?;
209 209 Ok(py.None())
210 210 }
211 211
212 212 def __delitem__(&self, key: PyObject) -> PyResult<()> {
213 213 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
214 214 let start = if let Ok(rev) = key.extract(py) {
215 215 UncheckedRevision(rev)
216 216 } else {
217 217 let start = key.getattr(py, "start")?;
218 218 UncheckedRevision(start.extract(py)?)
219 219 };
220 220 let start = self.index(py)
221 221 .borrow()
222 222 .check_revision(start)
223 223 .ok_or_else(|| {
224 224 nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
225 225 })?;
226 226 self.index(py).borrow_mut().remove(start).unwrap();
227 227 let mut opt = self.get_nodetree(py)?.borrow_mut();
228 228 let nt = opt.as_mut().unwrap();
229 229 nt.invalidate_all();
230 230 self.fill_nodemap(py, nt)?;
231 231 Ok(())
232 232 }
233 233
234 234 //
235 235 // Index methods previously reforwarded to C index (tp_methods)
236 236 // Same ordering as in revlog.c
237 237 //
238 238
239 239 /// return the gca set of the given revs
240 240 def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
241 241 let rust_res = self.inner_ancestors(py, args)?;
242 242 Ok(rust_res)
243 243 }
244 244
245 245 /// return the heads of the common ancestors of the given revs
246 246 def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
247 247 let rust_res = self.inner_commonancestorsheads(py, args)?;
248 248 Ok(rust_res)
249 249 }
250 250
251 251 /// Clear the index caches and inner py_class data.
252 252 /// It is Python's responsibility to call `update_nodemap_data` again.
253 253 def clearcaches(&self) -> PyResult<PyObject> {
254 254 self.nt(py).borrow_mut().take();
255 255 self.docket(py).borrow_mut().take();
256 256 self.nodemap_mmap(py).borrow_mut().take();
257 257 self.index(py).borrow().clear_caches();
258 258 Ok(py.None())
259 259 }
260 260
261 261 /// return the raw binary string representing a revision
262 262 def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
263 263 let rindex = self.index(py).borrow();
264 264 let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
265 265 let rust_bytes = rindex.check_revision(rev).and_then(
266 266 |r| rindex.entry_binary(r))
267 267 .ok_or_else(|| rev_not_in_index(py, rev))?;
268 268 let rust_res = PyBytes::new(py, rust_bytes).into_object();
269 269 Ok(rust_res)
270 270 }
271 271
272 272 /// return a binary packed version of the header
273 273 def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
274 274 let rindex = self.index(py).borrow();
275 275 let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
276 276 let rust_res = PyBytes::new(py, &packed).into_object();
277 277 Ok(rust_res)
278 278 }
279 279
280 280 /// compute phases
281 281 def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
282 282 let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
283 283 let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
284 284 Ok(rust_res)
285 285 }
286 286
287 287 /// reachableroots
288 288 def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
289 289 let rust_res = self.inner_reachableroots2(
290 290 py,
291 291 UncheckedRevision(args.get_item(py, 0).extract(py)?),
292 292 args.get_item(py, 1),
293 293 args.get_item(py, 2),
294 294 args.get_item(py, 3).extract(py)?,
295 295 )?;
296 296 Ok(rust_res)
297 297 }
298 298
299 299 /// get head revisions
300 300 def headrevs(&self) -> PyResult<PyObject> {
301 301 let rust_res = self.inner_headrevs(py)?;
302 302 Ok(rust_res)
303 303 }
304 304
305 305 /// get filtered head revisions
306 306 def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
307 307 let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
308 308 Ok(rust_res)
309 309 }
310 310
311 311 /// True if the object is a snapshot
312 312 def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
313 313 let index = self.index(py).borrow();
314 314 let result = index
315 315 .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
316 316 .map_err(|e| {
317 317 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
318 318 })?;
319 319 Ok(result)
320 320 }
321 321
322 322 /// Gather snapshot data in a cache dict
323 323 def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
324 324 let index = self.index(py).borrow();
325 325 let cache: PyDict = args.get_item(py, 0).extract(py)?;
326 326 // this methods operates by setting new values in the cache,
327 327 // hence we will compare results by letting the C implementation
328 328 // operate over a deepcopy of the cache, and finally compare both
329 329 // caches.
330 330 let c_cache = PyDict::new(py);
331 331 for (k, v) in cache.items(py) {
332 332 c_cache.set_item(py, k, PySet::new(py, v)?)?;
333 333 }
334 334
335 335 let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
336 336 let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
337 337 let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
338 338 index.find_snapshots(
339 339 start_rev,
340 340 end_rev,
341 341 &mut cache_wrapper,
342 342 ).map_err(|_| revlog_error(py))?;
343 343 Ok(py.None())
344 344 }
345 345
346 346 /// determine revisions with deltas to reconstruct fulltext
347 347 def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
348 348 let index = self.index(py).borrow();
349 349 let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
350 350 let stop_rev =
351 351 args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
352 352 let rev = index.check_revision(rev).ok_or_else(|| {
353 353 nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
354 354 })?;
355 355 let stop_rev = if let Some(stop_rev) = stop_rev {
356 356 let stop_rev = UncheckedRevision(stop_rev);
357 357 Some(index.check_revision(stop_rev).ok_or_else(|| {
358 358 nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
359 359 })?)
360 360 } else {None};
361 361 let using_general_delta = args.get_item(py, 2)
362 362 .extract::<Option<u32>>(py)?
363 363 .map(|i| i != 0);
364 364 let (chain, stopped) = index.delta_chain(
365 365 rev, stop_rev, using_general_delta
366 366 ).map_err(|e| {
367 367 PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
368 368 })?;
369 369
370 370 let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
371 371 Ok(
372 372 PyTuple::new(
373 373 py,
374 374 &[
375 375 chain.into_py_object(py).into_object(),
376 376 stopped.into_py_object(py).into_object()
377 377 ]
378 378 ).into_object()
379 379 )
380 380
381 381 }
382 382
383 383 /// slice planned chunk read to reach a density threshold
384 384 def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
385 385 let rust_res = self.inner_slicechunktodensity(
386 386 py,
387 387 args.get_item(py, 0),
388 388 args.get_item(py, 1).extract(py)?,
389 389 args.get_item(py, 2).extract(py)?
390 390 )?;
391 391 Ok(rust_res)
392 392 }
393 393
394 394 // index_sequence_methods and index_mapping_methods.
395 395 //
396 396 // Since we call back through the high level Python API,
397 397 // there's no point making a distinction between index_get
398 398 // and index_getitem.
399 399 // gracinet 2023: this above is no longer true for the pure Rust impl
400 400
401 401 def __len__(&self) -> PyResult<usize> {
402 402 self.len(py)
403 403 }
404 404
405 405 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
406 406 let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
407 407 Ok(rust_res)
408 408 }
409 409
410 410 def __contains__(&self, item: PyObject) -> PyResult<bool> {
411 411 // ObjectProtocol does not seem to provide contains(), so
412 412 // this is an equivalent implementation of the index_contains()
413 413 // defined in revlog.c
414 414 match item.extract::<i32>(py) {
415 415 Ok(rev) => {
416 416 Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
417 417 }
418 418 Err(_) => {
419 419 let item_bytes: PyBytes = item.extract(py)?;
420 420 let rust_res = self.has_node(py, item_bytes)?;
421 421 Ok(rust_res)
422 422 }
423 423 }
424 424 }
425 425
426 426 def nodemap_data_all(&self) -> PyResult<PyBytes> {
427 427 self.inner_nodemap_data_all(py)
428 428 }
429 429
430 430 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
431 431 self.inner_nodemap_data_incremental(py)
432 432 }
433 433 def update_nodemap_data(
434 434 &self,
435 435 docket: PyObject,
436 436 nm_data: PyObject
437 437 ) -> PyResult<PyObject> {
438 438 self.inner_update_nodemap_data(py, docket, nm_data)
439 439 }
440 440
441 441 @property
442 442 def entry_size(&self) -> PyResult<PyInt> {
443 443 let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
444 444 Ok(rust_res)
445 445 }
446 446
447 447 @property
448 448 def rust_ext_compat(&self) -> PyResult<PyInt> {
449 449 // will be entirely removed once the Rust index stands on its own;
450 450 // kept for now to detangle things while removing `self.cindex`
451 451 let rust_res: PyInt = 1.to_py_object(py);
452 452 Ok(rust_res)
453 453 }
454 454
455 455 @property
456 456 def is_rust(&self) -> PyResult<PyBool> {
457 457 Ok(false.to_py_object(py))
458 458 }
459 459
460 460 });
461 461
462 462 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
463 463 /// buffer along with the Rust slice into said buffer. We need to keep the
464 464 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
465 465 /// is freed from Python's side.
466 466 ///
467 467 /// # Safety
468 468 ///
469 469 /// The caller must make sure that the buffer is kept around for at least as
470 470 /// long as the slice.
471 471 #[deny(unsafe_op_in_unsafe_fn)]
472 472 unsafe fn mmap_keeparound(
473 473 py: Python,
474 474 data: PyObject,
475 475 ) -> PyResult<(
476 476 PyBuffer,
477 477 Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>,
478 478 )> {
479 479 let buf = PyBuffer::get(py, &data)?;
480 480 let len = buf.item_count();
481 481
482 482 // Build a slice from the mmap'ed buffer data
483 483 let cbuf = buf.buf_ptr();
484 484 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
485 485 && buf.is_c_contiguous()
486 486 && u8::is_compatible_format(buf.format())
487 487 {
488 488 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
489 489 } else {
490 490 return Err(PyErr::new::<ValueError, _>(
491 491 py,
492 492 "Nodemap data buffer has an invalid memory representation"
493 493 .to_string(),
494 494 ));
495 495 };
496 496
497 497 Ok((buf, Box::new(bytes)))
498 498 }
499 499
500 500 fn py_tuple_to_revision_data_params(
501 501 py: Python,
502 502 tuple: PyTuple,
503 503 ) -> PyResult<RevisionDataParams> {
504 504 if tuple.len(py) < 8 {
505 505 // this is better than the panic promised by tup.get_item()
506 506 return Err(PyErr::new::<IndexError, _>(
507 507 py,
508 508 "tuple index out of range",
509 509 ));
510 510 }
511 511 let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
512 512 let node_id = tuple
513 513 .get_item(py, 7)
514 514 .extract::<PyBytes>(py)?
515 515 .data(py)
516 516 .try_into()
517 517 .unwrap();
518 518 let flags = (offset_or_flags & 0xFFFF) as u16;
519 519 let data_offset = offset_or_flags >> 16;
520 520 Ok(RevisionDataParams {
521 521 flags,
522 522 data_offset,
523 523 data_compressed_length: tuple.get_item(py, 1).extract(py)?,
524 524 data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
525 525 data_delta_base: tuple.get_item(py, 3).extract(py)?,
526 526 link_rev: tuple.get_item(py, 4).extract(py)?,
527 527 parent_rev_1: tuple.get_item(py, 5).extract(py)?,
528 528 parent_rev_2: tuple.get_item(py, 6).extract(py)?,
529 529 node_id,
530 530 ..Default::default()
531 531 })
532 532 }
533 533 fn revision_data_params_to_py_tuple(
534 534 py: Python,
535 535 params: RevisionDataParams,
536 536 ) -> PyTuple {
537 537 PyTuple::new(
538 538 py,
539 539 &[
540 540 params.data_offset.into_py_object(py).into_object(),
541 541 params
542 542 .data_compressed_length
543 543 .into_py_object(py)
544 544 .into_object(),
545 545 params
546 546 .data_uncompressed_length
547 547 .into_py_object(py)
548 548 .into_object(),
549 549 params.data_delta_base.into_py_object(py).into_object(),
550 550 params.link_rev.into_py_object(py).into_object(),
551 551 params.parent_rev_1.into_py_object(py).into_object(),
552 552 params.parent_rev_2.into_py_object(py).into_object(),
553 553 PyBytes::new(py, &params.node_id)
554 554 .into_py_object(py)
555 555 .into_object(),
556 556 params._sidedata_offset.into_py_object(py).into_object(),
557 557 params
558 558 ._sidedata_compressed_length
559 559 .into_py_object(py)
560 560 .into_object(),
561 561 params
562 562 .data_compression_mode
563 563 .into_py_object(py)
564 564 .into_object(),
565 565 params
566 566 ._sidedata_compression_mode
567 567 .into_py_object(py)
568 568 .into_object(),
569 569 params._rank.into_py_object(py).into_object(),
570 570 ],
571 571 )
572 572 }
573 573
574 574 struct PySnapshotsCache<'p> {
575 575 py: Python<'p>,
576 576 dict: PyDict,
577 577 }
578 578
579 579 impl<'p> SnapshotsCache for PySnapshotsCache<'p> {
580 580 fn insert_for(
581 581 &mut self,
582 582 rev: BaseRevision,
583 583 value: BaseRevision,
584 584 ) -> Result<(), RevlogError> {
585 585 let pyvalue = value.into_py_object(self.py).into_object();
586 586 match self.dict.get_item(self.py, rev) {
587 587 Some(obj) => obj
588 588 .extract::<PySet>(self.py)
589 589 .and_then(|set| set.add(self.py, pyvalue)),
590 590 None => PySet::new(self.py, vec![pyvalue])
591 591 .and_then(|set| self.dict.set_item(self.py, rev, set)),
592 592 }
593 593 .map_err(|_| {
594 594 RevlogError::Other(HgError::unsupported(
595 595 "Error in Python caches handling",
596 596 ))
597 597 })
598 598 }
599 599 }
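// Roughly the Python-side equivalent of `insert_for`, as a sketch:
//
//     cache.setdefault(rev, set()).add(value)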
600 600
601 impl MixedIndex {
602 fn new(py: Python, data: PyObject, header: u32) -> PyResult<MixedIndex> {
601 impl Index {
602 fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
603 603 // Safety: we keep the buffer around inside the class as `index_mmap`
604 604 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
605 605
606 606 Self::create_instance(
607 607 py,
608 608 hg::index::Index::new(
609 609 bytes,
610 610 IndexHeader::parse(&header.to_be_bytes())
611 611 .expect("default header is broken")
612 612 .unwrap(),
613 613 )
614 614 .map_err(|e| {
615 615 revlog_error_with_msg(py, e.to_string().as_bytes())
616 616 })?,
617 617 RefCell::new(None),
618 618 RefCell::new(None),
619 619 RefCell::new(None),
620 620 RefCell::new(Some(buf)),
621 621 )
622 622 }
623 623
624 624 fn len(&self, py: Python) -> PyResult<usize> {
625 625 let rust_index_len = self.index(py).borrow().len();
626 626 Ok(rust_index_len)
627 627 }
628 628
629 629 /// This is scaffolding at this point, but it could also become
630 630 /// a way to start a persistent nodemap or perform a
631 631 /// vacuum / repack operation
632 632 fn fill_nodemap(
633 633 &self,
634 634 py: Python,
635 635 nt: &mut CoreNodeTree,
636 636 ) -> PyResult<PyObject> {
637 637 let index = self.index(py).borrow();
638 638 for r in 0..self.len(py)? {
639 639 let rev = Revision(r as BaseRevision);
640 640 // in this case node() won't ever return None
641 641 nt.insert(&*index, index.node(rev).unwrap(), rev)
642 642 .map_err(|e| nodemap_error(py, e))?
643 643 }
644 644 Ok(py.None())
645 645 }
646 646
647 647 fn get_nodetree<'a>(
648 648 &'a self,
649 649 py: Python<'a>,
650 650 ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> {
651 651 if self.nt(py).borrow().is_none() {
652 652 let readonly = Box::<Vec<_>>::default();
653 653 let mut nt = CoreNodeTree::load_bytes(readonly, 0);
654 654 self.fill_nodemap(py, &mut nt)?;
655 655 self.nt(py).borrow_mut().replace(nt);
656 656 }
657 657 Ok(self.nt(py))
658 658 }
659 659
660 660 /// Returns the full nodemap bytes to be written as-is to disk
661 661 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
662 662 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
663 663 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
664 664
665 665 // If there's anything readonly, we need to build the data again from
666 666 // scratch
667 667 let bytes = if readonly.len() > 0 {
668 668 let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
669 669 self.fill_nodemap(py, &mut nt)?;
670 670
671 671 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
672 672 assert_eq!(readonly.len(), 0);
673 673
674 674 bytes
675 675 } else {
676 676 bytes
677 677 };
678 678
679 679 let bytes = PyBytes::new(py, &bytes);
680 680 Ok(bytes)
681 681 }
682 682
683 683 /// Returns the last saved docket along with the size of any changed data
684 684 /// (in bytes), and said data as bytes.
685 685 fn inner_nodemap_data_incremental(
686 686 &self,
687 687 py: Python,
688 688 ) -> PyResult<PyObject> {
689 689 let docket = self.docket(py).borrow();
690 690 let docket = match docket.as_ref() {
691 691 Some(d) => d,
692 692 None => return Ok(py.None()),
693 693 };
694 694
695 695 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
696 696 let masked_blocks = node_tree.masked_readonly_blocks();
697 697 let (_, data) = node_tree.into_readonly_and_added_bytes();
698 698 let changed = masked_blocks * std::mem::size_of::<Block>();
699 699
700 700 Ok((docket, changed, PyBytes::new(py, &data))
701 701 .to_py_object(py)
702 702 .into_object())
703 703 }
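// Shape of the value built above, as seen from Python (a sketch):
//
//     (docket, changed_byte_count, new_data_bytes)
//
// where `changed_byte_count` is `masked_readonly_blocks()` times the size of
// a `Block`, i.e. a byte count rather than a block count.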
704 704
705 705 /// Update the nodemap from the new (mmaped) data.
706 706 /// The docket is kept as a reference for later incremental calls.
707 707 fn inner_update_nodemap_data(
708 708 &self,
709 709 py: Python,
710 710 docket: PyObject,
711 711 nm_data: PyObject,
712 712 ) -> PyResult<PyObject> {
713 713 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
714 714 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
715 715 let len = buf.item_count();
716 716 self.nodemap_mmap(py).borrow_mut().replace(buf);
717 717
718 718 let mut nt = CoreNodeTree::load_bytes(bytes, len);
719 719
720 720 let data_tip = docket
721 721 .getattr(py, "tip_rev")?
722 722 .extract::<BaseRevision>(py)?
723 723 .into();
724 724 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
725 725 let idx = self.index(py).borrow();
726 726 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
727 727 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
728 728 })?;
729 729 let current_tip = idx.len();
730 730
731 731 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
732 732 let rev = Revision(r);
733 733 // in this case node() won't ever return None
734 734 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
735 735 .map_err(|e| nodemap_error(py, e))?
736 736 }
737 737
738 738 *self.nt(py).borrow_mut() = Some(nt);
739 739
740 740 Ok(py.None())
741 741 }
742 742
743 743 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
744 744 let idx = self.index(py).borrow();
745 745 Ok(match key.extract::<BaseRevision>(py) {
746 746 Ok(key_as_int) => {
747 747 let entry_params = if key_as_int == NULL_REVISION.0 {
748 748 RevisionDataParams::default()
749 749 } else {
750 750 let rev = UncheckedRevision(key_as_int);
751 751 match idx.entry_as_params(rev) {
752 752 Some(e) => e,
753 753 None => {
754 754 return Err(PyErr::new::<IndexError, _>(
755 755 py,
756 756 "revlog index out of range",
757 757 ));
758 758 }
759 759 }
760 760 };
761 761 revision_data_params_to_py_tuple(py, entry_params)
762 762 .into_object()
763 763 }
764 764 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
765 765 || py.None(),
766 766 |py_rev| py_rev.into_py_object(py).into_object(),
767 767 ),
768 768 })
769 769 }
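// Python-side behaviour implied by the above, as a sketch:
//
//     index[rev]        # entry tuple, or IndexError when out of range
//     index[node_bytes] # revision number, or None when the node is unknown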
770 770
771 771 fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
772 772 let index = &*self.index(py).borrow();
773 773 let as_vec: Vec<PyObject> = index
774 774 .head_revs()
775 775 .map_err(|e| graph_error(py, e))?
776 776 .iter()
777 777 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
778 778 .collect();
779 779 Ok(PyList::new(py, &as_vec).into_object())
780 780 }
781 781
782 782 fn inner_headrevsfiltered(
783 783 &self,
784 784 py: Python,
785 785 filtered_revs: &PyObject,
786 786 ) -> PyResult<PyObject> {
787 787 let index = &mut *self.index(py).borrow_mut();
788 788 let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?;
789 789
790 790 let as_vec: Vec<PyObject> = index
791 791 .head_revs_filtered(&filtered_revs)
792 792 .map_err(|e| graph_error(py, e))?
793 793 .iter()
794 794 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
795 795 .collect();
796 796 Ok(PyList::new(py, &as_vec).into_object())
797 797 }
798 798
799 799 fn inner_ancestors(
800 800 &self,
801 801 py: Python,
802 802 py_revs: &PyTuple,
803 803 ) -> PyResult<PyObject> {
804 804 let index = &*self.index(py).borrow();
805 805 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
806 806 let as_vec: Vec<_> = index
807 807 .ancestors(&revs)
808 808 .map_err(|e| graph_error(py, e))?
809 809 .iter()
810 810 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
811 811 .collect();
812 812 Ok(PyList::new(py, &as_vec).into_object())
813 813 }
814 814
815 815 fn inner_commonancestorsheads(
816 816 &self,
817 817 py: Python,
818 818 py_revs: &PyTuple,
819 819 ) -> PyResult<PyObject> {
820 820 let index = &*self.index(py).borrow();
821 821 let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
822 822 let as_vec: Vec<_> = index
823 823 .common_ancestor_heads(&revs)
824 824 .map_err(|e| graph_error(py, e))?
825 825 .iter()
826 826 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
827 827 .collect();
828 828 Ok(PyList::new(py, &as_vec).into_object())
829 829 }
830 830
831 831 fn inner_computephasesmapsets(
832 832 &self,
833 833 py: Python,
834 834 py_roots: PyDict,
835 835 ) -> PyResult<PyObject> {
836 836 let index = &*self.index(py).borrow();
837 837 let opt = self.get_nodetree(py)?.borrow();
838 838 let nt = opt.as_ref().unwrap();
839 839 let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
840 840 .items_list(py)
841 841 .iter(py)
842 842 .map(|r| {
843 843 let phase = r.get_item(py, 0)?;
844 844 let nodes = r.get_item(py, 1)?;
845 845 // Transform the nodes from Python to revs here since we
846 846 // have access to the nodemap
847 847 let revs: Result<_, _> = nodes
848 848 .iter(py)?
849 849 .map(|node| match node?.extract::<PyBytes>(py) {
850 850 Ok(py_bytes) => {
851 851 let node = node_from_py_bytes(py, &py_bytes)?;
852 852 nt.find_bin(index, node.into())
853 853 .map_err(|e| nodemap_error(py, e))?
854 854 .ok_or_else(|| revlog_error(py))
855 855 }
856 856 Err(e) => Err(e),
857 857 })
858 858 .collect();
859 859 let phase = Phase::try_from(phase.extract::<usize>(py)?)
860 860 .map_err(|_| revlog_error(py));
861 861 Ok((phase?, revs?))
862 862 })
863 863 .collect();
864 864 let (len, phase_maps) = index
865 865 .compute_phases_map_sets(roots?)
866 866 .map_err(|e| graph_error(py, e))?;
867 867
868 868 // Ugly hack, but temporary
869 869 const IDX_TO_PHASE_NUM: [usize; 4] = [1, 2, 32, 96];
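// presumably the non-public phase numbers: draft=1, secret=2,
// archived=32, internal=96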
870 870 let py_phase_maps = PyDict::new(py);
871 871 for (idx, roots) in phase_maps.iter().enumerate() {
872 872 let phase_num = IDX_TO_PHASE_NUM[idx].into_py_object(py);
873 873 // OPTIM too bad we have to collect here. At least, we could
874 874 // reuse the same Vec and allocate it with capacity at
875 875 // max(len(phase_maps))
876 876 let roots_vec: Vec<PyInt> = roots
877 877 .iter()
878 878 .map(|r| PyRevision::from(*r).into_py_object(py))
879 879 .collect();
880 880 py_phase_maps.set_item(
881 881 py,
882 882 phase_num,
883 883 PySet::new(py, roots_vec)?,
884 884 )?;
885 885 }
886 886 Ok(PyTuple::new(
887 887 py,
888 888 &[
889 889 len.into_py_object(py).into_object(),
890 890 py_phase_maps.into_object(),
891 891 ],
892 892 )
893 893 .into_object())
894 894 }
895 895
896 896 fn inner_slicechunktodensity(
897 897 &self,
898 898 py: Python,
899 899 revs: PyObject,
900 900 target_density: f64,
901 901 min_gap_size: usize,
902 902 ) -> PyResult<PyObject> {
903 903 let index = &*self.index(py).borrow();
904 904 let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
905 905 let as_nested_vec =
906 906 index.slice_chunk_to_density(&revs, target_density, min_gap_size);
907 907 let mut res = Vec::with_capacity(as_nested_vec.len());
908 908 let mut py_chunk = Vec::new();
909 909 for chunk in as_nested_vec {
910 910 py_chunk.clear();
911 911 py_chunk.reserve_exact(chunk.len());
912 912 for rev in chunk {
913 913 py_chunk.push(
914 914 PyRevision::from(rev).into_py_object(py).into_object(),
915 915 );
916 916 }
917 917 res.push(PyList::new(py, &py_chunk).into_object());
918 918 }
919 919 // This mirrors the C implementation; the reason for the special case is unclear
920 920 if res.len() == 1 {
921 921 Ok(PyTuple::new(py, &res).into_object())
922 922 } else {
923 923 Ok(PyList::new(py, &res).into_object())
924 924 }
925 925 }
926 926
927 927 fn inner_reachableroots2(
928 928 &self,
929 929 py: Python,
930 930 min_root: UncheckedRevision,
931 931 heads: PyObject,
932 932 roots: PyObject,
933 933 include_path: bool,
934 934 ) -> PyResult<PyObject> {
935 935 let index = &*self.index(py).borrow();
936 936 let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
937 937 PyErr::new::<IndexError, _>(py, "head out of range")
938 938 })?;
939 939 let roots: Result<_, _> = roots
940 940 .iter(py)?
941 941 .map(|r| {
942 942 r.and_then(|o| match o.extract::<PyRevision>(py) {
943 943 Ok(r) => Ok(UncheckedRevision(r.0)),
944 944 Err(e) => Err(e),
945 945 })
946 946 })
947 947 .collect();
948 948 let as_set = index
949 949 .reachable_roots(min_root, heads, roots?, include_path)
950 950 .map_err(|e| graph_error(py, e))?;
951 951 let as_vec: Vec<PyObject> = as_set
952 952 .iter()
953 953 .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
954 954 .collect();
955 955 Ok(PyList::new(py, &as_vec).into_object())
956 956 }
957 957 }
958 958
959 959 py_class!(pub class NodeTree |py| {
960 960 data nt: RefCell<CoreNodeTree>;
961 961 data index: RefCell<UnsafePyLeaked<PySharedIndex>>;
962 962
963 963 def __new__(_cls, index: PyObject) -> PyResult<NodeTree> {
964 964 let index = py_rust_index_to_graph(py, index)?;
965 965 let nt = CoreNodeTree::default(); // in-RAM, fully mutable
966 966 Self::create_instance(py, RefCell::new(nt), RefCell::new(index))
967 967 }
968 968
969 969 /// Tell whether the NodeTree is still valid
970 970 ///
971 971 /// In case of mutation of the index, the given results are not
972 972 /// guaranteed to be correct, and in fact, the methods borrowing
973 973 /// the inner index would fail because of `PySharedRef` poisoning
974 974 /// (generation-based guard), same as iterating on a `dict` that has
975 975 /// been mutated in the meantime.
976 976 def is_invalidated(&self) -> PyResult<bool> {
977 977 let leaked = self.index(py).borrow();
978 978 let result = unsafe { leaked.try_borrow(py) };
979 979 // two cases for result to be an error:
980 980 // - the index has previously been mutably borrowed
981 981 // - there is currently a mutable borrow
982 982 // in both cases, results previously obtained through the index
983 983 // are no longer guaranteed to be valid.
984 984 Ok(result.is_err())
985 985 }
986 986
987 987 def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
988 988 let leaked = self.index(py).borrow();
989 989 let index = &*unsafe { leaked.try_borrow(py)? };
990 990
991 991 let rev = UncheckedRevision(rev.0);
992 992 let rev = index
993 993 .check_revision(rev)
994 994 .ok_or_else(|| rev_not_in_index(py, rev))?;
995 995 if rev == NULL_REVISION {
996 996 return Err(rev_not_in_index(py, rev.into()))
997 997 }
998 998
999 999 let entry = index.inner.get_entry(rev).unwrap();
1000 1000 let mut nt = self.nt(py).borrow_mut();
1001 1001 nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;
1002 1002
1003 1003 Ok(py.None())
1004 1004 }
1005 1005
1006 1006 /// Lookup by node hex prefix in the NodeTree, returning revision number.
1007 1007 ///
1008 1008 /// This is not part of the classical NodeTree API, but is good enough
1009 1009 /// for unit testing, as in `test-rust-revlog.py`.
1010 1010 def prefix_rev_lookup(
1011 1011 &self,
1012 1012 node_prefix: PyBytes
1013 1013 ) -> PyResult<Option<PyRevision>> {
1014 1014 let prefix = NodePrefix::from_hex(node_prefix.data(py))
1015 1015 .map_err(|_| PyErr::new::<ValueError, _>(
1016 1016 py,
1017 1017 format!("Invalid node or prefix {:?}",
1018 1018 node_prefix.as_object()))
1019 1019 )?;
1020 1020
1021 1021 let nt = self.nt(py).borrow();
1022 1022 let leaked = self.index(py).borrow();
1023 1023 let index = &*unsafe { leaked.try_borrow(py)? };
1024 1024
1025 1025 Ok(nt.find_bin(index, prefix)
1026 1026 .map_err(|e| nodemap_error(py, e))?
1027 1027 .map(|r| r.into())
1028 1028 )
1029 1029 }
1030 1030
1031 1031 def shortest(&self, node: PyBytes) -> PyResult<usize> {
1032 1032 let nt = self.nt(py).borrow();
1033 1033 let leaked = self.index(py).borrow();
1034 1034 let idx = &*unsafe { leaked.try_borrow(py)? };
1035 1035 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
1036 1036 {
1037 1037 Ok(Some(l)) => Ok(l),
1038 1038 Ok(None) => Err(revlog_error(py)),
1039 1039 Err(e) => Err(nodemap_error(py, e)),
1040 1040 }
1041 1041 }
1042 1042 });
1043 1043
1044 1044 fn revlog_error(py: Python) -> PyErr {
1045 1045 match py
1046 1046 .import("mercurial.error")
1047 1047 .and_then(|m| m.get(py, "RevlogError"))
1048 1048 {
1049 1049 Err(e) => e,
1050 1050 Ok(cls) => PyErr::from_instance(
1051 1051 py,
1052 1052 cls.call(py, (py.None(),), None).ok().into_py_object(py),
1053 1053 ),
1054 1054 }
1055 1055 }
1056 1056
1057 1057 fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
1058 1058 match py
1059 1059 .import("mercurial.error")
1060 1060 .and_then(|m| m.get(py, "RevlogError"))
1061 1061 {
1062 1062 Err(e) => e,
1063 1063 Ok(cls) => PyErr::from_instance(
1064 1064 py,
1065 1065 cls.call(py, (PyBytes::new(py, msg),), None)
1066 1066 .ok()
1067 1067 .into_py_object(py),
1068 1068 ),
1069 1069 }
1070 1070 }
1071 1071
1072 1072 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
1073 1073 // ParentOutOfRange is currently the only alternative
1074 1074 // in `hg::GraphError`. The C index always raises this simple ValueError.
1075 1075 PyErr::new::<ValueError, _>(py, "parent out of range")
1076 1076 }
1077 1077
1078 1078 fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
1079 1079 PyErr::new::<ValueError, _>(
1080 1080 py,
1081 1081 format!(
1082 1082 "Inconsistency: Revision {} found in nodemap \
1083 1083 is not in revlog index",
1084 1084 rev
1085 1085 ),
1086 1086 )
1087 1087 }
1088 1088
1089 1089 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
1090 1090 PyErr::new::<ValueError, _>(
1091 1091 py,
1092 1092 format!("revlog index out of range: {}", rev),
1093 1093 )
1094 1094 }
1095 1095
1096 1096 /// Standard treatment of NodeMapError
1097 1097 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
1098 1098 match err {
1099 1099 NodeMapError::MultipleResults => revlog_error(py),
1100 1100 NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
1101 1101 }
1102 1102 }
1103 1103
1104 1104 /// Create the module, with __package__ given from parent
1105 1105 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
1106 1106 let dotted_name = &format!("{}.revlog", package);
1107 1107 let m = PyModule::new(py, dotted_name)?;
1108 1108 m.add(py, "__package__", package)?;
1109 1109 m.add(py, "__doc__", "RevLog - Rust implementations")?;
1110 1110
1111 m.add_class::<MixedIndex>(py)?;
1111 m.add_class::<Index>(py)?;
1112 1112 m.add_class::<NodeTree>(py)?;
1113 1113
1114 1114 let sys = PyModule::import(py, "sys")?;
1115 1115 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
1116 1116 sys_modules.set_item(py, dotted_name, &m)?;
1117 1117
1118 1118 Ok(m)
1119 1119 }
@@ -1,90 +1,90 b''
1 1 import struct
2 2 import unittest
3 3
4 4 from mercurial.node import hex
5 5
6 6 try:
7 7 from mercurial import rustext
8 8
9 9 rustext.__name__ # trigger immediate actual import
10 10 except ImportError:
11 11 rustext = None
12 12 else:
13 13 from mercurial.rustext import revlog
14 14
15 15 # this would fail already without appropriate ancestor.__package__
16 16 from mercurial.rustext.ancestor import LazyAncestors
17 17
18 18 from mercurial.testing import revlog as revlogtesting
19 19
20 20 header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
21 21
22 22
23 23 @unittest.skipIf(
24 24 rustext is None,
25 25 "rustext module revlog relies on is not available",
26 26 )
27 27 class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
28 28 def test_heads(self):
29 29 idx = self.parseindex()
30 rustidx = revlog.MixedIndex(revlogtesting.data_non_inlined, header)
30 rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
31 31 self.assertEqual(rustidx.headrevs(), idx.headrevs())
32 32
33 33 def test_len(self):
34 34 idx = self.parseindex()
35 rustidx = revlog.MixedIndex(revlogtesting.data_non_inlined, header)
35 rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
36 36 self.assertEqual(len(rustidx), len(idx))
37 37
38 38 def test_ancestors(self):
39 rustidx = revlog.MixedIndex(revlogtesting.data_non_inlined, header)
39 rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
40 40 lazy = LazyAncestors(rustidx, [3], 0, True)
41 41 # we have two more references to the index:
42 42 # - in its inner iterator for __contains__ and __bool__
43 43 # - in the LazyAncestors instance itself (to spawn new iterators)
44 44 self.assertTrue(2 in lazy)
45 45 self.assertTrue(bool(lazy))
46 46 self.assertEqual(list(lazy), [3, 2, 1, 0])
47 47 # a second time to validate that we spawn new iterators
48 48 self.assertEqual(list(lazy), [3, 2, 1, 0])
49 49
50 50 # let's check bool for an empty one
51 51 self.assertFalse(LazyAncestors(rustidx, [0], 0, False))
52 52
53 53
54 54 @unittest.skipIf(
55 55 rustext is None,
56 56 "rustext module revlog relies on is not available",
57 57 )
58 58 class RustRevlogNodeTreeClassTest(revlogtesting.RustRevlogBasedTestBase):
59 59 def test_standalone_nodetree(self):
60 60 idx = self.parserustindex()
61 61 nt = revlog.NodeTree(idx)
62 62 for i in range(4):
63 63 nt.insert(i)
64 64
65 65 bin_nodes = [entry[7] for entry in idx]
66 66 hex_nodes = [hex(n) for n in bin_nodes]
67 67
68 68 for i, node in enumerate(hex_nodes):
69 69 self.assertEqual(nt.prefix_rev_lookup(node), i)
70 70 self.assertEqual(nt.prefix_rev_lookup(node[:5]), i)
71 71
72 72 # all 4 revisions in idx (standard data set) have different
73 73 # first nybbles in their Node IDs,
74 74 # hence `nt.shortest()` should return 1 for them, except when
75 75 # the leading nybble is 0 (ambiguity with NULL_NODE)
76 76 for i, (bin_node, hex_node) in enumerate(zip(bin_nodes, hex_nodes)):
77 77 shortest = nt.shortest(bin_node)
78 78 expected = 2 if hex_node[0] == ord('0') else 1
79 79 self.assertEqual(shortest, expected)
80 80 self.assertEqual(nt.prefix_rev_lookup(hex_node[:shortest]), i)
81 81
82 82 # test invalidation (generation poisoning) detection
83 83 del idx[3]
84 84 self.assertTrue(nt.is_invalidated())
85 85
86 86
87 87 if __name__ == '__main__':
88 88 import silenttestrunner
89 89
90 90 silenttestrunner.main(__name__)