rust-revlog: teach the revlog opening code to read the repo options...
Raphaël Gomès
r52084:13f58ce7 default
@@ -1,4056 +1,4059 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 import functools
19 20 import io
20 21 import os
21 22 import struct
22 23 import weakref
23 24 import zlib
24 25
25 26 # import stuff from node for others to import from revlog
26 27 from .node import (
27 28 bin,
28 29 hex,
29 30 nullrev,
30 31 sha1nodeconstants,
31 32 short,
32 33 wdirrev,
33 34 )
34 35 from .i18n import _
35 36 from .revlogutils.constants import (
36 37 ALL_KINDS,
37 38 CHANGELOGV2,
38 39 COMP_MODE_DEFAULT,
39 40 COMP_MODE_INLINE,
40 41 COMP_MODE_PLAIN,
41 42 DELTA_BASE_REUSE_NO,
42 43 DELTA_BASE_REUSE_TRY,
43 44 ENTRY_RANK,
44 45 FEATURES_BY_VERSION,
45 46 FLAG_GENERALDELTA,
46 47 FLAG_INLINE_DATA,
47 48 INDEX_HEADER,
48 49 KIND_CHANGELOG,
49 50 KIND_FILELOG,
50 51 RANK_UNKNOWN,
51 52 REVLOGV0,
52 53 REVLOGV1,
53 54 REVLOGV1_FLAGS,
54 55 REVLOGV2,
55 56 REVLOGV2_FLAGS,
56 57 REVLOG_DEFAULT_FLAGS,
57 58 REVLOG_DEFAULT_FORMAT,
58 59 REVLOG_DEFAULT_VERSION,
59 60 SUPPORTED_FLAGS,
60 61 )
61 62 from .revlogutils.flagutil import (
62 63 REVIDX_DEFAULT_FLAGS,
63 64 REVIDX_ELLIPSIS,
64 65 REVIDX_EXTSTORED,
65 66 REVIDX_FLAGS_ORDER,
66 67 REVIDX_HASCOPIESINFO,
67 68 REVIDX_ISCENSORED,
68 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 70 )
70 71 from .thirdparty import attr
71 72 from . import (
72 73 ancestor,
73 74 dagop,
74 75 error,
75 76 mdiff,
76 77 policy,
77 78 pycompat,
78 79 revlogutils,
79 80 templatefilters,
80 81 util,
81 82 )
82 83 from .interfaces import (
83 84 repository,
84 85 util as interfaceutil,
85 86 )
86 87 from .revlogutils import (
87 88 deltas as deltautil,
88 89 docket as docketutil,
89 90 flagutil,
90 91 nodemap as nodemaputil,
91 92 randomaccessfile,
92 93 revlogv0,
93 94 rewrite,
94 95 sidedata as sidedatautil,
95 96 )
96 97 from .utils import (
97 98 storageutil,
98 99 stringutil,
99 100 )
100 101
101 102 # blanket usage of all the names to prevent pyflakes complaints
102 103 # We need these names available in the module for extensions.
103 104
104 105 REVLOGV0
105 106 REVLOGV1
106 107 REVLOGV2
107 108 CHANGELOGV2
108 109 FLAG_INLINE_DATA
109 110 FLAG_GENERALDELTA
110 111 REVLOG_DEFAULT_FLAGS
111 112 REVLOG_DEFAULT_FORMAT
112 113 REVLOG_DEFAULT_VERSION
113 114 REVLOGV1_FLAGS
114 115 REVLOGV2_FLAGS
115 116 REVIDX_ISCENSORED
116 117 REVIDX_ELLIPSIS
117 118 REVIDX_HASCOPIESINFO
118 119 REVIDX_EXTSTORED
119 120 REVIDX_DEFAULT_FLAGS
120 121 REVIDX_FLAGS_ORDER
121 122 REVIDX_RAWTEXT_CHANGING_FLAGS
122 123
123 124 parsers = policy.importmod('parsers')
124 125 rustancestor = policy.importrust('ancestor')
125 126 rustdagop = policy.importrust('dagop')
126 127 rustrevlog = policy.importrust('revlog')
127 128
128 129 # Aliased for performance.
129 130 _zlibdecompress = zlib.decompress
130 131
131 132 # max size of inline data embedded into a revlog
132 133 _maxinline = 131072
133 134
134 135 # Flag processors for REVIDX_ELLIPSIS.
135 136 def ellipsisreadprocessor(rl, text):
136 137 return text, False
137 138
138 139
139 140 def ellipsiswriteprocessor(rl, text):
140 141 return text, False
141 142
142 143
143 144 def ellipsisrawprocessor(rl, text):
144 145 return False
145 146
146 147
147 148 ellipsisprocessor = (
148 149 ellipsisreadprocessor,
149 150 ellipsiswriteprocessor,
150 151 ellipsisrawprocessor,
151 152 )
152 153
153 154
154 155 def _verify_revision(rl, skipflags, state, node):
155 156 """Verify the integrity of the given revlog ``node`` while providing a hook
156 157 point for extensions to influence the operation."""
157 158 if skipflags:
158 159 state[b'skipread'].add(node)
159 160 else:
160 161 # Side-effect: read content and verify hash.
161 162 rl.revision(node)
162 163
163 164
164 165 # True if a fast implementation for persistent-nodemap is available
165 166 #
166 167 # We also consider the "pure" python implementation "fast" because
167 168 # people using pure don't really have performance considerations (and a
168 169 # wheelbarrow of other slowness sources)
169 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 171 parsers, 'BaseIndexObject'
171 172 )
172 173
173 174
174 175 @interfaceutil.implementer(repository.irevisiondelta)
175 176 @attr.s(slots=True)
176 177 class revlogrevisiondelta:
177 178 node = attr.ib()
178 179 p1node = attr.ib()
179 180 p2node = attr.ib()
180 181 basenode = attr.ib()
181 182 flags = attr.ib()
182 183 baserevisionsize = attr.ib()
183 184 revision = attr.ib()
184 185 delta = attr.ib()
185 186 sidedata = attr.ib()
186 187 protocol_flags = attr.ib()
187 188 linknode = attr.ib(default=None)
188 189
189 190
190 191 @interfaceutil.implementer(repository.iverifyproblem)
191 192 @attr.s(frozen=True)
192 193 class revlogproblem:
193 194 warning = attr.ib(default=None)
194 195 error = attr.ib(default=None)
195 196 node = attr.ib(default=None)
196 197
197 198
198 199 def parse_index_v1(data, inline):
199 200 # call the C implementation to parse the index data
200 201 index, cache = parsers.parse_index2(data, inline)
201 202 return index, cache
202 203
203 204
204 205 def parse_index_v2(data, inline):
205 206 # call the C implementation to parse the index data
206 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 208 return index, cache
208 209
209 210
210 211 def parse_index_cl_v2(data, inline):
211 212 # call the C implementation to parse the index data
212 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 214 return index, cache
214 215
215 216
216 217 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 218
218 219 def parse_index_v1_nodemap(data, inline):
219 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 221 return index, cache
221 222
222 223
223 224 else:
224 225 parse_index_v1_nodemap = None
225 226
226 227
227 def parse_index_v1_mixed(data, inline):
228 def parse_index_v1_mixed(data, inline, default_header):
228 229 index, cache = parse_index_v1(data, inline)
229 return rustrevlog.MixedIndex(index, data), cache
230 return rustrevlog.MixedIndex(index, data, default_header), cache
230 231
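This hunk is the heart of the commit: the Rust-backed MixedIndex now receives a default header, so freshly created revlogs can agree with repository-level options. A minimal sketch of a call site, assuming REVLOG_DEFAULT_VERSION stands in for the header a real caller would compute from its options; `index_data` and `open_mixed_index` are placeholders:

```python
# Sketch only: threading a default header through the parameter this
# commit adds; real callers derive the header from repository options.
from mercurial.revlogutils.constants import REVLOG_DEFAULT_VERSION

def open_mixed_index(index_data):
    # `index_data` stands in for raw index bytes read from disk.
    return parse_index_v1_mixed(index_data, False, REVLOG_DEFAULT_VERSION)
```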
231 232
232 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 234 # signed integer)
234 235 _maxentrysize = 0x7FFFFFFF
235 236
236 237 FILE_TOO_SHORT_MSG = _(
237 238 b'cannot read from revlog %s;'
238 239 b' expected %d bytes from offset %d, data size is %d'
239 240 )
240 241
241 242 hexdigits = b'0123456789abcdefABCDEF'
242 243
243 244
244 245 class _Config:
245 246 def copy(self):
246 247 return self.__class__(**self.__dict__)
247 248
248 249
249 250 @attr.s()
250 251 class FeatureConfig(_Config):
251 252 """Hold configuration values about the available revlog features"""
252 253
253 254 # the default compression engine
254 255 compression_engine = attr.ib(default=b'zlib')
255 256 # compression engines options
256 257 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 258
258 259 # can we use censor on this revlog
259 260 censorable = attr.ib(default=False)
260 261 # does this revlog use the "side data" feature
261 262 has_side_data = attr.ib(default=False)
262 263 # might remove rank configuration once the computation has no impact
263 264 compute_rank = attr.ib(default=False)
264 265 # parent order is supposed to be semantically irrelevant, so we
265 266 # normally resort parents to ensure that the first parent is non-null,
266 267 # if there is a non-null parent at all.
267 268 # filelog abuses the parent order as a flag to mark some instances of
268 269 # meta-encoded files, so allow it to disable this behavior.
269 270 canonical_parent_order = attr.ib(default=False)
270 271 # can ellipsis commit be used
271 272 enable_ellipsis = attr.ib(default=False)
272 273
273 274 def copy(self):
274 275 new = super().copy()
275 276 new.compression_engine_options = self.compression_engine_options.copy()
276 277 return new
277 278
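The override matters because the generic _Config.copy() passes self.__dict__ straight to the constructor, which would share the options dict between instances. An illustrative check, not from the source (`zlib.level` is a hypothetical key here):

```python
# Illustrative: FeatureConfig.copy() duplicates the engine-options
# dict, so tuning a copy leaves the original configuration intact.
base = FeatureConfig()
dup = base.copy()
dup.compression_engine_options[b'zlib.level'] = 9  # hypothetical key
assert b'zlib.level' not in base.compression_engine_options
```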
278 279
279 280 @attr.s()
280 281 class DataConfig(_Config):
281 282 """Hold configuration value about how the revlog data are read"""
282 283
283 284 # should we try to open the "pending" version of the revlog
284 285 try_pending = attr.ib(default=False)
285 286 # should we try to open the "split" version of the revlog
286 287 try_split = attr.ib(default=False)
287 288 # When True, indexfile should be opened with checkambig=True at writing,
288 289 # to avoid file stat ambiguity.
289 290 check_ambig = attr.ib(default=False)
290 291
291 292 # If true, use mmap instead of reading to deal with large index
292 293 mmap_large_index = attr.ib(default=False)
293 294 # how much data is considered large
294 295 mmap_index_threshold = attr.ib(default=None)
295 296 # How much data to read and cache into the raw revlog data cache.
296 297 chunk_cache_size = attr.ib(default=65536)
297 298
298 299 # The size of the uncompressed cache compared to the largest revision seen.
299 300 uncompressed_cache_factor = attr.ib(default=None)
300 301
301 302 # The number of chunks cached
302 303 uncompressed_cache_count = attr.ib(default=None)
303 304
304 305 # Allow sparse reading of the revlog data
305 306 with_sparse_read = attr.ib(default=False)
306 307 # minimal density of a sparse read chunk
307 308 sr_density_threshold = attr.ib(default=0.50)
308 309 # minimal size of data we skip when performing sparse read
309 310 sr_min_gap_size = attr.ib(default=262144)
310 311
311 312 # are deltas encoded against arbitrary bases.
312 313 generaldelta = attr.ib(default=False)
313 314
314 315
315 316 @attr.s()
316 317 class DeltaConfig(_Config):
317 318 """Hold configuration value about how new delta are computed
318 319
319 320 Some attributes are duplicated from DataConfig to help havign each object
320 321 self contained.
321 322 """
322 323
323 324 # can deltas be encoded against arbitrary bases.
324 325 general_delta = attr.ib(default=False)
325 326 # Allow sparse writing of the revlog data
326 327 sparse_revlog = attr.ib(default=False)
327 328 # maximum length of a delta chain
328 329 max_chain_len = attr.ib(default=None)
329 330 # Maximum distance between delta chain base start and end
330 331 max_deltachain_span = attr.ib(default=-1)
331 332 # If `upper_bound_comp` is not None, this is the expected maximal gain from
332 333 # compression for the data content.
333 334 upper_bound_comp = attr.ib(default=None)
334 335 # Should we try a delta against both parents
335 336 delta_both_parents = attr.ib(default=True)
336 337 # Test delta base candidate group by chunk of this maximal size.
337 338 candidate_group_chunk_size = attr.ib(default=0)
338 339 # Should we display debug information about delta computation
339 340 debug_delta = attr.ib(default=False)
340 341 # trust incoming delta by default
341 342 lazy_delta = attr.ib(default=True)
342 343 # trust the base of incoming delta by default
343 344 lazy_delta_base = attr.ib(default=False)
344 345
345 346
346 347 class _InnerRevlog:
347 348 """An inner layer of the revlog object
348 349
349 350 That layer exists to be able to delegate some operations to Rust; its
350 351 boundaries are arbitrary and based on what we can delegate to Rust.
351 352 """
352 353
353 354 def __init__(
354 355 self,
355 356 opener,
356 357 index,
357 358 index_file,
358 359 data_file,
359 360 sidedata_file,
360 361 inline,
361 362 data_config,
362 363 delta_config,
363 364 feature_config,
364 365 chunk_cache,
365 366 default_compression_header,
366 367 ):
367 368 self.opener = opener
368 369 self.index = index
369 370
370 371 self.index_file = index_file
371 372 self.data_file = data_file
372 373 self.sidedata_file = sidedata_file
373 374 self.inline = inline
374 375 self.data_config = data_config
375 376 self.delta_config = delta_config
376 377 self.feature_config = feature_config
377 378
378 379 # used during diverted write.
379 380 self._orig_index_file = None
380 381
381 382 self._default_compression_header = default_compression_header
382 383
383 384 # index
384 385
385 386 # 3-tuple of file handles being used for active writing.
386 387 self._writinghandles = None
387 388
388 389 self._segmentfile = randomaccessfile.randomaccessfile(
389 390 self.opener,
390 391 (self.index_file if self.inline else self.data_file),
391 392 self.data_config.chunk_cache_size,
392 393 chunk_cache,
393 394 )
394 395 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
395 396 self.opener,
396 397 self.sidedata_file,
397 398 self.data_config.chunk_cache_size,
398 399 )
399 400
400 401 # revlog header -> revlog compressor
401 402 self._decompressors = {}
402 403 # 3-tuple of (node, rev, text) for a raw revision.
403 404 self._revisioncache = None
404 405
405 406 # cache some uncompressed chunks
406 407 # rev → uncompressed_chunk
407 408 #
408 409 # the max cost is dynamically updated to be proportional to the
409 410 # size of revision we actually encounter.
410 411 self._uncompressed_chunk_cache = None
411 412 if self.data_config.uncompressed_cache_factor is not None:
412 413 self._uncompressed_chunk_cache = util.lrucachedict(
413 414 self.data_config.uncompressed_cache_count,
414 415 maxcost=65536, # some arbitrary initial value
415 416 )
416 417
417 418 self._delay_buffer = None
418 419
419 420 def __len__(self):
420 421 return len(self.index)
421 422
422 423 def clear_cache(self):
423 424 assert not self.is_delaying
424 425 self._revisioncache = None
425 426 if self._uncompressed_chunk_cache is not None:
426 427 self._uncompressed_chunk_cache.clear()
427 428 self._segmentfile.clear_cache()
428 429 self._segmentfile_sidedata.clear_cache()
429 430
430 431 @property
431 432 def canonical_index_file(self):
432 433 if self._orig_index_file is not None:
433 434 return self._orig_index_file
434 435 return self.index_file
435 436
436 437 @property
437 438 def is_delaying(self):
438 439 """is the revlog is currently delaying the visibility of written data?
439 440
440 441 The delaying mechanism can be either in-memory or written on disk in a
441 442 side-file."""
442 443 return (self._delay_buffer is not None) or (
443 444 self._orig_index_file is not None
444 445 )
445 446
446 447 # Derived from index values.
447 448
448 449 def start(self, rev):
449 450 """the offset of the data chunk for this revision"""
450 451 return int(self.index[rev][0] >> 16)
451 452
452 453 def length(self, rev):
453 454 """the length of the data chunk for this revision"""
454 455 return self.index[rev][1]
455 456
456 457 def end(self, rev):
457 458 """the end of the data chunk for this revision"""
458 459 return self.start(rev) + self.length(rev)
459 460
460 461 def deltaparent(self, rev):
461 462 """return deltaparent of the given revision"""
462 463 base = self.index[rev][3]
463 464 if base == rev:
464 465 return nullrev
465 466 elif self.delta_config.general_delta:
466 467 return base
467 468 else:
468 469 return rev - 1
469 470
470 471 def issnapshot(self, rev):
471 472 """tells whether rev is a snapshot"""
472 473 if not self.delta_config.sparse_revlog:
473 474 return self.deltaparent(rev) == nullrev
474 475 elif hasattr(self.index, 'issnapshot'):
475 476 # directly assign the method to cache the testing and access
476 477 self.issnapshot = self.index.issnapshot
477 478 return self.issnapshot(rev)
478 479 if rev == nullrev:
479 480 return True
480 481 entry = self.index[rev]
481 482 base = entry[3]
482 483 if base == rev:
483 484 return True
484 485 if base == nullrev:
485 486 return True
486 487 p1 = entry[5]
487 488 while self.length(p1) == 0:
488 489 b = self.deltaparent(p1)
489 490 if b == p1:
490 491 break
491 492 p1 = b
492 493 p2 = entry[6]
493 494 while self.length(p2) == 0:
494 495 b = self.deltaparent(p2)
495 496 if b == p2:
496 497 break
497 498 p2 = b
498 499 if base == p1 or base == p2:
499 500 return False
500 501 return self.issnapshot(base)
501 502
502 503 def _deltachain(self, rev, stoprev=None):
503 504 """Obtain the delta chain for a revision.
504 505
505 506 ``stoprev`` specifies a revision to stop at. If not specified, we
506 507 stop at the base of the chain.
507 508
508 509 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
509 510 revs in ascending order and ``stopped`` is a bool indicating whether
510 511 ``stoprev`` was hit.
511 512 """
512 513 generaldelta = self.delta_config.general_delta
513 514 # Try C implementation.
514 515 try:
515 516 return self.index.deltachain(rev, stoprev, generaldelta)
516 517 except AttributeError:
517 518 pass
518 519
519 520 chain = []
520 521
521 522 # Alias to prevent attribute lookup in tight loop.
522 523 index = self.index
523 524
524 525 iterrev = rev
525 526 e = index[iterrev]
526 527 while iterrev != e[3] and iterrev != stoprev:
527 528 chain.append(iterrev)
528 529 if generaldelta:
529 530 iterrev = e[3]
530 531 else:
531 532 iterrev -= 1
532 533 e = index[iterrev]
533 534
534 535 if iterrev == stoprev:
535 536 stopped = True
536 537 else:
537 538 chain.append(iterrev)
538 539 stopped = False
539 540
540 541 chain.reverse()
541 542 return chain, stopped
542 543
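The (chain, stopped) pair returned above is exactly what revision reconstruction consumes. A condensed, hedged sketch of that consumer, mirroring raw_text() further down (`inner` is an _InnerRevlog; `rebuild_revision` is an illustrative name, not part of the class):

```python
# Hedged sketch: how a delta chain turns back into a revision,
# condensed from raw_text() below.
from mercurial import mdiff

def rebuild_revision(inner, rev):
    """Rebuild the rawtext for `rev` from an _InnerRevlog's delta chain."""
    chain, stopped = inner._deltachain(rev)   # revs in ascending order
    bins = inner._chunks(chain)               # decompressed chunks
    basetext = bytes(bins[0])                 # full text at the chain base
    return mdiff.patches(basetext, bins[1:])  # apply deltas in order
```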
543 544 @util.propertycache
544 545 def _compressor(self):
545 546 engine = util.compengines[self.feature_config.compression_engine]
546 547 return engine.revlogcompressor(
547 548 self.feature_config.compression_engine_options
548 549 )
549 550
550 551 @util.propertycache
551 552 def _decompressor(self):
552 553 """the default decompressor"""
553 554 if self._default_compression_header is None:
554 555 return None
555 556 t = self._default_compression_header
556 557 c = self._get_decompressor(t)
557 558 return c.decompress
558 559
559 560 def _get_decompressor(self, t):
560 561 try:
561 562 compressor = self._decompressors[t]
562 563 except KeyError:
563 564 try:
564 565 engine = util.compengines.forrevlogheader(t)
565 566 compressor = engine.revlogcompressor(
566 567 self.feature_config.compression_engine_options
567 568 )
568 569 self._decompressors[t] = compressor
569 570 except KeyError:
570 571 raise error.RevlogError(
571 572 _(b'unknown compression type %s') % binascii.hexlify(t)
572 573 )
573 574 return compressor
574 575
575 576 def compress(self, data):
576 577 """Generate a possibly-compressed representation of data."""
577 578 if not data:
578 579 return b'', data
579 580
580 581 compressed = self._compressor.compress(data)
581 582
582 583 if compressed:
583 584 # The revlog compressor added the header in the returned data.
584 585 return b'', compressed
585 586
586 587 if data[0:1] == b'\0':
587 588 return b'', data
588 589 return b'u', data
589 590
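The empty-versus-b'u' header returned by compress() is what decompress() below dispatches on. A round-trip sketch under that contract (`inner` is an _InnerRevlog; `roundtrip` is illustrative; bytes() because decompress may return a buffer):

```python
# Sketch of the one-byte header contract between compress() and
# decompress(): b'' means the engine header is embedded in the data,
# b'u' marks data stored uncompressed, and data starting with b'\0'
# needs no marker at all.
def roundtrip(inner, text):
    header, packed = inner.compress(text)
    assert bytes(inner.decompress(header + packed)) == text
```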
590 591 def decompress(self, data):
591 592 """Decompress a revlog chunk.
592 593
593 594 The chunk is expected to begin with a header identifying the
594 595 format type so it can be routed to an appropriate decompressor.
595 596 """
596 597 if not data:
597 598 return data
598 599
599 600 # Revlogs are read much more frequently than they are written and many
600 601 # chunks only take microseconds to decompress, so performance is
601 602 # important here.
602 603 #
603 604 # We can make a few assumptions about revlogs:
604 605 #
605 606 # 1) the majority of chunks will be compressed (as opposed to inline
606 607 # raw data).
607 608 # 2) decompressing *any* data will likely be at least 10x slower than
608 609 # returning raw inline data.
609 610 # 3) we want to prioritize common and officially supported compression
610 611 # engines
611 612 #
612 613 # It follows that we want to optimize for "decompress compressed data
613 614 # when encoded with common and officially supported compression engines"
614 615 # case over "raw data" and "data encoded by less common or non-official
615 616 # compression engines." That is why we have the inline lookup first
616 617 # followed by the compengines lookup.
617 618 #
618 619 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
619 620 # compressed chunks. And this matters for changelog and manifest reads.
620 621 t = data[0:1]
621 622
622 623 if t == b'x':
623 624 try:
624 625 return _zlibdecompress(data)
625 626 except zlib.error as e:
626 627 raise error.RevlogError(
627 628 _(b'revlog decompress error: %s')
628 629 % stringutil.forcebytestr(e)
629 630 )
630 631 # '\0' is more common than 'u' so it goes first.
631 632 elif t == b'\0':
632 633 return data
633 634 elif t == b'u':
634 635 return util.buffer(data, 1)
635 636
636 637 compressor = self._get_decompressor(t)
637 638
638 639 return compressor.decompress(data)
639 640
640 641 @contextlib.contextmanager
641 642 def reading(self):
642 643 """Context manager that keeps data and sidedata files open for reading"""
643 644 if len(self.index) == 0:
644 645 yield # nothing to be read
645 646 elif self._delay_buffer is not None and self.inline:
646 647 msg = "revlog with delayed write should not be inline"
647 648 raise error.ProgrammingError(msg)
648 649 else:
649 650 with self._segmentfile.reading():
650 651 with self._segmentfile_sidedata.reading():
651 652 yield
652 653
653 654 @property
654 655 def is_writing(self):
655 656 """True is a writing context is open"""
656 657 return self._writinghandles is not None
657 658
658 659 @property
659 660 def is_open(self):
660 661 """True if any file handle is being held
661 662
662 663 Used for assert and debug in the python code"""
663 664 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
664 665
665 666 @contextlib.contextmanager
666 667 def writing(self, transaction, data_end=None, sidedata_end=None):
667 668 """Open the revlog files for writing
668 669
669 670 Adding content to a revlog should be done within such a context.
670 671 """
671 672 if self.is_writing:
672 673 yield
673 674 else:
674 675 ifh = dfh = sdfh = None
675 676 try:
676 677 r = len(self.index)
677 678 # opening the data file.
678 679 dsize = 0
679 680 if r:
680 681 dsize = self.end(r - 1)
681 682 dfh = None
682 683 if not self.inline:
683 684 try:
684 685 dfh = self.opener(self.data_file, mode=b"r+")
685 686 if data_end is None:
686 687 dfh.seek(0, os.SEEK_END)
687 688 else:
688 689 dfh.seek(data_end, os.SEEK_SET)
689 690 except FileNotFoundError:
690 691 dfh = self.opener(self.data_file, mode=b"w+")
691 692 transaction.add(self.data_file, dsize)
692 693 if self.sidedata_file is not None:
693 694 assert sidedata_end is not None
694 695 # revlog-v2 does not inline, help Pytype
695 696 assert dfh is not None
696 697 try:
697 698 sdfh = self.opener(self.sidedata_file, mode=b"r+")
698 699 dfh.seek(sidedata_end, os.SEEK_SET)
699 700 except FileNotFoundError:
700 701 sdfh = self.opener(self.sidedata_file, mode=b"w+")
701 702 transaction.add(self.sidedata_file, sidedata_end)
702 703
703 704 # opening the index file.
704 705 isize = r * self.index.entry_size
705 706 ifh = self.__index_write_fp()
706 707 if self.inline:
707 708 transaction.add(self.index_file, dsize + isize)
708 709 else:
709 710 transaction.add(self.index_file, isize)
710 711 # exposing all file handle for writing.
711 712 self._writinghandles = (ifh, dfh, sdfh)
712 713 self._segmentfile.writing_handle = ifh if self.inline else dfh
713 714 self._segmentfile_sidedata.writing_handle = sdfh
714 715 yield
715 716 finally:
716 717 self._writinghandles = None
717 718 self._segmentfile.writing_handle = None
718 719 self._segmentfile_sidedata.writing_handle = None
719 720 if dfh is not None:
720 721 dfh.close()
721 722 if sdfh is not None:
722 723 sdfh.close()
723 724 # closing the index file last to avoid exposing references to
724 725 # potentially unflushed data content.
725 726 if ifh is not None:
726 727 ifh.close()
727 728
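A usage sketch of the contract stated in the docstring: appends must happen inside this context so the index, data, and sidedata handles are open and registered with the transaction (`tr`, `entries`, and `append_revisions` are placeholders, not Mercurial API):

```python
# Hedged usage sketch: appends must happen inside writing(), which
# opens the index/data/sidedata handles and registers them with the
# transaction object `tr`.
def append_revisions(inner, tr, entries):
    with inner.writing(tr):
        assert inner.is_writing
        for entry in entries:
            ...  # each entry ends up in inner.write_entry(...)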
728 729 def __index_write_fp(self, index_end=None):
729 730 """internal method to open the index file for writing
730 731
731 732 You should not use this directly; use `_writing` instead
732 733 """
733 734 try:
734 735 if self._delay_buffer is None:
735 736 f = self.opener(
736 737 self.index_file,
737 738 mode=b"r+",
738 739 checkambig=self.data_config.check_ambig,
739 740 )
740 741 else:
741 742 # check_ambig affects the way we open a file for writing; however,
742 743 # here we do not actually open a file for writing, as writes
743 744 # will be appended to a delay_buffer. So check_ambig is not
744 745 # meaningful and is unneeded here.
745 746 f = randomaccessfile.appender(
746 747 self.opener, self.index_file, b"r+", self._delay_buffer
747 748 )
748 749 if index_end is None:
749 750 f.seek(0, os.SEEK_END)
750 751 else:
751 752 f.seek(index_end, os.SEEK_SET)
752 753 return f
753 754 except FileNotFoundError:
754 755 if self._delay_buffer is None:
755 756 return self.opener(
756 757 self.index_file,
757 758 mode=b"w+",
758 759 checkambig=self.data_config.check_ambig,
759 760 )
760 761 else:
761 762 return randomaccessfile.appender(
762 763 self.opener, self.index_file, b"w+", self._delay_buffer
763 764 )
764 765
765 766 def __index_new_fp(self):
766 767 """internal method to create a new index file for writing
767 768
768 769 You should not use this unless you are upgrading from an inline revlog
769 770 """
770 771 return self.opener(
771 772 self.index_file,
772 773 mode=b"w",
773 774 checkambig=self.data_config.check_ambig,
774 775 atomictemp=True,
775 776 )
776 777
777 778 def split_inline(self, tr, header, new_index_file_path=None):
778 779 """split the data of an inline revlog into an index and a data file"""
779 780 assert self._delay_buffer is None
780 781 existing_handles = False
781 782 if self._writinghandles is not None:
782 783 existing_handles = True
783 784 fp = self._writinghandles[0]
784 785 fp.flush()
785 786 fp.close()
786 787 # We can't use the cached file handle after close(). So prevent
787 788 # its usage.
788 789 self._writinghandles = None
789 790 self._segmentfile.writing_handle = None
790 791 # No need to deal with the sidedata writing handle, as it is only
791 792 # relevant for revlog-v2, which is never inline and thus never
792 793 # reaches this code
793 794
794 795 new_dfh = self.opener(self.data_file, mode=b"w+")
795 796 new_dfh.truncate(0) # drop any potentially existing data
796 797 try:
797 798 with self.reading():
798 799 for r in range(len(self.index)):
799 800 new_dfh.write(self.get_segment_for_revs(r, r)[1])
800 801 new_dfh.flush()
801 802
802 803 if new_index_file_path is not None:
803 804 self.index_file = new_index_file_path
804 805 with self.__index_new_fp() as fp:
805 806 self.inline = False
806 807 for i in range(len(self.index)):
807 808 e = self.index.entry_binary(i)
808 809 if i == 0:
809 810 packed_header = self.index.pack_header(header)
810 811 e = packed_header + e
811 812 fp.write(e)
812 813
813 814 # If we don't use side-write, the temp file replaces the real
814 815 # index when we exit the context manager
815 816
816 817 self._segmentfile = randomaccessfile.randomaccessfile(
817 818 self.opener,
818 819 self.data_file,
819 820 self.data_config.chunk_cache_size,
820 821 )
821 822
822 823 if existing_handles:
823 824 # switched from inline to conventional reopen the index
824 825 ifh = self.__index_write_fp()
825 826 self._writinghandles = (ifh, new_dfh, None)
826 827 self._segmentfile.writing_handle = new_dfh
827 828 new_dfh = None
828 829 # No need to deal with the sidedata writing handle, as it is only
829 830 # relevant for revlog-v2, which is never inline and thus never
830 831 # reaches this code
831 832 finally:
832 833 if new_dfh is not None:
833 834 new_dfh.close()
834 835 return self.index_file
835 836
836 837 def get_segment_for_revs(self, startrev, endrev):
837 838 """Obtain a segment of raw data corresponding to a range of revisions.
838 839
839 840 Accepts the start and end revisions. Data is read through the
840 841 cached segment file; any underlying file handle's seek position
841 842 is not preserved.
842 843
843 844 Requests for data may be satisfied by a cache.
844 845
845 846 Returns a 2-tuple of (offset, data) for the requested range of
846 847 revisions. Offset is the integer offset from the beginning of the
847 848 revlog and data is a str or buffer of the raw byte data.
848 849
849 850 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
850 851 to determine where each revision's data begins and ends.
851 852
852 853 API: we should consider making this a private part of the InnerRevlog
853 854 at some point.
854 855 """
855 856 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
856 857 # (functions are expensive).
857 858 index = self.index
858 859 istart = index[startrev]
859 860 start = int(istart[0] >> 16)
860 861 if startrev == endrev:
861 862 end = start + istart[1]
862 863 else:
863 864 iend = index[endrev]
864 865 end = int(iend[0] >> 16) + iend[1]
865 866
866 867 if self.inline:
867 868 start += (startrev + 1) * self.index.entry_size
868 869 end += (endrev + 1) * self.index.entry_size
869 870 length = end - start
870 871
871 872 return start, self._segmentfile.read_chunk(start, length)
872 873
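As the docstring says, callers slice the returned segment themselves using start() and length(). A hedged sketch of that pattern (`carve_chunks` is an illustrative helper, not part of the class):

```python
# Sketch: carving per-revision chunks out of one raw segment, for a
# non-inline revlog; inline revlogs additionally shift each start by
# (rev + 1) * entry_size, as _chunks() below does.
def carve_chunks(inner, firstrev, lastrev):
    offset, data = inner.get_segment_for_revs(firstrev, lastrev)
    for rev in range(firstrev, lastrev + 1):
        begin = inner.start(rev) - offset
        yield rev, data[begin : begin + inner.length(rev)]
```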
873 874 def _chunk(self, rev):
874 875 """Obtain a single decompressed chunk for a revision.
875 876
876 877 Accepts an integer revision. Data is read through the cached
877 878 segment file; any underlying file handle's seek position is not
878 879 preserved.
879 880
880 881 Returns a str holding uncompressed data for the requested revision.
881 882 """
882 883 if self._uncompressed_chunk_cache is not None:
883 884 uncomp = self._uncompressed_chunk_cache.get(rev)
884 885 if uncomp is not None:
885 886 return uncomp
886 887
887 888 compression_mode = self.index[rev][10]
888 889 data = self.get_segment_for_revs(rev, rev)[1]
889 890 if compression_mode == COMP_MODE_PLAIN:
890 891 uncomp = data
891 892 elif compression_mode == COMP_MODE_DEFAULT:
892 893 uncomp = self._decompressor(data)
893 894 elif compression_mode == COMP_MODE_INLINE:
894 895 uncomp = self.decompress(data)
895 896 else:
896 897 msg = b'unknown compression mode %d'
897 898 msg %= compression_mode
898 899 raise error.RevlogError(msg)
899 900 if self._uncompressed_chunk_cache is not None:
900 901 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
901 902 return uncomp
902 903
903 904 def _chunks(self, revs, targetsize=None):
904 905 """Obtain decompressed chunks for the specified revisions.
905 906
906 907 Accepts an iterable of numeric revisions that are assumed to be in
907 908 ascending order. Data is read through the cached segment file;
908 909 any underlying file handle's seek position is not preserved.
909 910 Requests may be satisfied from the uncompressed-chunk cache.
910 911
911 912 This function is similar to calling ``self._chunk()`` multiple times,
912 913 but is faster.
913 914
914 915 Returns a list with decompressed data for each requested revision.
915 916 """
916 917 if not revs:
917 918 return []
918 919 start = self.start
919 920 length = self.length
920 921 inline = self.inline
921 922 iosize = self.index.entry_size
922 923 buffer = util.buffer
923 924
924 925 fetched_revs = []
925 926 fadd = fetched_revs.append
926 927
927 928 chunks = []
928 929 ladd = chunks.append
929 930
930 931 if self._uncompressed_chunk_cache is None:
931 932 fetched_revs = revs
932 933 else:
933 934 for rev in revs:
934 935 cached_value = self._uncompressed_chunk_cache.get(rev)
935 936 if cached_value is None:
936 937 fadd(rev)
937 938 else:
938 939 ladd((rev, cached_value))
939 940
940 941 if not fetched_revs:
941 942 slicedchunks = ()
942 943 elif not self.data_config.with_sparse_read:
943 944 slicedchunks = (fetched_revs,)
944 945 else:
945 946 slicedchunks = deltautil.slicechunk(
946 947 self,
947 948 fetched_revs,
948 949 targetsize=targetsize,
949 950 )
950 951
951 952 for revschunk in slicedchunks:
952 953 firstrev = revschunk[0]
953 954 # Skip trailing revisions with empty diff
954 955 for lastrev in revschunk[::-1]:
955 956 if length(lastrev) != 0:
956 957 break
957 958
958 959 try:
959 960 offset, data = self.get_segment_for_revs(firstrev, lastrev)
960 961 except OverflowError:
961 962 # issue4215 - we can't cache a run of chunks greater than
962 963 # 2G on Windows
963 964 for rev in revschunk:
964 965 ladd((rev, self._chunk(rev)))
965 966
966 967 decomp = self.decompress
967 968 # self._decompressor might be None, but will not be used in that case
968 969 def_decomp = self._decompressor
969 970 for rev in revschunk:
970 971 chunkstart = start(rev)
971 972 if inline:
972 973 chunkstart += (rev + 1) * iosize
973 974 chunklength = length(rev)
974 975 comp_mode = self.index[rev][10]
975 976 c = buffer(data, chunkstart - offset, chunklength)
976 977 if comp_mode == COMP_MODE_PLAIN:
977 978 c = c
978 979 elif comp_mode == COMP_MODE_INLINE:
979 980 c = decomp(c)
980 981 elif comp_mode == COMP_MODE_DEFAULT:
981 982 c = def_decomp(c)
982 983 else:
983 984 msg = b'unknown compression mode %d'
984 985 msg %= comp_mode
985 986 raise error.RevlogError(msg)
986 987 ladd((rev, c))
987 988 if self._uncompressed_chunk_cache is not None:
988 989 self._uncompressed_chunk_cache.insert(rev, c, len(c))
989 990
990 991 chunks.sort()
991 992 return [x[1] for x in chunks]
992 993
993 994 def raw_text(self, node, rev):
994 995 """return the possibly unvalidated rawtext for a revision
995 996
996 997 returns (rev, rawtext, validated)
997 998 """
998 999
999 1000 # revision in the cache (could be useful to apply delta)
1000 1001 cachedrev = None
1001 1002 # An intermediate text to apply deltas to
1002 1003 basetext = None
1003 1004
1004 1005 # Check if we have the entry in cache
1005 1006 # The cache entry looks like (node, rev, rawtext)
1006 1007 if self._revisioncache:
1007 1008 cachedrev = self._revisioncache[1]
1008 1009
1009 1010 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1010 1011 if stopped:
1011 1012 basetext = self._revisioncache[2]
1012 1013
1013 1014 # drop cache to save memory; the caller is expected to
1014 1015 # update self._inner._revisioncache after validating the text
1015 1016 self._revisioncache = None
1016 1017
1017 1018 targetsize = None
1018 1019 rawsize = self.index[rev][2]
1019 1020 if 0 <= rawsize:
1020 1021 targetsize = 4 * rawsize
1021 1022
1022 1023 if self._uncompressed_chunk_cache is not None:
1023 1024 # dynamically update the uncompressed_chunk_cache size to match the
1024 1025 # largest revision we saw in this revlog.
1025 1026 factor = self.data_config.uncompressed_cache_factor
1026 1027 candidate_size = rawsize * factor
1027 1028 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1028 1029 self._uncompressed_chunk_cache.maxcost = candidate_size
1029 1030
1030 1031 bins = self._chunks(chain, targetsize=targetsize)
1031 1032 if basetext is None:
1032 1033 basetext = bytes(bins[0])
1033 1034 bins = bins[1:]
1034 1035
1035 1036 rawtext = mdiff.patches(basetext, bins)
1036 1037 del basetext # let us have a chance to free memory early
1037 1038 return (rev, rawtext, False)
1038 1039
1039 1040 def sidedata(self, rev, sidedata_end):
1040 1041 """Return the sidedata for a given revision number."""
1041 1042 index_entry = self.index[rev]
1042 1043 sidedata_offset = index_entry[8]
1043 1044 sidedata_size = index_entry[9]
1044 1045
1045 1046 if self.inline:
1046 1047 sidedata_offset += self.index.entry_size * (1 + rev)
1047 1048 if sidedata_size == 0:
1048 1049 return {}
1049 1050
1050 1051 if sidedata_end < sidedata_offset + sidedata_size:
1051 1052 filename = self.sidedata_file
1052 1053 end = sidedata_end
1053 1054 offset = sidedata_offset
1054 1055 length = sidedata_size
1055 1056 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1056 1057 raise error.RevlogError(m)
1057 1058
1058 1059 comp_segment = self._segmentfile_sidedata.read_chunk(
1059 1060 sidedata_offset, sidedata_size
1060 1061 )
1061 1062
1062 1063 comp = self.index[rev][11]
1063 1064 if comp == COMP_MODE_PLAIN:
1064 1065 segment = comp_segment
1065 1066 elif comp == COMP_MODE_DEFAULT:
1066 1067 segment = self._decompressor(comp_segment)
1067 1068 elif comp == COMP_MODE_INLINE:
1068 1069 segment = self.decompress(comp_segment)
1069 1070 else:
1070 1071 msg = b'unknown compression mode %d'
1071 1072 msg %= comp
1072 1073 raise error.RevlogError(msg)
1073 1074
1074 1075 sidedata = sidedatautil.deserialize_sidedata(segment)
1075 1076 return sidedata
1076 1077
1077 1078 def write_entry(
1078 1079 self,
1079 1080 transaction,
1080 1081 entry,
1081 1082 data,
1082 1083 link,
1083 1084 offset,
1084 1085 sidedata,
1085 1086 sidedata_offset,
1086 1087 index_end,
1087 1088 data_end,
1088 1089 sidedata_end,
1089 1090 ):
1090 1091 # Files opened in a+ mode have inconsistent behavior on various
1091 1092 # platforms. Windows requires that a file positioning call be made
1092 1093 # when the file handle transitions between reads and writes. See
1093 1094 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1094 1095 # platforms, Python or the platform itself can be buggy. Some versions
1095 1096 # of Solaris have been observed to not append at the end of the file
1096 1097 # if the file was seeked to before the end. See issue4943 for more.
1097 1098 #
1098 1099 # We work around this issue by inserting a seek() before writing.
1099 1100 # Note: This is likely not necessary on Python 3. However, because
1100 1101 # the file handle is reused for reads and may be seeked there, we need
1101 1102 # to be careful before changing this.
1102 1103 if self._writinghandles is None:
1103 1104 msg = b'adding revision outside `revlog._writing` context'
1104 1105 raise error.ProgrammingError(msg)
1105 1106 ifh, dfh, sdfh = self._writinghandles
1106 1107 if index_end is None:
1107 1108 ifh.seek(0, os.SEEK_END)
1108 1109 else:
1109 1110 ifh.seek(index_end, os.SEEK_SET)
1110 1111 if dfh:
1111 1112 if data_end is None:
1112 1113 dfh.seek(0, os.SEEK_END)
1113 1114 else:
1114 1115 dfh.seek(data_end, os.SEEK_SET)
1115 1116 if sdfh:
1116 1117 sdfh.seek(sidedata_end, os.SEEK_SET)
1117 1118
1118 1119 curr = len(self.index) - 1
1119 1120 if not self.inline:
1120 1121 transaction.add(self.data_file, offset)
1121 1122 if self.sidedata_file:
1122 1123 transaction.add(self.sidedata_file, sidedata_offset)
1123 1124 transaction.add(self.canonical_index_file, curr * len(entry))
1124 1125 if data[0]:
1125 1126 dfh.write(data[0])
1126 1127 dfh.write(data[1])
1127 1128 if sidedata:
1128 1129 sdfh.write(sidedata)
1129 1130 if self._delay_buffer is None:
1130 1131 ifh.write(entry)
1131 1132 else:
1132 1133 self._delay_buffer.append(entry)
1133 1134 elif self._delay_buffer is not None:
1134 1135 msg = b'invalid delayed write on inline revlog'
1135 1136 raise error.ProgrammingError(msg)
1136 1137 else:
1137 1138 offset += curr * self.index.entry_size
1138 1139 transaction.add(self.canonical_index_file, offset)
1139 1140 assert not sidedata
1140 1141 ifh.write(entry)
1141 1142 ifh.write(data[0])
1142 1143 ifh.write(data[1])
1143 1144 return (
1144 1145 ifh.tell(),
1145 1146 dfh.tell() if dfh else None,
1146 1147 sdfh.tell() if sdfh else None,
1147 1148 )
1148 1149
1149 1150 def _divert_index(self):
1150 1151 return self.index_file + b'.a'
1151 1152
1152 1153 def delay(self):
1153 1154 assert not self.is_open
1154 1155 if self.inline:
1155 1156 msg = "revlog with delayed write should not be inline"
1156 1157 raise error.ProgrammingError(msg)
1157 1158 if self._delay_buffer is not None or self._orig_index_file is not None:
1158 1159 # delay or divert already in place
1159 1160 return None
1160 1161 elif len(self.index) == 0:
1161 1162 self._orig_index_file = self.index_file
1162 1163 self.index_file = self._divert_index()
1163 1164 assert self._orig_index_file is not None
1164 1165 assert self.index_file is not None
1165 1166 if self.opener.exists(self.index_file):
1166 1167 self.opener.unlink(self.index_file)
1167 1168 return self.index_file
1168 1169 else:
1169 1170 self._delay_buffer = []
1170 1171 return None
1171 1172
1172 1173 def write_pending(self):
1173 1174 assert not self.is_open
1174 1175 if self.inline:
1175 1176 msg = "revlog with delayed write should not be inline"
1176 1177 raise error.ProgrammingError(msg)
1177 1178 if self._orig_index_file is not None:
1178 1179 return None, True
1179 1180 any_pending = False
1180 1181 pending_index_file = self._divert_index()
1181 1182 if self.opener.exists(pending_index_file):
1182 1183 self.opener.unlink(pending_index_file)
1183 1184 util.copyfile(
1184 1185 self.opener.join(self.index_file),
1185 1186 self.opener.join(pending_index_file),
1186 1187 )
1187 1188 if self._delay_buffer:
1188 1189 with self.opener(pending_index_file, b'r+') as ifh:
1189 1190 ifh.seek(0, os.SEEK_END)
1190 1191 ifh.write(b"".join(self._delay_buffer))
1191 1192 any_pending = True
1192 1193 self._delay_buffer = None
1193 1194 self._orig_index_file = self.index_file
1194 1195 self.index_file = pending_index_file
1195 1196 return self.index_file, any_pending
1196 1197
1197 1198 def finalize_pending(self):
1198 1199 assert not self.is_open
1199 1200 if self.inline:
1200 1201 msg = "revlog with delayed write should not be inline"
1201 1202 raise error.ProgrammingError(msg)
1202 1203
1203 1204 delay = self._delay_buffer is not None
1204 1205 divert = self._orig_index_file is not None
1205 1206
1206 1207 if delay and divert:
1207 1208 assert False, "unreachable"
1208 1209 elif delay:
1209 1210 if self._delay_buffer:
1210 1211 with self.opener(self.index_file, b'r+') as ifh:
1211 1212 ifh.seek(0, os.SEEK_END)
1212 1213 ifh.write(b"".join(self._delay_buffer))
1213 1214 self._delay_buffer = None
1214 1215 elif divert:
1215 1216 if self.opener.exists(self.index_file):
1216 1217 self.opener.rename(
1217 1218 self.index_file,
1218 1219 self._orig_index_file,
1219 1220 checkambig=True,
1220 1221 )
1221 1222 self.index_file = self._orig_index_file
1222 1223 self._orig_index_file = None
1223 1224 else:
1224 1225 msg = b"not delay or divert found on this revlog"
1225 1226 raise error.ProgrammingError(msg)
1226 1227 return self.canonical_index_file
1227 1228
1228 1229
1229 1230 class revlog:
1230 1231 """
1231 1232 the underlying revision storage object
1232 1233
1233 1234 A revlog consists of two parts, an index and the revision data.
1234 1235
1235 1236 The index is a file with a fixed record size containing
1236 1237 information on each revision, including its nodeid (hash), the
1237 1238 nodeids of its parents, the position and offset of its data within
1238 1239 the data file, and the revision it's based on. Finally, each entry
1239 1240 contains a linkrev entry that can serve as a pointer to external
1240 1241 data.
1241 1242
1242 1243 The revision data itself is a linear collection of data chunks.
1243 1244 Each chunk represents a revision and is usually represented as a
1244 1245 delta against the previous chunk. To bound lookup time, runs of
1245 1246 deltas are limited to about 2 times the length of the original
1246 1247 version data. This makes retrieval of a version proportional to
1247 1248 its size, or O(1) relative to the number of revisions.
1248 1249
1249 1250 Both pieces of the revlog are written to in an append-only
1250 1251 fashion, which means we never need to rewrite a file to insert or
1251 1252 remove data, and can use some simple techniques to avoid the need
1252 1253 for locking while reading.
1253 1254
1254 1255 If checkambig, indexfile is opened with checkambig=True at
1255 1256 writing, to avoid file stat ambiguity.
1256 1257
1257 1258 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1258 1259 index will be mmapped rather than read if it is larger than the
1259 1260 configured threshold.
1260 1261
1261 1262 If censorable is True, the revlog can have censored revisions.
1262 1263
1263 1264 If `upperboundcomp` is not None, this is the expected maximal gain from
1264 1265 compression for the data content.
1265 1266
1266 1267 `concurrencychecker` is an optional function that receives 3 arguments: a
1267 1268 file handle, a filename, and an expected position. It should check whether
1268 1269 the current position in the file handle is valid, and log/warn/fail (by
1269 1270 raising).
1270 1271
1271 1272 See mercurial/revlogutils/constants.py for details about the content of an
1272 1273 index entry.
1273 1274 """
1274 1275
1275 1276 _flagserrorclass = error.RevlogError
1276 1277
1277 1278 @staticmethod
1278 1279 def is_inline_index(header_bytes):
1279 1280 """Determine if a revlog is inline from the initial bytes of the index"""
1280 1281 if len(header_bytes) == 0:
1281 1282 return True
1282 1283
1283 1284 header = INDEX_HEADER.unpack(header_bytes)[0]
1284 1285
1285 1286 _format_flags = header & ~0xFFFF
1286 1287 _format_version = header & 0xFFFF
1287 1288
1288 1289 features = FEATURES_BY_VERSION[_format_version]
1289 1290 return features[b'inline'](_format_flags)
1290 1291
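The same header split (low 16 bits for the format version, high bits for feature flags) reappears in _loadindex() below. An illustrative decode (`decode_header` and `header_bytes` are placeholders):

```python
# Illustrative header decode, mirroring is_inline_index() above: the
# low 16 bits carry the format version, the high bits the flags.
from mercurial.revlogutils.constants import INDEX_HEADER, FLAG_INLINE_DATA

def decode_header(header_bytes):
    header = INDEX_HEADER.unpack(header_bytes[:4])[0]
    version = header & 0xFFFF                       # e.g. REVLOGV1
    flags = header & ~0xFFFF
    return version, bool(flags & FLAG_INLINE_DATA)  # v1-style inline bit
```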
1291 1292 def __init__(
1292 1293 self,
1293 1294 opener,
1294 1295 target,
1295 1296 radix,
1296 1297 postfix=None, # only exist for `tmpcensored` now
1297 1298 checkambig=False,
1298 1299 mmaplargeindex=False,
1299 1300 censorable=False,
1300 1301 upperboundcomp=None,
1301 1302 persistentnodemap=False,
1302 1303 concurrencychecker=None,
1303 1304 trypending=False,
1304 1305 try_split=False,
1305 1306 canonical_parent_order=True,
1306 1307 data_config=None,
1307 1308 delta_config=None,
1308 1309 feature_config=None,
1309 1310 may_inline=True, # may inline new revlog
1310 1311 ):
1311 1312 """
1312 1313 create a revlog object
1313 1314
1314 1315 opener is a function that abstracts the file opening operation
1315 1316 and can be used to implement COW semantics or the like.
1316 1317
1317 1318 `target`: a (KIND, ID) tuple that identifies the content stored in
1318 1319 this revlog. It helps the rest of the code to understand what the revlog
1319 1320 is about without having to resort to heuristics and index filename
1320 1321 analysis. Note that this must reliably be set by normal code, but
1321 1322 that test, debug, or performance measurement code might not set this to
1322 1323 an accurate value.
1323 1324 """
1324 1325
1325 1326 self.radix = radix
1326 1327
1327 1328 self._docket_file = None
1328 1329 self._indexfile = None
1329 1330 self._datafile = None
1330 1331 self._sidedatafile = None
1331 1332 self._nodemap_file = None
1332 1333 self.postfix = postfix
1333 1334 self._trypending = trypending
1334 1335 self._try_split = try_split
1335 1336 self._may_inline = may_inline
1336 1337 self.opener = opener
1337 1338 if persistentnodemap:
1338 1339 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1339 1340
1340 1341 assert target[0] in ALL_KINDS
1341 1342 assert len(target) == 2
1342 1343 self.target = target
1343 1344 if feature_config is not None:
1344 1345 self.feature_config = feature_config.copy()
1345 1346 elif b'feature-config' in self.opener.options:
1346 1347 self.feature_config = self.opener.options[b'feature-config'].copy()
1347 1348 else:
1348 1349 self.feature_config = FeatureConfig()
1349 1350 self.feature_config.censorable = censorable
1350 1351 self.feature_config.canonical_parent_order = canonical_parent_order
1351 1352 if data_config is not None:
1352 1353 self.data_config = data_config.copy()
1353 1354 elif b'data-config' in self.opener.options:
1354 1355 self.data_config = self.opener.options[b'data-config'].copy()
1355 1356 else:
1356 1357 self.data_config = DataConfig()
1357 1358 self.data_config.check_ambig = checkambig
1358 1359 self.data_config.mmap_large_index = mmaplargeindex
1359 1360 if delta_config is not None:
1360 1361 self.delta_config = delta_config.copy()
1361 1362 elif b'delta-config' in self.opener.options:
1362 1363 self.delta_config = self.opener.options[b'delta-config'].copy()
1363 1364 else:
1364 1365 self.delta_config = DeltaConfig()
1365 1366 self.delta_config.upper_bound_comp = upperboundcomp
1366 1367
1367 1368 # Maps rev to chain base rev.
1368 1369 self._chainbasecache = util.lrucachedict(100)
1369 1370
1370 1371 self.index = None
1371 1372 self._docket = None
1372 1373 self._nodemap_docket = None
1373 1374 # Mapping of partial identifiers to full nodes.
1374 1375 self._pcache = {}
1375 1376
1376 1377 # other optional features
1377 1378
1378 1379 # Make copy of flag processors so each revlog instance can support
1379 1380 # custom flags.
1380 1381 self._flagprocessors = dict(flagutil.flagprocessors)
1381 1382 # prevent nesting of addgroup
1382 1383 self._adding_group = None
1383 1384
1384 1385 chunk_cache = self._loadindex()
1385 1386 self._load_inner(chunk_cache)
1386 1387 self._concurrencychecker = concurrencychecker
1387 1388
1388 1389 def _init_opts(self):
1389 1390 """process options (from above/config) to setup associated default revlog mode
1390 1391
1391 1392 These values might be affected when actually reading on disk information.
1392 1393
1393 1394 The relevant values are returned for use in _loadindex().
1394 1395
1395 1396 * newversionflags:
1396 1397 version header to use if we need to create a new revlog
1397 1398
1398 1399 * mmapindexthreshold:
1399 1400 minimal index size at which to start using mmap
1400 1401
1401 1402 * force_nodemap:
1402 1403 force the usage of a "development" version of the nodemap code
1403 1404 """
1404 1405 opts = self.opener.options
1405 1406
1406 1407 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1407 1408 new_header = CHANGELOGV2
1408 1409 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1409 1410 self.feature_config.compute_rank = compute_rank
1410 1411 elif b'revlogv2' in opts:
1411 1412 new_header = REVLOGV2
1412 1413 elif b'revlogv1' in opts:
1413 1414 new_header = REVLOGV1
1414 1415 if self._may_inline:
1415 1416 new_header |= FLAG_INLINE_DATA
1416 1417 if b'generaldelta' in opts:
1417 1418 new_header |= FLAG_GENERALDELTA
1418 1419 elif b'revlogv0' in self.opener.options:
1419 1420 new_header = REVLOGV0
1420 1421 else:
1421 1422 new_header = REVLOG_DEFAULT_VERSION
1422 1423
1423 1424 mmapindexthreshold = None
1424 1425 if self.data_config.mmap_large_index:
1425 1426 mmapindexthreshold = self.data_config.mmap_index_threshold
1426 1427 if self.feature_config.enable_ellipsis:
1427 1428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1428 1429
1429 1430 # revlog v0 doesn't have flag processors
1430 1431 for flag, processor in opts.get(b'flagprocessors', {}).items():
1431 1432 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1432 1433
1433 1434 chunk_cache_size = self.data_config.chunk_cache_size
1434 1435 if chunk_cache_size <= 0:
1435 1436 raise error.RevlogError(
1436 1437 _(b'revlog chunk cache size %r is not greater than 0')
1437 1438 % chunk_cache_size
1438 1439 )
1439 1440 elif chunk_cache_size & (chunk_cache_size - 1):
1440 1441 raise error.RevlogError(
1441 1442 _(b'revlog chunk cache size %r is not a power of 2')
1442 1443 % chunk_cache_size
1443 1444 )
1444 1445 force_nodemap = opts.get(b'devel-force-nodemap', False)
1445 1446 return new_header, mmapindexthreshold, force_nodemap
1446 1447
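The chunk_cache_size & (chunk_cache_size - 1) test above is the classic power-of-two check; a quick demonstration:

```python
# n & (n - 1) clears the lowest set bit, so it is zero exactly when
# n is a power of two -- the validity test used in _init_opts() above.
for n in (65536, 65537):
    print(n, n > 0 and n & (n - 1) == 0)  # 65536 True, 65537 False
```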
1447 1448 def _get_data(self, filepath, mmap_threshold, size=None):
1448 1449 """return a file content with or without mmap
1449 1450
1450 1451 If the file is missing return the empty string"""
1451 1452 try:
1452 1453 with self.opener(filepath) as fp:
1453 1454 if mmap_threshold is not None:
1454 1455 file_size = self.opener.fstat(fp).st_size
1455 1456 if file_size >= mmap_threshold:
1456 1457 if size is not None:
1457 1458 # avoid potential mmap crash
1458 1459 size = min(file_size, size)
1459 1460 # TODO: should .close() to release resources without
1460 1461 # relying on Python GC
1461 1462 if size is None:
1462 1463 return util.buffer(util.mmapread(fp))
1463 1464 else:
1464 1465 return util.buffer(util.mmapread(fp, size))
1465 1466 if size is None:
1466 1467 return fp.read()
1467 1468 else:
1468 1469 return fp.read(size)
1469 1470 except FileNotFoundError:
1470 1471 return b''
1471 1472
1472 1473 def get_streams(self, max_linkrev, force_inline=False):
1473 1474 """return a list of streams that represent this revlog
1474 1475
1475 1476 This is used by stream-clone to do bytes to bytes copies of a repository.
1476 1477
1477 1478 This streams data for all revisions that refer to a changelog revision up
1478 1479 to `max_linkrev`.
1479 1480
1480 1481 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1481 1482
1482 1483 It returns a list of three-tuples:
1483 1484
1484 1485 [
1485 1486 (filename, bytes_stream, stream_size),
1486 1487
1487 1488 ]
1488 1489 """
1489 1490 n = len(self)
1490 1491 index = self.index
1491 1492 while n > 0:
1492 1493 linkrev = index[n - 1][4]
1493 1494 if linkrev < max_linkrev:
1494 1495 break
1495 1496 # note: this loop will rarely go through multiple iterations, since
1496 1497 # it only traverses commits created during the current streaming
1497 1498 # pull operation.
1498 1499 #
1499 1500 # If this becomes a problem, using a binary search should cap the
1500 1501 # runtime of this.
1501 1502 n = n - 1
1502 1503 if n == 0:
1503 1504 # no data to send
1504 1505 return []
1505 1506 index_size = n * index.entry_size
1506 1507 data_size = self.end(n - 1)
1507 1508
1508 1509 # XXX we might have been split (or stripped) since the object
1509 1510 # initialization. We need to close this race too, by having a way to
1510 1511 # pre-open the files we feed to the revlog and never closing them before
1511 1512 # we are done streaming.
1512 1513
1513 1514 if self._inline:
1514 1515
1515 1516 def get_stream():
1516 1517 with self.opener(self._indexfile, mode=b"r") as fp:
1517 1518 yield None
1518 1519 size = index_size + data_size
1519 1520 if size <= 65536:
1520 1521 yield fp.read(size)
1521 1522 else:
1522 1523 yield from util.filechunkiter(fp, limit=size)
1523 1524
1524 1525 inline_stream = get_stream()
1525 1526 next(inline_stream)
1526 1527 return [
1527 1528 (self._indexfile, inline_stream, index_size + data_size),
1528 1529 ]
1529 1530 elif force_inline:
1530 1531
1531 1532 def get_stream():
1532 1533 with self.reading():
1533 1534 yield None
1534 1535
1535 1536 for rev in range(n):
1536 1537 idx = self.index.entry_binary(rev)
1537 1538 if rev == 0 and self._docket is None:
1538 1539 # re-inject the inline flag
1539 1540 header = self._format_flags
1540 1541 header |= self._format_version
1541 1542 header |= FLAG_INLINE_DATA
1542 1543 header = self.index.pack_header(header)
1543 1544 idx = header + idx
1544 1545 yield idx
1545 1546 yield self._inner.get_segment_for_revs(rev, rev)[1]
1546 1547
1547 1548 inline_stream = get_stream()
1548 1549 next(inline_stream)
1549 1550 return [
1550 1551 (self._indexfile, inline_stream, index_size + data_size),
1551 1552 ]
1552 1553 else:
1553 1554
1554 1555 def get_index_stream():
1555 1556 with self.opener(self._indexfile, mode=b"r") as fp:
1556 1557 yield None
1557 1558 if index_size <= 65536:
1558 1559 yield fp.read(index_size)
1559 1560 else:
1560 1561 yield from util.filechunkiter(fp, limit=index_size)
1561 1562
1562 1563 def get_data_stream():
1563 1564 with self._datafp() as fp:
1564 1565 yield None
1565 1566 if data_size <= 65536:
1566 1567 yield fp.read(data_size)
1567 1568 else:
1568 1569 yield from util.filechunkiter(fp, limit=data_size)
1569 1570
1570 1571 index_stream = get_index_stream()
1571 1572 next(index_stream)
1572 1573 data_stream = get_data_stream()
1573 1574 next(data_stream)
1574 1575 return [
1575 1576 (self._datafile, data_stream, data_size),
1576 1577 (self._indexfile, index_stream, index_size),
1577 1578 ]
1578 1579
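A consumer-side sketch of the three-tuples documented above (`sink_for` and `copy_revlog` are hypothetical helpers standing in for wherever the bytes go):

```python
# Hedged sketch: draining get_streams() for a bytes-to-bytes copy.
# `sink_for` is a hypothetical helper returning a writable file.
def copy_revlog(rl, max_linkrev, sink_for):
    for name, stream, size in rl.get_streams(max_linkrev):
        with sink_for(name) as out:
            for chunk in stream:
                out.write(chunk)
```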
1579 1580 def _loadindex(self, docket=None):
1580 1581
1581 1582 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1582 1583
1583 1584 if self.postfix is not None:
1584 1585 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1585 1586 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1586 1587 entry_point = b'%s.i.a' % self.radix
1587 1588 elif self._try_split and self.opener.exists(self._split_index_file):
1588 1589 entry_point = self._split_index_file
1589 1590 else:
1590 1591 entry_point = b'%s.i' % self.radix
1591 1592
1592 1593 if docket is not None:
1593 1594 self._docket = docket
1594 1595 self._docket_file = entry_point
1595 1596 else:
1596 1597 self._initempty = True
1597 1598 entry_data = self._get_data(entry_point, mmapindexthreshold)
1598 1599 if len(entry_data) > 0:
1599 1600 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1600 1601 self._initempty = False
1601 1602 else:
1602 1603 header = new_header
1603 1604
1604 1605 self._format_flags = header & ~0xFFFF
1605 1606 self._format_version = header & 0xFFFF
1606 1607
1607 1608 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1608 1609 if supported_flags is None:
1609 1610 msg = _(b'unknown version (%d) in revlog %s')
1610 1611 msg %= (self._format_version, self.display_id)
1611 1612 raise error.RevlogError(msg)
1612 1613 elif self._format_flags & ~supported_flags:
1613 1614 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1614 1615 display_flag = self._format_flags >> 16
1615 1616 msg %= (display_flag, self._format_version, self.display_id)
1616 1617 raise error.RevlogError(msg)
1617 1618
1618 1619 features = FEATURES_BY_VERSION[self._format_version]
1619 1620 self._inline = features[b'inline'](self._format_flags)
1620 1621 self.delta_config.general_delta = features[b'generaldelta'](
1621 1622 self._format_flags
1622 1623 )
1623 1624 self.feature_config.has_side_data = features[b'sidedata']
1624 1625
1625 1626 if not features[b'docket']:
1626 1627 self._indexfile = entry_point
1627 1628 index_data = entry_data
1628 1629 else:
1629 1630 self._docket_file = entry_point
1630 1631 if self._initempty:
1631 1632 self._docket = docketutil.default_docket(self, header)
1632 1633 else:
1633 1634 self._docket = docketutil.parse_docket(
1634 1635 self, entry_data, use_pending=self._trypending
1635 1636 )
1636 1637
1637 1638 if self._docket is not None:
1638 1639 self._indexfile = self._docket.index_filepath()
1639 1640 index_data = b''
1640 1641 index_size = self._docket.index_end
1641 1642 if index_size > 0:
1642 1643 index_data = self._get_data(
1643 1644 self._indexfile, mmapindexthreshold, size=index_size
1644 1645 )
1645 1646 if len(index_data) < index_size:
1646 1647 msg = _(b'not enough index data for %s: got %d, expected %d')
1647 1648 msg %= (self.display_id, len(index_data), index_size)
1648 1649 raise error.RevlogError(msg)
1649 1650
1650 1651 self._inline = False
1651 1652 # generaldelta implied by version 2 revlogs.
1652 1653 self.delta_config.general_delta = True
1653 1654 # the logic for persistent nodemap will be dealt with within the
1654 1655 # main docket, so disable it for now.
1655 1656 self._nodemap_file = None
1656 1657
1657 1658 if self._docket is not None:
1658 1659 self._datafile = self._docket.data_filepath()
1659 1660 self._sidedatafile = self._docket.sidedata_filepath()
1660 1661 elif self.postfix is None:
1661 1662 self._datafile = b'%s.d' % self.radix
1662 1663 else:
1663 1664 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1664 1665
1665 1666 self.nodeconstants = sha1nodeconstants
1666 1667 self.nullid = self.nodeconstants.nullid
1667 1668
1668 1669 # sparse-revlog can't be on without general-delta (issue6056)
1669 1670 if not self.delta_config.general_delta:
1670 1671 self.delta_config.sparse_revlog = False
1671 1672
1672 1673 self._storedeltachains = True
1673 1674
1674 1675 devel_nodemap = (
1675 1676 self._nodemap_file
1676 1677 and force_nodemap
1677 1678 and parse_index_v1_nodemap is not None
1678 1679 )
1679 1680
1680 1681 use_rust_index = False
1681 1682 if rustrevlog is not None:
1682 1683 if self._nodemap_file is not None:
1683 1684 use_rust_index = True
1684 1685 else:
1685 1686 use_rust_index = self.opener.options.get(b'rust.index')
1686 1687
1687 1688 self._parse_index = parse_index_v1
1688 1689 if self._format_version == REVLOGV0:
1689 1690 self._parse_index = revlogv0.parse_index_v0
1690 1691 elif self._format_version == REVLOGV2:
1691 1692 self._parse_index = parse_index_v2
1692 1693 elif self._format_version == CHANGELOGV2:
1693 1694 self._parse_index = parse_index_cl_v2
1694 1695 elif devel_nodemap:
1695 1696 self._parse_index = parse_index_v1_nodemap
1696 1697 elif use_rust_index:
1697 self._parse_index = parse_index_v1_mixed
1698 self._parse_index = functools.partial(
1699 parse_index_v1_mixed, default_header=new_header
1700 )
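# The default header presumably lets the mixed (Rust-backed) parser
# pick the right index format when the on-disk index is still empty.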
1698 1701 try:
1699 1702 d = self._parse_index(index_data, self._inline)
1700 1703 index, chunkcache = d
1701 1704 use_nodemap = (
1702 1705 not self._inline
1703 1706 and self._nodemap_file is not None
1704 1707 and hasattr(index, 'update_nodemap_data')
1705 1708 )
1706 1709 if use_nodemap:
1707 1710 nodemap_data = nodemaputil.persisted_data(self)
1708 1711 if nodemap_data is not None:
1709 1712 docket = nodemap_data[0]
1710 1713 if (
1711 1714 len(d[0]) > docket.tip_rev
1712 1715 and d[0][docket.tip_rev][7] == docket.tip_node
1713 1716 ):
1714 1717 # no changelog tampering
1715 1718 self._nodemap_docket = docket
1716 1719 index.update_nodemap_data(*nodemap_data)
1717 1720 except (ValueError, IndexError):
1718 1721 raise error.RevlogError(
1719 1722 _(b"index %s is corrupted") % self.display_id
1720 1723 )
1721 1724 self.index = index
1722 1725 # revnum -> (chain-length, sum-delta-length)
1723 1726 self._chaininfocache = util.lrucachedict(500)
1724 1727
1725 1728 return chunkcache
1726 1729
1727 1730 def _load_inner(self, chunk_cache):
1728 1731 if self._docket is None:
1729 1732 default_compression_header = None
1730 1733 else:
1731 1734 default_compression_header = self._docket.default_compression_header
1732 1735
1733 1736 self._inner = _InnerRevlog(
1734 1737 opener=self.opener,
1735 1738 index=self.index,
1736 1739 index_file=self._indexfile,
1737 1740 data_file=self._datafile,
1738 1741 sidedata_file=self._sidedatafile,
1739 1742 inline=self._inline,
1740 1743 data_config=self.data_config,
1741 1744 delta_config=self.delta_config,
1742 1745 feature_config=self.feature_config,
1743 1746 chunk_cache=chunk_cache,
1744 1747 default_compression_header=default_compression_header,
1745 1748 )
1746 1749
1747 1750 def get_revlog(self):
1748 1751 """simple function to mirror API of other not-really-revlog API"""
1749 1752 return self
1750 1753
1751 1754 @util.propertycache
1752 1755 def revlog_kind(self):
1753 1756 return self.target[0]
1754 1757
1755 1758 @util.propertycache
1756 1759 def display_id(self):
1757 1760 """The public facing "ID" of the revlog that we use in message"""
1758 1761 if self.revlog_kind == KIND_FILELOG:
1759 1762 # Reference the file without the "data/" prefix, so it is familiar
1760 1763 # to the user.
1761 1764 return self.target[1]
1762 1765 else:
1763 1766 return self.radix
1764 1767
1765 1768 def _datafp(self, mode=b'r'):
1766 1769 """file object for the revlog's data file"""
1767 1770 return self.opener(self._datafile, mode=mode)
1768 1771
1769 1772 def tiprev(self):
1770 1773 return len(self.index) - 1
1771 1774
1772 1775 def tip(self):
1773 1776 return self.node(self.tiprev())
1774 1777
1775 1778 def __contains__(self, rev):
1776 1779 return 0 <= rev < len(self)
1777 1780
1778 1781 def __len__(self):
1779 1782 return len(self.index)
1780 1783
1781 1784 def __iter__(self):
1782 1785 return iter(range(len(self)))
1783 1786
1784 1787 def revs(self, start=0, stop=None):
1785 1788 """iterate over all rev in this revlog (from start to stop)"""
1786 1789 return storageutil.iterrevs(len(self), start=start, stop=stop)
1787 1790
1788 1791 def hasnode(self, node):
1789 1792 try:
1790 1793 self.rev(node)
1791 1794 return True
1792 1795 except KeyError:
1793 1796 return False
1794 1797
1795 1798 def _candelta(self, baserev, rev):
1796 1799 """whether two revisions (baserev, rev) can be delta-ed or not"""
1797 1800 # Disable delta if either rev requires a content-changing flag
1798 1801 # processor (ex. LFS). This is because such flag processor can alter
1799 1802 # the rawtext content that the delta will be based on, and two clients
1800 1803 # could have a same revlog node with different flags (i.e. different
1801 1804 # rawtext contents) and the delta could be incompatible.
1802 1805 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1803 1806 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1804 1807 ):
1805 1808 return False
1806 1809 return True
1807 1810
1808 1811 def update_caches(self, transaction):
1809 1812 """update on disk cache
1810 1813
1811 1814 If a transaction is passed, the update may be delayed to transaction
1812 1815 commit."""
1813 1816 if self._nodemap_file is not None:
1814 1817 if transaction is None:
1815 1818 nodemaputil.update_persistent_nodemap(self)
1816 1819 else:
1817 1820 nodemaputil.setup_persistent_nodemap(transaction, self)
1818 1821
1819 1822 def clearcaches(self):
1820 1823 """Clear in-memory caches"""
1821 1824 self._chainbasecache.clear()
1822 1825 self._inner.clear_cache()
1823 1826 self._pcache = {}
1824 1827 self._nodemap_docket = None
1825 1828 self.index.clearcaches()
1826 1829 # The Python code is the one responsible for validating the docket,
1827 1830 # so we end up having to refresh it here.
1828 1831 use_nodemap = (
1829 1832 not self._inline
1830 1833 and self._nodemap_file is not None
1831 1834 and hasattr(self.index, 'update_nodemap_data')
1832 1835 )
1833 1836 if use_nodemap:
1834 1837 nodemap_data = nodemaputil.persisted_data(self)
1835 1838 if nodemap_data is not None:
1836 1839 self._nodemap_docket = nodemap_data[0]
1837 1840 self.index.update_nodemap_data(*nodemap_data)
1838 1841
1839 1842 def rev(self, node):
1840 1843 """return the revision number associated with a <nodeid>"""
1841 1844 try:
1842 1845 return self.index.rev(node)
1843 1846 except TypeError:
1844 1847 raise
1845 1848 except error.RevlogError:
1846 1849 # parsers.c radix tree lookup failed
1847 1850 if (
1848 1851 node == self.nodeconstants.wdirid
1849 1852 or node in self.nodeconstants.wdirfilenodeids
1850 1853 ):
1851 1854 raise error.WdirUnsupported
1852 1855 raise error.LookupError(node, self.display_id, _(b'no node'))
1853 1856
1854 1857 # Accessors for index entries.
1855 1858
1856 1859 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1857 1860 # are flags.
1858 1861 def start(self, rev):
1859 1862 return int(self.index[rev][0] >> 16)
1860 1863
1861 1864 def sidedata_cut_off(self, rev):
1862 1865 sd_cut_off = self.index[rev][8]
1863 1866 if sd_cut_off != 0:
1864 1867 return sd_cut_off
1865 1868 # This is some annoying dance, because entries without sidedata
1866 1869 # currently use 0 as their offset (instead of previous-offset +
1867 1870 # previous-size).
1868 1871 #
1869 1872 # We should reconsider this sidedata → 0 sidedata_offset policy.
1870 1873 # In the meantime, we need this.
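# Walk backwards to the most recent revision that does have sidedata
# and use the end of its sidedata segment as the cut-off.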
1871 1874 while 0 <= rev:
1872 1875 e = self.index[rev]
1873 1876 if e[9] != 0:
1874 1877 return e[8] + e[9]
1875 1878 rev -= 1
1876 1879 return 0
1877 1880
1878 1881 def flags(self, rev):
1879 1882 return self.index[rev][0] & 0xFFFF
1880 1883
1881 1884 def length(self, rev):
1882 1885 return self.index[rev][1]
1883 1886
1884 1887 def sidedata_length(self, rev):
1885 1888 if not self.feature_config.has_side_data:
1886 1889 return 0
1887 1890 return self.index[rev][9]
1888 1891
1889 1892 def rawsize(self, rev):
1890 1893 """return the length of the uncompressed text for a given revision"""
1891 1894 l = self.index[rev][2]
1892 1895 if l >= 0:
1893 1896 return l
1894 1897
1895 1898 t = self.rawdata(rev)
1896 1899 return len(t)
1897 1900
1898 1901 def size(self, rev):
1899 1902 """length of non-raw text (processed by a "read" flag processor)"""
1900 1903 # fast path: if no "read" flag processor could change the content,
1901 1904 # size is rawsize. note: ELLIPSIS is known to not change the content.
1902 1905 flags = self.flags(rev)
1903 1906 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1904 1907 return self.rawsize(rev)
1905 1908
1906 1909 return len(self.revision(rev))
1907 1910
1908 1911 def fast_rank(self, rev):
1909 1912 """Return the rank of a revision if already known, or None otherwise.
1910 1913
1911 1914 The rank of a revision is the size of the sub-graph it defines as a
1912 1915 head. Equivalently, the rank of a revision `r` is the size of the set
1913 1916 `ancestors(r)`, `r` included.
1914 1917
1915 1918 This method returns the rank retrieved from the revlog in constant
1916 1919 time. It makes no attempt at computing unknown values for versions of
1917 1920 the revlog which do not persist the rank.
1918 1921 """
1919 1922 rank = self.index[rev][ENTRY_RANK]
1920 1923 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1921 1924 return None
1922 1925 if rev == nullrev:
1923 1926 return 0 # convention
1924 1927 return rank
1925 1928
1926 1929 def chainbase(self, rev):
1927 1930 base = self._chainbasecache.get(rev)
1928 1931 if base is not None:
1929 1932 return base
1930 1933
1931 1934 index = self.index
1932 1935 iterrev = rev
1933 1936 base = index[iterrev][3]
1934 1937 while base != iterrev:
1935 1938 iterrev = base
1936 1939 base = index[iterrev][3]
1937 1940
1938 1941 self._chainbasecache[rev] = base
1939 1942 return base
1940 1943
1941 1944 def linkrev(self, rev):
1942 1945 return self.index[rev][4]
1943 1946
1944 1947 def parentrevs(self, rev):
1945 1948 try:
1946 1949 entry = self.index[rev]
1947 1950 except IndexError:
1948 1951 if rev == wdirrev:
1949 1952 raise error.WdirUnsupported
1950 1953 raise
1951 1954
1952 1955 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1953 1956 return entry[6], entry[5]
1954 1957 else:
1955 1958 return entry[5], entry[6]
1956 1959
1957 1960 # fast parentrevs(rev) where rev isn't filtered
1958 1961 _uncheckedparentrevs = parentrevs
1959 1962
1960 1963 def node(self, rev):
1961 1964 try:
1962 1965 return self.index[rev][7]
1963 1966 except IndexError:
1964 1967 if rev == wdirrev:
1965 1968 raise error.WdirUnsupported
1966 1969 raise
1967 1970
1968 1971 # Derived from index values.
1969 1972
1970 1973 def end(self, rev):
1971 1974 return self.start(rev) + self.length(rev)
1972 1975
1973 1976 def parents(self, node):
1974 1977 i = self.index
1975 1978 d = i[self.rev(node)]
1976 1979 # inline node() to avoid function call overhead
1977 1980 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1978 1981 return i[d[6]][7], i[d[5]][7]
1979 1982 else:
1980 1983 return i[d[5]][7], i[d[6]][7]
1981 1984
1982 1985 def chainlen(self, rev):
1983 1986 return self._chaininfo(rev)[0]
1984 1987
1985 1988 def _chaininfo(self, rev):
1986 1989 chaininfocache = self._chaininfocache
1987 1990 if rev in chaininfocache:
1988 1991 return chaininfocache[rev]
1989 1992 index = self.index
1990 1993 generaldelta = self.delta_config.general_delta
1991 1994 iterrev = rev
1992 1995 e = index[iterrev]
1993 1996 clen = 0
1994 1997 compresseddeltalen = 0
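# Walk the chain backwards; a revision whose delta base (e[3]) is
# itself stores a full snapshot and terminates the chain.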
1995 1998 while iterrev != e[3]:
1996 1999 clen += 1
1997 2000 compresseddeltalen += e[1]
1998 2001 if generaldelta:
1999 2002 iterrev = e[3]
2000 2003 else:
2001 2004 iterrev -= 1
2002 2005 if iterrev in chaininfocache:
2003 2006 t = chaininfocache[iterrev]
2004 2007 clen += t[0]
2005 2008 compresseddeltalen += t[1]
2006 2009 break
2007 2010 e = index[iterrev]
2008 2011 else:
2009 2012 # Add text length of base since decompressing that also takes
2010 2013 # work. For cache hits the length is already included.
2011 2014 compresseddeltalen += e[1]
2012 2015 r = (clen, compresseddeltalen)
2013 2016 chaininfocache[rev] = r
2014 2017 return r
2015 2018
2016 2019 def _deltachain(self, rev, stoprev=None):
2017 2020 return self._inner._deltachain(rev, stoprev=stoprev)
2018 2021
2019 2022 def ancestors(self, revs, stoprev=0, inclusive=False):
2020 2023 """Generate the ancestors of 'revs' in reverse revision order.
2021 2024 Does not generate revs lower than stoprev.
2022 2025
2023 2026 See the documentation for ancestor.lazyancestors for more details."""
2024 2027
2025 2028 # first, make sure start revisions aren't filtered
2026 2029 revs = list(revs)
2027 2030 checkrev = self.node
2028 2031 for r in revs:
2029 2032 checkrev(r)
2030 2033 # and we're sure ancestors aren't filtered as well
2031 2034
2032 2035 if rustancestor is not None and self.index.rust_ext_compat:
2033 2036 lazyancestors = rustancestor.LazyAncestors
2034 2037 arg = self.index
2035 2038 else:
2036 2039 lazyancestors = ancestor.lazyancestors
2037 2040 arg = self._uncheckedparentrevs
2038 2041 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2039 2042
2040 2043 def descendants(self, revs):
2041 2044 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2042 2045
2043 2046 def findcommonmissing(self, common=None, heads=None):
2044 2047 """Return a tuple of the ancestors of common and the ancestors of heads
2045 2048 that are not ancestors of common. In revset terminology, we return the
2046 2049 tuple:
2047 2050
2048 2051 ::common, (::heads) - (::common)
2049 2052
2050 2053 The list is sorted by revision number, meaning it is
2051 2054 topologically sorted.
2052 2055
2053 2056 'heads' and 'common' are both lists of node IDs. If heads is
2054 2057 not supplied, uses all of the revlog's heads. If common is not
2055 2058 supplied, uses nullid."""
2056 2059 if common is None:
2057 2060 common = [self.nullid]
2058 2061 if heads is None:
2059 2062 heads = self.heads()
2060 2063
2061 2064 common = [self.rev(n) for n in common]
2062 2065 heads = [self.rev(n) for n in heads]
2063 2066
2064 2067 # we want the ancestors, but inclusive
2065 2068 class lazyset:
2066 2069 def __init__(self, lazyvalues):
2067 2070 self.addedvalues = set()
2068 2071 self.lazyvalues = lazyvalues
2069 2072
2070 2073 def __contains__(self, value):
2071 2074 return value in self.addedvalues or value in self.lazyvalues
2072 2075
2073 2076 def __iter__(self):
2074 2077 added = self.addedvalues
2075 2078 for r in added:
2076 2079 yield r
2077 2080 for r in self.lazyvalues:
2078 2081 if r not in added:
2079 2082 yield r
2080 2083
2081 2084 def add(self, value):
2082 2085 self.addedvalues.add(value)
2083 2086
2084 2087 def update(self, values):
2085 2088 self.addedvalues.update(values)
2086 2089
2087 2090 has = lazyset(self.ancestors(common))
2088 2091 has.add(nullrev)
2089 2092 has.update(common)
2090 2093
2091 2094 # take all ancestors from heads that aren't in has
2092 2095 missing = set()
2093 2096 visit = collections.deque(r for r in heads if r not in has)
2094 2097 while visit:
2095 2098 r = visit.popleft()
2096 2099 if r in missing:
2097 2100 continue
2098 2101 else:
2099 2102 missing.add(r)
2100 2103 for p in self.parentrevs(r):
2101 2104 if p not in has:
2102 2105 visit.append(p)
2103 2106 missing = list(missing)
2104 2107 missing.sort()
2105 2108 return has, [self.node(miss) for miss in missing]
2106 2109
2107 2110 def incrementalmissingrevs(self, common=None):
2108 2111 """Return an object that can be used to incrementally compute the
2109 2112 revision numbers of the ancestors of arbitrary sets that are not
2110 2113 ancestors of common. This is an ancestor.incrementalmissingancestors
2111 2114 object.
2112 2115
2113 2116 'common' is a list of revision numbers. If common is not supplied, uses
2114 2117 nullrev.
2115 2118 """
2116 2119 if common is None:
2117 2120 common = [nullrev]
2118 2121
2119 2122 if rustancestor is not None and self.index.rust_ext_compat:
2120 2123 return rustancestor.MissingAncestors(self.index, common)
2121 2124 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2122 2125
2123 2126 def findmissingrevs(self, common=None, heads=None):
2124 2127 """Return the revision numbers of the ancestors of heads that
2125 2128 are not ancestors of common.
2126 2129
2127 2130 More specifically, return a list of revision numbers corresponding to
2128 2131 nodes N such that every N satisfies the following constraints:
2129 2132
2130 2133 1. N is an ancestor of some node in 'heads'
2131 2134 2. N is not an ancestor of any node in 'common'
2132 2135
2133 2136 The list is sorted by revision number, meaning it is
2134 2137 topologically sorted.
2135 2138
2136 2139 'heads' and 'common' are both lists of revision numbers. If heads is
2137 2140 not supplied, uses all of the revlog's heads. If common is not
2138 2141 supplied, uses nullid."""
2139 2142 if common is None:
2140 2143 common = [nullrev]
2141 2144 if heads is None:
2142 2145 heads = self.headrevs()
2143 2146
2144 2147 inc = self.incrementalmissingrevs(common=common)
2145 2148 return inc.missingancestors(heads)
2146 2149
2147 2150 def findmissing(self, common=None, heads=None):
2148 2151 """Return the ancestors of heads that are not ancestors of common.
2149 2152
2150 2153 More specifically, return a list of nodes N such that every N
2151 2154 satisfies the following constraints:
2152 2155
2153 2156 1. N is an ancestor of some node in 'heads'
2154 2157 2. N is not an ancestor of any node in 'common'
2155 2158
2156 2159 The list is sorted by revision number, meaning it is
2157 2160 topologically sorted.
2158 2161
2159 2162 'heads' and 'common' are both lists of node IDs. If heads is
2160 2163 not supplied, uses all of the revlog's heads. If common is not
2161 2164 supplied, uses nullid."""
2162 2165 if common is None:
2163 2166 common = [self.nullid]
2164 2167 if heads is None:
2165 2168 heads = self.heads()
2166 2169
2167 2170 common = [self.rev(n) for n in common]
2168 2171 heads = [self.rev(n) for n in heads]
2169 2172
2170 2173 inc = self.incrementalmissingrevs(common=common)
2171 2174 return [self.node(r) for r in inc.missingancestors(heads)]
2172 2175
2173 2176 def nodesbetween(self, roots=None, heads=None):
2174 2177 """Return a topological path from 'roots' to 'heads'.
2175 2178
2176 2179 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2177 2180 topologically sorted list of all nodes N that satisfy both of
2178 2181 these constraints:
2179 2182
2180 2183 1. N is a descendant of some node in 'roots'
2181 2184 2. N is an ancestor of some node in 'heads'
2182 2185
2183 2186 Every node is considered to be both a descendant and an ancestor
2184 2187 of itself, so every reachable node in 'roots' and 'heads' will be
2185 2188 included in 'nodes'.
2186 2189
2187 2190 'outroots' is the list of reachable nodes in 'roots', i.e., the
2188 2191 subset of 'roots' that is returned in 'nodes'. Likewise,
2189 2192 'outheads' is the subset of 'heads' that is also in 'nodes'.
2190 2193
2191 2194 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2192 2195 unspecified, uses nullid as the only root. If 'heads' is
2193 2196 unspecified, uses list of all of the revlog's heads."""
2194 2197 nonodes = ([], [], [])
2195 2198 if roots is not None:
2196 2199 roots = list(roots)
2197 2200 if not roots:
2198 2201 return nonodes
2199 2202 lowestrev = min([self.rev(n) for n in roots])
2200 2203 else:
2201 2204 roots = [self.nullid] # Everybody's a descendant of nullid
2202 2205 lowestrev = nullrev
2203 2206 if (lowestrev == nullrev) and (heads is None):
2204 2207 # We want _all_ the nodes!
2205 2208 return (
2206 2209 [self.node(r) for r in self],
2207 2210 [self.nullid],
2208 2211 list(self.heads()),
2209 2212 )
2210 2213 if heads is None:
2211 2214 # All nodes are ancestors, so the latest ancestor is the last
2212 2215 # node.
2213 2216 highestrev = len(self) - 1
2214 2217 # Set ancestors to None to signal that every node is an ancestor.
2215 2218 ancestors = None
2216 2219 # Set heads to an empty dictionary for later discovery of heads
2217 2220 heads = {}
2218 2221 else:
2219 2222 heads = list(heads)
2220 2223 if not heads:
2221 2224 return nonodes
2222 2225 ancestors = set()
2223 2226 # Turn heads into a dictionary so we can remove 'fake' heads.
2224 2227 # Also, later we will be using it to filter out the heads we can't
2225 2228 # find from roots.
2226 2229 heads = dict.fromkeys(heads, False)
2227 2230 # Start at the top and keep marking parents until we're done.
2228 2231 nodestotag = set(heads)
2229 2232 # Remember where the top was so we can use it as a limit later.
2230 2233 highestrev = max([self.rev(n) for n in nodestotag])
2231 2234 while nodestotag:
2232 2235 # grab a node to tag
2233 2236 n = nodestotag.pop()
2234 2237 # Never tag nullid
2235 2238 if n == self.nullid:
2236 2239 continue
2237 2240 # A node's revision number represents its place in a
2238 2241 # topologically sorted list of nodes.
2239 2242 r = self.rev(n)
2240 2243 if r >= lowestrev:
2241 2244 if n not in ancestors:
2242 2245 # If we are possibly a descendant of one of the roots
2243 2246 # and we haven't already been marked as an ancestor
2244 2247 ancestors.add(n) # Mark as ancestor
2245 2248 # Add non-nullid parents to list of nodes to tag.
2246 2249 nodestotag.update(
2247 2250 [p for p in self.parents(n) if p != self.nullid]
2248 2251 )
2249 2252 elif n in heads: # We've seen it before, is it a fake head?
2250 2253 # So it is, real heads should not be the ancestors of
2251 2254 # any other heads.
2252 2255 heads.pop(n)
2253 2256 if not ancestors:
2254 2257 return nonodes
2255 2258 # Now that we have our set of ancestors, we want to remove any
2256 2259 # roots that are not ancestors.
2257 2260
2258 2261 # If one of the roots was nullid, everything is included anyway.
2259 2262 if lowestrev > nullrev:
2260 2263 # But, since we weren't, let's recompute the lowest rev to not
2261 2264 # include roots that aren't ancestors.
2262 2265
2263 2266 # Filter out roots that aren't ancestors of heads
2264 2267 roots = [root for root in roots if root in ancestors]
2265 2268 # Recompute the lowest revision
2266 2269 if roots:
2267 2270 lowestrev = min([self.rev(root) for root in roots])
2268 2271 else:
2269 2272 # No more roots? Return empty list
2270 2273 return nonodes
2271 2274 else:
2272 2275 # We are descending from nullid, and don't need to care about
2273 2276 # any other roots.
2274 2277 lowestrev = nullrev
2275 2278 roots = [self.nullid]
2276 2279 # Transform our roots list into a set.
2277 2280 descendants = set(roots)
2278 2281 # Also, keep the original roots so we can filter out roots that aren't
2279 2282 # 'real' roots (i.e. are descended from other roots).
2280 2283 roots = descendants.copy()
2281 2284 # Our topologically sorted list of output nodes.
2282 2285 orderedout = []
2283 2286 # Don't start at nullid since we don't want nullid in our output list,
2284 2287 # and if nullid shows up in descendants, empty parents will look like
2285 2288 # they're descendants.
2286 2289 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2287 2290 n = self.node(r)
2288 2291 isdescendant = False
2289 2292 if lowestrev == nullrev: # Everybody is a descendant of nullid
2290 2293 isdescendant = True
2291 2294 elif n in descendants:
2292 2295 # n is already a descendant
2293 2296 isdescendant = True
2294 2297 # This check only needs to be done here because all the roots
2295 2298 # are already marked as descendants before the loop.
2296 2299 if n in roots:
2297 2300 # If n was a root, check if it's a 'real' root.
2298 2301 p = tuple(self.parents(n))
2299 2302 # If any of its parents are descendants, it's not a root.
2300 2303 if (p[0] in descendants) or (p[1] in descendants):
2301 2304 roots.remove(n)
2302 2305 else:
2303 2306 p = tuple(self.parents(n))
2304 2307 # A node is a descendant if either of its parents is a
2305 2308 # descendant. (We seeded the descendants set with the roots
2306 2309 # up there, remember?)
2307 2310 if (p[0] in descendants) or (p[1] in descendants):
2308 2311 descendants.add(n)
2309 2312 isdescendant = True
2310 2313 if isdescendant and ((ancestors is None) or (n in ancestors)):
2311 2314 # Only include nodes that are both descendants and ancestors.
2312 2315 orderedout.append(n)
2313 2316 if (ancestors is not None) and (n in heads):
2314 2317 # We're trying to figure out which heads are reachable
2315 2318 # from roots.
2316 2319 # Mark this head as having been reached
2317 2320 heads[n] = True
2318 2321 elif ancestors is None:
2319 2322 # Otherwise, we're trying to discover the heads.
2320 2323 # Assume this is a head because if it isn't, the next step
2321 2324 # will eventually remove it.
2322 2325 heads[n] = True
2323 2326 # But, obviously its parents aren't.
2324 2327 for p in self.parents(n):
2325 2328 heads.pop(p, None)
2326 2329 heads = [head for head, flag in heads.items() if flag]
2327 2330 roots = list(roots)
2328 2331 assert orderedout
2329 2332 assert roots
2330 2333 assert heads
2331 2334 return (orderedout, roots, heads)
2332 2335
2333 2336 def headrevs(self, revs=None):
2334 2337 if revs is None:
2335 2338 try:
2336 2339 return self.index.headrevs()
2337 2340 except AttributeError:
2338 2341 return self._headrevs()
2339 2342 if rustdagop is not None and self.index.rust_ext_compat:
2340 2343 return rustdagop.headrevs(self.index, revs)
2341 2344 return dagop.headrevs(revs, self._uncheckedparentrevs)
2342 2345
2343 2346 def computephases(self, roots):
2344 2347 return self.index.computephasesmapsets(roots)
2345 2348
2346 2349 def _headrevs(self):
2347 2350 count = len(self)
2348 2351 if not count:
2349 2352 return [nullrev]
2350 2353 # we won't iterate over filtered revs, so nobody is a head at start
2351 2354 ishead = [0] * (count + 1)
2352 2355 index = self.index
2353 2356 for r in self:
2354 2357 ishead[r] = 1 # I may be a head
2355 2358 e = index[r]
2356 2359 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2357 2360 return [r for r, val in enumerate(ishead) if val]
2358 2361
2359 2362 def heads(self, start=None, stop=None):
2360 2363 """return the list of all nodes that have no children
2361 2364
2362 2365 if start is specified, only heads that are descendants of
2363 2366 start will be returned
2364 2367 if stop is specified, it will consider all the revs from stop
2365 2368 as if they had no children
2366 2369 """
2367 2370 if start is None and stop is None:
2368 2371 if not len(self):
2369 2372 return [self.nullid]
2370 2373 return [self.node(r) for r in self.headrevs()]
2371 2374
2372 2375 if start is None:
2373 2376 start = nullrev
2374 2377 else:
2375 2378 start = self.rev(start)
2376 2379
2377 2380 stoprevs = {self.rev(n) for n in stop or []}
2378 2381
2379 2382 revs = dagop.headrevssubset(
2380 2383 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2381 2384 )
2382 2385
2383 2386 return [self.node(rev) for rev in revs]
2384 2387
2385 2388 def children(self, node):
2386 2389 """find the children of a given node"""
2387 2390 c = []
2388 2391 p = self.rev(node)
2389 2392 for r in self.revs(start=p + 1):
2390 2393 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2391 2394 if prevs:
2392 2395 for pr in prevs:
2393 2396 if pr == p:
2394 2397 c.append(self.node(r))
2395 2398 elif p == nullrev:
2396 2399 c.append(self.node(r))
2397 2400 return c
2398 2401
2399 2402 def commonancestorsheads(self, a, b):
2400 2403 """calculate all the heads of the common ancestors of nodes a and b"""
2401 2404 a, b = self.rev(a), self.rev(b)
2402 2405 ancs = self._commonancestorsheads(a, b)
2403 2406 return pycompat.maplist(self.node, ancs)
2404 2407
2405 2408 def _commonancestorsheads(self, *revs):
2406 2409 """calculate all the heads of the common ancestors of revs"""
2407 2410 try:
2408 2411 ancs = self.index.commonancestorsheads(*revs)
2409 2412 except (AttributeError, OverflowError): # C implementation failed
2410 2413 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2411 2414 return ancs
2412 2415
2413 2416 def isancestor(self, a, b):
2414 2417 """return True if node a is an ancestor of node b
2415 2418
2416 2419 A revision is considered an ancestor of itself."""
2417 2420 a, b = self.rev(a), self.rev(b)
2418 2421 return self.isancestorrev(a, b)
2419 2422
2420 2423 def isancestorrev(self, a, b):
2421 2424 """return True if revision a is an ancestor of revision b
2422 2425
2423 2426 A revision is considered an ancestor of itself.
2424 2427
2425 2428 The implementation of this is trivial but the use of
2426 2429 reachableroots is not."""
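# Revision numbers are topologically sorted, so an ancestor always
# has a number lower than (or equal to) its descendant's.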
2427 2430 if a == nullrev:
2428 2431 return True
2429 2432 elif a == b:
2430 2433 return True
2431 2434 elif a > b:
2432 2435 return False
2433 2436 return bool(self.reachableroots(a, [b], [a], includepath=False))
2434 2437
2435 2438 def reachableroots(self, minroot, heads, roots, includepath=False):
2436 2439 """return (heads(::(<roots> and <roots>::<heads>)))
2437 2440
2438 2441 If includepath is True, return (<roots>::<heads>)."""
2439 2442 try:
2440 2443 return self.index.reachableroots2(
2441 2444 minroot, heads, roots, includepath
2442 2445 )
2443 2446 except AttributeError:
2444 2447 return dagop._reachablerootspure(
2445 2448 self.parentrevs, minroot, roots, heads, includepath
2446 2449 )
2447 2450
2448 2451 def ancestor(self, a, b):
2449 2452 """calculate the "best" common ancestor of nodes a and b"""
2450 2453
2451 2454 a, b = self.rev(a), self.rev(b)
2452 2455 try:
2453 2456 ancs = self.index.ancestors(a, b)
2454 2457 except (AttributeError, OverflowError):
2455 2458 ancs = ancestor.ancestors(self.parentrevs, a, b)
2456 2459 if ancs:
2457 2460 # choose a consistent winner when there's a tie
2458 2461 return min(map(self.node, ancs))
2459 2462 return self.nullid
2460 2463
2461 2464 def _match(self, id):
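"""Try to resolve `id` as an exact match: a revision number, a binary
nodeid, str(rev), or a full hex nodeid; returns None otherwise."""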
2462 2465 if isinstance(id, int):
2463 2466 # rev
2464 2467 return self.node(id)
2465 2468 if len(id) == self.nodeconstants.nodelen:
2466 2469 # possibly a binary node
2467 2470 # odds of a binary node being all hex in ASCII are 1 in 10**25
2468 2471 try:
2469 2472 node = id
2470 2473 self.rev(node) # quick search the index
2471 2474 return node
2472 2475 except error.LookupError:
2473 2476 pass # may be partial hex id
2474 2477 try:
2475 2478 # str(rev)
2476 2479 rev = int(id)
2477 2480 if b"%d" % rev != id:
2478 2481 raise ValueError
2479 2482 if rev < 0:
2480 2483 rev = len(self) + rev
2481 2484 if rev < 0 or rev >= len(self):
2482 2485 raise ValueError
2483 2486 return self.node(rev)
2484 2487 except (ValueError, OverflowError):
2485 2488 pass
2486 2489 if len(id) == 2 * self.nodeconstants.nodelen:
2487 2490 try:
2488 2491 # a full hex nodeid?
2489 2492 node = bin(id)
2490 2493 self.rev(node)
2491 2494 return node
2492 2495 except (binascii.Error, error.LookupError):
2493 2496 pass
2494 2497
2495 2498 def _partialmatch(self, id):
2496 2499 # we don't care about wdirfilenodeids as they should always be full hashes
2497 2500 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2498 2501 ambiguous = False
2499 2502 try:
2500 2503 partial = self.index.partialmatch(id)
2501 2504 if partial and self.hasnode(partial):
2502 2505 if maybewdir:
2503 2506 # single 'ff...' match in radix tree, ambiguous with wdir
2504 2507 ambiguous = True
2505 2508 else:
2506 2509 return partial
2507 2510 elif maybewdir:
2508 2511 # no 'ff...' match in radix tree, wdir identified
2509 2512 raise error.WdirUnsupported
2510 2513 else:
2511 2514 return None
2512 2515 except error.RevlogError:
2513 2516 # parsers.c radix tree lookup gave multiple matches
2514 2517 # fast path: for unfiltered changelog, radix tree is accurate
2515 2518 if not getattr(self, 'filteredrevs', None):
2516 2519 ambiguous = True
2517 2520 # fall through to slow path that filters hidden revisions
2518 2521 except (AttributeError, ValueError):
2519 2522 # we are pure python, or key is not hex
2520 2523 pass
2521 2524 if ambiguous:
2522 2525 raise error.AmbiguousPrefixLookupError(
2523 2526 id, self.display_id, _(b'ambiguous identifier')
2524 2527 )
2525 2528
2526 2529 if id in self._pcache:
2527 2530 return self._pcache[id]
2528 2531
2529 2532 if len(id) <= 40:
2530 2533 # hex(node)[:...]
2531 2534 l = len(id) // 2 * 2 # grab an even number of digits
2532 2535 try:
2533 2536 # we're dropping the last digit, so let's check that it's hex,
2534 2537 # to avoid the expensive computation below if it's not
2535 2538 if len(id) % 2 > 0:
2536 2539 if id[-1] not in hexdigits:
2537 2540 return None
2538 2541 prefix = bin(id[:l])
2539 2542 except binascii.Error:
2540 2543 pass
2541 2544 else:
2542 2545 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2543 2546 nl = [
2544 2547 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2545 2548 ]
2546 2549 if self.nodeconstants.nullhex.startswith(id):
2547 2550 nl.append(self.nullid)
2548 2551 if len(nl) > 0:
2549 2552 if len(nl) == 1 and not maybewdir:
2550 2553 self._pcache[id] = nl[0]
2551 2554 return nl[0]
2552 2555 raise error.AmbiguousPrefixLookupError(
2553 2556 id, self.display_id, _(b'ambiguous identifier')
2554 2557 )
2555 2558 if maybewdir:
2556 2559 raise error.WdirUnsupported
2557 2560 return None
2558 2561
2559 2562 def lookup(self, id):
2560 2563 """locate a node based on:
2561 2564 - revision number or str(revision number)
2562 2565 - nodeid or subset of hex nodeid
2563 2566 """
2564 2567 n = self._match(id)
2565 2568 if n is not None:
2566 2569 return n
2567 2570 n = self._partialmatch(id)
2568 2571 if n:
2569 2572 return n
2570 2573
2571 2574 raise error.LookupError(id, self.display_id, _(b'no match found'))
2572 2575
2573 2576 def shortest(self, node, minlength=1):
2574 2577 """Find the shortest unambiguous prefix that matches node."""
2575 2578
2576 2579 def isvalid(prefix):
2577 2580 try:
2578 2581 matchednode = self._partialmatch(prefix)
2579 2582 except error.AmbiguousPrefixLookupError:
2580 2583 return False
2581 2584 except error.WdirUnsupported:
2582 2585 # single 'ff...' match
2583 2586 return True
2584 2587 if matchednode is None:
2585 2588 raise error.LookupError(node, self.display_id, _(b'no node'))
2586 2589 return True
2587 2590
2588 2591 def maybewdir(prefix):
2589 2592 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2590 2593
2591 2594 hexnode = hex(node)
2592 2595
2593 2596 def disambiguate(hexnode, minlength):
2594 2597 """Disambiguate against wdirid."""
2595 2598 for length in range(minlength, len(hexnode) + 1):
2596 2599 prefix = hexnode[:length]
2597 2600 if not maybewdir(prefix):
2598 2601 return prefix
2599 2602
2600 2603 if not getattr(self, 'filteredrevs', None):
2601 2604 try:
2602 2605 length = max(self.index.shortest(node), minlength)
2603 2606 return disambiguate(hexnode, length)
2604 2607 except error.RevlogError:
2605 2608 if node != self.nodeconstants.wdirid:
2606 2609 raise error.LookupError(
2607 2610 node, self.display_id, _(b'no node')
2608 2611 )
2609 2612 except AttributeError:
2610 2613 # Fall through to pure code
2611 2614 pass
2612 2615
2613 2616 if node == self.nodeconstants.wdirid:
2614 2617 for length in range(minlength, len(hexnode) + 1):
2615 2618 prefix = hexnode[:length]
2616 2619 if isvalid(prefix):
2617 2620 return prefix
2618 2621
2619 2622 for length in range(minlength, len(hexnode) + 1):
2620 2623 prefix = hexnode[:length]
2621 2624 if isvalid(prefix):
2622 2625 return disambiguate(hexnode, length)
2623 2626
2624 2627 def cmp(self, node, text):
2625 2628 """compare text with a given file revision
2626 2629
2627 2630 returns True if text is different than what is stored.
2628 2631 """
2629 2632 p1, p2 = self.parents(node)
2630 2633 return storageutil.hashrevisionsha1(text, p1, p2) != node
2631 2634
2632 2635 def deltaparent(self, rev):
2633 2636 """return deltaparent of the given revision"""
2634 2637 base = self.index[rev][3]
2635 2638 if base == rev:
2636 2639 return nullrev
2637 2640 elif self.delta_config.general_delta:
2638 2641 return base
2639 2642 else:
2640 2643 return rev - 1
2641 2644
2642 2645 def issnapshot(self, rev):
2643 2646 """tells whether rev is a snapshot"""
2644 2647 ret = self._inner.issnapshot(rev)
2645 2648 self.issnapshot = self._inner.issnapshot
2646 2649 return ret
2647 2650
2648 2651 def snapshotdepth(self, rev):
2649 2652 """number of snapshot in the chain before this one"""
2650 2653 if not self.issnapshot(rev):
2651 2654 raise error.ProgrammingError(b'revision %d not a snapshot')
2652 2655 return len(self._inner._deltachain(rev)[0]) - 1
2653 2656
2654 2657 def revdiff(self, rev1, rev2):
2655 2658 """return or calculate a delta between two revisions
2656 2659
2657 2660 The delta calculated is in binary form and is intended to be written to
2658 2661 revlog data directly. So this function needs raw revision data.
2659 2662 """
2660 2663 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2661 2664 return bytes(self._inner._chunk(rev2))
2662 2665
2663 2666 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2664 2667
2665 2668 def revision(self, nodeorrev):
2666 2669 """return an uncompressed revision of a given node or revision
2667 2670 number.
2668 2671 """
2669 2672 return self._revisiondata(nodeorrev)
2670 2673
2671 2674 def sidedata(self, nodeorrev):
2672 2675 """a map of extra data related to the changeset but not part of the hash
2673 2676
2674 2677 This function currently returns a dictionary. However, a more
2675 2678 advanced mapping object will likely be used in the future for more
2676 2679 efficient/lazy code.
2677 2680 """
2678 2681 # deal with <nodeorrev> argument type
2679 2682 if isinstance(nodeorrev, int):
2680 2683 rev = nodeorrev
2681 2684 else:
2682 2685 rev = self.rev(nodeorrev)
2683 2686 return self._sidedata(rev)
2684 2687
2685 2688 def _rawtext(self, node, rev):
2686 2689 """return the possibly unvalidated rawtext for a revision
2687 2690
2688 2691 returns (rev, rawtext, validated)
2689 2692 """
2690 2693 # Check if we have the entry in cache
2691 2694 # The cache entry looks like (node, rev, rawtext)
2692 2695 if self._inner._revisioncache:
2693 2696 if self._inner._revisioncache[0] == node:
2694 2697 return (rev, self._inner._revisioncache[2], True)
2695 2698
2696 2699 if rev is None:
2697 2700 rev = self.rev(node)
2698 2701
2699 2702 return self._inner.raw_text(node, rev)
2700 2703
2701 2704 def _revisiondata(self, nodeorrev, raw=False):
2702 2705 # deal with <nodeorrev> argument type
2703 2706 if isinstance(nodeorrev, int):
2704 2707 rev = nodeorrev
2705 2708 node = self.node(rev)
2706 2709 else:
2707 2710 node = nodeorrev
2708 2711 rev = None
2709 2712
2710 2713 # fast path the special `nullid` rev
2711 2714 if node == self.nullid:
2712 2715 return b""
2713 2716
2714 2717 # ``rawtext`` is the text as stored inside the revlog. Might be the
2715 2718 # revision or might need to be processed to retrieve the revision.
2716 2719 rev, rawtext, validated = self._rawtext(node, rev)
2717 2720
2718 2721 if raw and validated:
2719 2722 # if we don't want to process the raw text and that raw
2720 2723 # text is cached, we can exit early.
2721 2724 return rawtext
2722 2725 if rev is None:
2723 2726 rev = self.rev(node)
2724 2727 # the revlog's flag for this revision
2725 2728 # (usually alter its state or content)
2726 2729 flags = self.flags(rev)
2727 2730
2728 2731 if validated and flags == REVIDX_DEFAULT_FLAGS:
2729 2732 # no extra flags set, no flag processor runs, text = rawtext
2730 2733 return rawtext
2731 2734
2732 2735 if raw:
2733 2736 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2734 2737 text = rawtext
2735 2738 else:
2736 2739 r = flagutil.processflagsread(self, rawtext, flags)
2737 2740 text, validatehash = r
2738 2741 if validatehash:
2739 2742 self.checkhash(text, node, rev=rev)
2740 2743 if not validated:
2741 2744 self._inner._revisioncache = (node, rev, rawtext)
2742 2745
2743 2746 return text
2744 2747
2745 2748 def _sidedata(self, rev):
2746 2749 """Return the sidedata for a given revision number."""
2747 2750 sidedata_end = None
2748 2751 if self._docket is not None:
2749 2752 sidedata_end = self._docket.sidedata_end
2750 2753 return self._inner.sidedata(rev, sidedata_end)
2751 2754
2752 2755 def rawdata(self, nodeorrev):
2753 2756 """return an uncompressed raw data of a given node or revision number."""
2754 2757 return self._revisiondata(nodeorrev, raw=True)
2755 2758
2756 2759 def hash(self, text, p1, p2):
2757 2760 """Compute a node hash.
2758 2761
2759 2762 Available as a function so that subclasses can replace the hash
2760 2763 as needed.
2761 2764 """
2762 2765 return storageutil.hashrevisionsha1(text, p1, p2)
2763 2766
2764 2767 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2765 2768 """Check node hash integrity.
2766 2769
2767 2770 Available as a function so that subclasses can extend hash mismatch
2768 2771 behaviors as needed.
2769 2772 """
2770 2773 try:
2771 2774 if p1 is None and p2 is None:
2772 2775 p1, p2 = self.parents(node)
2773 2776 if node != self.hash(text, p1, p2):
2774 2777 # Clear the revision cache on hash failure. The revision cache
2775 2778 # only stores the raw revision and clearing the cache does have
2776 2779 # the side-effect that we won't have a cache hit when the raw
2777 2780 # revision data is accessed. But this case should be rare and
2778 2781 # it is extra work to teach the cache about the hash
2779 2782 # verification state.
2780 2783 if (
2781 2784 self._inner._revisioncache
2782 2785 and self._inner._revisioncache[0] == node
2783 2786 ):
2784 2787 self._inner._revisioncache = None
2785 2788
2786 2789 revornode = rev
2787 2790 if revornode is None:
2788 2791 revornode = templatefilters.short(hex(node))
2789 2792 raise error.RevlogError(
2790 2793 _(b"integrity check failed on %s:%s")
2791 2794 % (self.display_id, pycompat.bytestr(revornode))
2792 2795 )
2793 2796 except error.RevlogError:
2794 2797 if self.feature_config.censorable and storageutil.iscensoredtext(
2795 2798 text
2796 2799 ):
2797 2800 raise error.CensoredNodeError(self.display_id, node, text)
2798 2801 raise
2799 2802
2800 2803 @property
2801 2804 def _split_index_file(self):
2802 2805 """the path where to expect the index of an ongoing splitting operation
2803 2806
2804 2807 The file will only exist if a splitting operation is in progress, but
2805 2808 it is always expected at the same location."""
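# For example, a filelog with radix ``data/foo`` splits to
# ``data-s/foo.i``, while a store-root revlog with radix
# ``00changelog`` uses ``00changelog.i.s``.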
2806 2809 parts = self.radix.split(b'/')
2807 2810 if len(parts) > 1:
2808 2811 # adds a '-s' suffix to the ``data`` or ``meta`` base directory
2809 2812 head = parts[0] + b'-s'
2810 2813 mids = parts[1:-1]
2811 2814 tail = parts[-1] + b'.i'
2812 2815 pieces = [head] + mids + [tail]
2813 2816 return b'/'.join(pieces)
2814 2817 else:
2815 2818 # the revlog is stored at the root of the store (changelog or
2816 2819 # manifest), no risk of collision.
2817 2820 return self.radix + b'.i.s'
2818 2821
2819 2822 def _enforceinlinesize(self, tr, side_write=True):
2820 2823 """Check if the revlog is too big for inline and convert if so.
2821 2824
2822 2825 This should be called after revisions are added to the revlog. If the
2823 2826 revlog has grown too large to be an inline revlog, it will convert it
2824 2827 to use multiple index and data files.
2825 2828 """
2826 2829 tiprev = len(self) - 1
2827 2830 total_size = self.start(tiprev) + self.length(tiprev)
2828 2831 if not self._inline or (self._may_inline and total_size < _maxinline):
2829 2832 return
2830 2833
2831 2834 if self._docket is not None:
2832 2835 msg = b"inline revlog should not have a docket"
2833 2836 raise error.ProgrammingError(msg)
2834 2837
2835 2838 # In the common case, we enforce the inline size because the revlog
2836 2839 # has been appended to. In that case, it must have an initial offset
2837 2840 # recorded in the transaction.
2838 2841 troffset = tr.findoffset(self._inner.canonical_index_file)
2839 2842 pre_touched = troffset is not None
2840 2843 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2841 2844 raise error.RevlogError(
2842 2845 _(b"%s not found in the transaction") % self._indexfile
2843 2846 )
2844 2847
2845 2848 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2846 2849 tr.add(self._datafile, 0)
2847 2850
2848 2851 new_index_file_path = None
2849 2852 if side_write:
2850 2853 old_index_file_path = self._indexfile
2851 2854 new_index_file_path = self._split_index_file
2852 2855 opener = self.opener
2853 2856 weak_self = weakref.ref(self)
2854 2857
2855 2858 # the "split" index replace the real index when the transaction is
2856 2859 # finalized
2857 2860 def finalize_callback(tr):
2858 2861 opener.rename(
2859 2862 new_index_file_path,
2860 2863 old_index_file_path,
2861 2864 checkambig=True,
2862 2865 )
2863 2866 maybe_self = weak_self()
2864 2867 if maybe_self is not None:
2865 2868 maybe_self._indexfile = old_index_file_path
2866 2869 maybe_self._inner.index_file = maybe_self._indexfile
2867 2870
2868 2871 def abort_callback(tr):
2869 2872 maybe_self = weak_self()
2870 2873 if maybe_self is not None:
2871 2874 maybe_self._indexfile = old_index_file_path
2872 2875 maybe_self._inner.inline = True
2873 2876 maybe_self._inner.index_file = old_index_file_path
2874 2877
2875 2878 tr.registertmp(new_index_file_path)
2876 2879 if self.target[1] is not None:
2877 2880 callback_id = b'000-revlog-split-%d-%s' % self.target
2878 2881 else:
2879 2882 callback_id = b'000-revlog-split-%d' % self.target[0]
2880 2883 tr.addfinalize(callback_id, finalize_callback)
2881 2884 tr.addabort(callback_id, abort_callback)
2882 2885
2883 2886 self._format_flags &= ~FLAG_INLINE_DATA
2884 2887 self._inner.split_inline(
2885 2888 tr,
2886 2889 self._format_flags | self._format_version,
2887 2890 new_index_file_path=new_index_file_path,
2888 2891 )
2889 2892
2890 2893 self._inline = False
2891 2894 if new_index_file_path is not None:
2892 2895 self._indexfile = new_index_file_path
2893 2896
2894 2897 nodemaputil.setup_persistent_nodemap(tr, self)
2895 2898
2896 2899 def _nodeduplicatecallback(self, transaction, node):
2897 2900 """called when trying to add a node already stored."""
2898 2901
2899 2902 @contextlib.contextmanager
2900 2903 def reading(self):
2901 2904 with self._inner.reading():
2902 2905 yield
2903 2906
2904 2907 @contextlib.contextmanager
2905 2908 def _writing(self, transaction):
2906 2909 if self._trypending:
2907 2910 msg = b'try to write in a `trypending` revlog: %s'
2908 2911 msg %= self.display_id
2909 2912 raise error.ProgrammingError(msg)
2910 2913 if self._inner.is_writing:
2911 2914 yield
2912 2915 else:
2913 2916 data_end = None
2914 2917 sidedata_end = None
2915 2918 if self._docket is not None:
2916 2919 data_end = self._docket.data_end
2917 2920 sidedata_end = self._docket.sidedata_end
2918 2921 with self._inner.writing(
2919 2922 transaction,
2920 2923 data_end=data_end,
2921 2924 sidedata_end=sidedata_end,
2922 2925 ):
2923 2926 yield
2924 2927 if self._docket is not None:
2925 2928 self._write_docket(transaction)
2926 2929
2927 2930 @property
2928 2931 def is_delaying(self):
2929 2932 return self._inner.is_delaying
2930 2933
2931 2934 def _write_docket(self, transaction):
2932 2935 """write the current docket on disk
2933 2936
2934 2937 Exist as a method to help changelog to implement transaction logic
2935 2938
2936 2939 We could also imagine using the same transaction logic for all revlog
2937 2940 since docket are cheap."""
2938 2941 self._docket.write(transaction)
2939 2942
2940 2943 def addrevision(
2941 2944 self,
2942 2945 text,
2943 2946 transaction,
2944 2947 link,
2945 2948 p1,
2946 2949 p2,
2947 2950 cachedelta=None,
2948 2951 node=None,
2949 2952 flags=REVIDX_DEFAULT_FLAGS,
2950 2953 deltacomputer=None,
2951 2954 sidedata=None,
2952 2955 ):
2953 2956 """add a revision to the log
2954 2957
2955 2958 text - the revision data to add
2956 2959 transaction - the transaction object used for rollback
2957 2960 link - the linkrev data to add
2958 2961 p1, p2 - the parent nodeids of the revision
2959 2962 cachedelta - an optional precomputed delta
2960 2963 node - nodeid of revision; typically node is not specified, and it is
2961 2964 computed by default as hash(text, p1, p2), however subclasses might
2962 2965 use different hashing method (and override checkhash() in such case)
2963 2966 flags - the known flags to set on the revision
2964 2967 deltacomputer - an optional deltacomputer instance shared between
2965 2968 multiple calls
2966 2969 """
2967 2970 if link == nullrev:
2968 2971 raise error.RevlogError(
2969 2972 _(b"attempted to add linkrev -1 to %s") % self.display_id
2970 2973 )
2971 2974
2972 2975 if sidedata is None:
2973 2976 sidedata = {}
2974 2977 elif sidedata and not self.feature_config.has_side_data:
2975 2978 raise error.ProgrammingError(
2976 2979 _(b"trying to add sidedata to a revlog who don't support them")
2977 2980 )
2978 2981
2979 2982 if flags:
2980 2983 node = node or self.hash(text, p1, p2)
2981 2984
2982 2985 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2983 2986
2984 2987 # If the flag processor modifies the revision data, ignore any provided
2985 2988 # cachedelta.
2986 2989 if rawtext != text:
2987 2990 cachedelta = None
2988 2991
2989 2992 if len(rawtext) > _maxentrysize:
2990 2993 raise error.RevlogError(
2991 2994 _(
2992 2995 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2993 2996 )
2994 2997 % (self.display_id, len(rawtext))
2995 2998 )
2996 2999
2997 3000 node = node or self.hash(rawtext, p1, p2)
2998 3001 rev = self.index.get_rev(node)
2999 3002 if rev is not None:
3000 3003 return rev
3001 3004
3002 3005 if validatehash:
3003 3006 self.checkhash(rawtext, node, p1=p1, p2=p2)
3004 3007
3005 3008 return self.addrawrevision(
3006 3009 rawtext,
3007 3010 transaction,
3008 3011 link,
3009 3012 p1,
3010 3013 p2,
3011 3014 node,
3012 3015 flags,
3013 3016 cachedelta=cachedelta,
3014 3017 deltacomputer=deltacomputer,
3015 3018 sidedata=sidedata,
3016 3019 )
3017 3020
3018 3021 def addrawrevision(
3019 3022 self,
3020 3023 rawtext,
3021 3024 transaction,
3022 3025 link,
3023 3026 p1,
3024 3027 p2,
3025 3028 node,
3026 3029 flags,
3027 3030 cachedelta=None,
3028 3031 deltacomputer=None,
3029 3032 sidedata=None,
3030 3033 ):
3031 3034 """add a raw revision with known flags, node and parents
3032 3035 useful when reusing a revision not stored in this revlog (ex: received
3033 3036 over wire, or read from an external bundle).
3034 3037 """
3035 3038 with self._writing(transaction):
3036 3039 return self._addrevision(
3037 3040 node,
3038 3041 rawtext,
3039 3042 transaction,
3040 3043 link,
3041 3044 p1,
3042 3045 p2,
3043 3046 flags,
3044 3047 cachedelta,
3045 3048 deltacomputer=deltacomputer,
3046 3049 sidedata=sidedata,
3047 3050 )
3048 3051
3049 3052 def compress(self, data):
3050 3053 return self._inner.compress(data)
3051 3054
3052 3055 def decompress(self, data):
3053 3056 return self._inner.decompress(data)
3054 3057
3055 3058 def _addrevision(
3056 3059 self,
3057 3060 node,
3058 3061 rawtext,
3059 3062 transaction,
3060 3063 link,
3061 3064 p1,
3062 3065 p2,
3063 3066 flags,
3064 3067 cachedelta,
3065 3068 alwayscache=False,
3066 3069 deltacomputer=None,
3067 3070 sidedata=None,
3068 3071 ):
3069 3072 """internal function to add revisions to the log
3070 3073
3071 3074 see addrevision for argument descriptions.
3072 3075
3073 3076 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3074 3077
3075 3078 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3076 3079 be used.
3077 3080
3078 3081 invariants:
3079 3082 - rawtext is optional (can be None); if not set, cachedelta must be set.
3080 3083 if both are set, they must correspond to each other.
3081 3084 """
3082 3085 if node == self.nullid:
3083 3086 raise error.RevlogError(
3084 3087 _(b"%s: attempt to add null revision") % self.display_id
3085 3088 )
3086 3089 if (
3087 3090 node == self.nodeconstants.wdirid
3088 3091 or node in self.nodeconstants.wdirfilenodeids
3089 3092 ):
3090 3093 raise error.RevlogError(
3091 3094 _(b"%s: attempt to add wdir revision") % self.display_id
3092 3095 )
3093 3096 if self._inner._writinghandles is None:
3094 3097 msg = b'adding revision outside `revlog._writing` context'
3095 3098 raise error.ProgrammingError(msg)
3096 3099
3097 3100 btext = [rawtext]
3098 3101
3099 3102 curr = len(self)
3100 3103 prev = curr - 1
3101 3104
3102 3105 offset = self._get_data_offset(prev)
3103 3106
3104 3107 if self._concurrencychecker:
3105 3108 ifh, dfh, sdfh = self._inner._writinghandles
3106 3109 # XXX no checking for the sidedata file
3107 3110 if self._inline:
3108 3111 # offset is "as if" it were in the .d file, so we need to add on
3109 3112 # the size of the entry metadata.
3110 3113 self._concurrencychecker(
3111 3114 ifh, self._indexfile, offset + curr * self.index.entry_size
3112 3115 )
3113 3116 else:
3114 3117 # Entries in the .i are a consistent size.
3115 3118 self._concurrencychecker(
3116 3119 ifh, self._indexfile, curr * self.index.entry_size
3117 3120 )
3118 3121 self._concurrencychecker(dfh, self._datafile, offset)
3119 3122
3120 3123 p1r, p2r = self.rev(p1), self.rev(p2)
3121 3124
3122 3125 # full versions are inserted when the needed deltas
3123 3126 # become comparable to the uncompressed text
3124 3127 if rawtext is None:
3125 3128 # need the rawtext size before it was changed by flag processors,
3126 3129 # which is the non-raw size. use revlog explicitly to avoid filelog's
3127 3130 # extra logic that might remove metadata size.
3128 3131 textlen = mdiff.patchedsize(
3129 3132 revlog.size(self, cachedelta[0]), cachedelta[1]
3130 3133 )
3131 3134 else:
3132 3135 textlen = len(rawtext)
3133 3136
3134 3137 if deltacomputer is None:
3135 3138 write_debug = None
3136 3139 if self.delta_config.debug_delta:
3137 3140 write_debug = transaction._report
3138 3141 deltacomputer = deltautil.deltacomputer(
3139 3142 self, write_debug=write_debug
3140 3143 )
3141 3144
3142 3145 if cachedelta is not None and len(cachedelta) == 2:
3143 3146 # If the cached delta has no information about how it should be
3144 3147 # reused, add the default reuse instruction according to the
3145 3148 # revlog's configuration.
3146 3149 if (
3147 3150 self.delta_config.general_delta
3148 3151 and self.delta_config.lazy_delta_base
3149 3152 ):
3150 3153 delta_base_reuse = DELTA_BASE_REUSE_TRY
3151 3154 else:
3152 3155 delta_base_reuse = DELTA_BASE_REUSE_NO
3153 3156 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3154 3157
3155 3158 revinfo = revlogutils.revisioninfo(
3156 3159 node,
3157 3160 p1,
3158 3161 p2,
3159 3162 btext,
3160 3163 textlen,
3161 3164 cachedelta,
3162 3165 flags,
3163 3166 )
3164 3167
3165 3168 deltainfo = deltacomputer.finddeltainfo(revinfo)
3166 3169
3167 3170 compression_mode = COMP_MODE_INLINE
3168 3171 if self._docket is not None:
3169 3172 default_comp = self._docket.default_compression_header
3170 3173 r = deltautil.delta_compression(default_comp, deltainfo)
3171 3174 compression_mode, deltainfo = r
3172 3175
3173 3176 sidedata_compression_mode = COMP_MODE_INLINE
3174 3177 if sidedata and self.feature_config.has_side_data:
3175 3178 sidedata_compression_mode = COMP_MODE_PLAIN
3176 3179 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3177 3180 sidedata_offset = self._docket.sidedata_end
3178 3181 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3179 3182 if (
3180 3183 h != b'u'
3181 3184 and comp_sidedata[0:1] != b'\0'
3182 3185 and len(comp_sidedata) < len(serialized_sidedata)
3183 3186 ):
3184 3187 assert not h
3185 3188 if (
3186 3189 comp_sidedata[0:1]
3187 3190 == self._docket.default_compression_header
3188 3191 ):
3189 3192 sidedata_compression_mode = COMP_MODE_DEFAULT
3190 3193 serialized_sidedata = comp_sidedata
3191 3194 else:
3192 3195 sidedata_compression_mode = COMP_MODE_INLINE
3193 3196 serialized_sidedata = comp_sidedata
3194 3197 else:
3195 3198 serialized_sidedata = b""
3196 3199 # Don't store the offset if the sidedata is empty; that way
3197 3200 # we can easily detect empty sidedata, and it will be no different
3198 3201 # from sidedata we add manually.
3199 3202 sidedata_offset = 0
3200 3203
3201 3204 rank = RANK_UNKNOWN
3202 3205 if self.feature_config.compute_rank:
3203 3206 if (p1r, p2r) == (nullrev, nullrev):
3204 3207 rank = 1
3205 3208 elif p1r != nullrev and p2r == nullrev:
3206 3209 rank = 1 + self.fast_rank(p1r)
3207 3210 elif p1r == nullrev and p2r != nullrev:
3208 3211 rank = 1 + self.fast_rank(p2r)
3209 3212 else: # merge node
3210 3213 if rustdagop is not None and self.index.rust_ext_compat:
3211 3214 rank = rustdagop.rank(self.index, p1r, p2r)
3212 3215 else:
3213 3216 pmin, pmax = sorted((p1r, p2r))
3214 3217 rank = 1 + self.fast_rank(pmax)
3215 3218 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3216 3219
3217 3220 e = revlogutils.entry(
3218 3221 flags=flags,
3219 3222 data_offset=offset,
3220 3223 data_compressed_length=deltainfo.deltalen,
3221 3224 data_uncompressed_length=textlen,
3222 3225 data_compression_mode=compression_mode,
3223 3226 data_delta_base=deltainfo.base,
3224 3227 link_rev=link,
3225 3228 parent_rev_1=p1r,
3226 3229 parent_rev_2=p2r,
3227 3230 node_id=node,
3228 3231 sidedata_offset=sidedata_offset,
3229 3232 sidedata_compressed_length=len(serialized_sidedata),
3230 3233 sidedata_compression_mode=sidedata_compression_mode,
3231 3234 rank=rank,
3232 3235 )
3233 3236
3234 3237 self.index.append(e)
3235 3238 entry = self.index.entry_binary(curr)
3236 3239 if curr == 0 and self._docket is None:
3237 3240 header = self._format_flags | self._format_version
3238 3241 header = self.index.pack_header(header)
3239 3242 entry = header + entry
3240 3243 self._writeentry(
3241 3244 transaction,
3242 3245 entry,
3243 3246 deltainfo.data,
3244 3247 link,
3245 3248 offset,
3246 3249 serialized_sidedata,
3247 3250 sidedata_offset,
3248 3251 )
3249 3252
3250 3253 rawtext = btext[0]
3251 3254
3252 3255 if alwayscache and rawtext is None:
3253 3256 rawtext = deltacomputer.buildtext(revinfo)
3254 3257
3255 3258 if type(rawtext) == bytes: # only accept immutable objects
3256 3259 self._inner._revisioncache = (node, curr, rawtext)
3257 3260 self._chainbasecache[curr] = deltainfo.chainbase
3258 3261 return curr
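
To make the docstring's rawtext/cachedelta invariant concrete, here is a minimal sketch (illustrative names only, not Mercurial API) of the contract a caller of _addrevision has to honor:

def check_addrevision_args(rawtext, cachedelta):
    # Invariant from _addrevision: rawtext may be None, but then a
    # cached delta (base rev, delta bytes[, reuse policy]) must be
    # supplied instead; when both are given they must describe the
    # same revision, which the revlog trusts rather than verifies.
    if rawtext is None and cachedelta is None:
        raise ValueError("need either rawtext or cachedelta")
    return (rawtext is not None, cachedelta is not None)

# Delta-only addition, as addgroup() does when applying a changegroup:
assert check_addrevision_args(None, (0, b"patch-bytes")) == (False, True)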
3259 3262
3260 3263 def _get_data_offset(self, prev):
3261 3264 """Returns the current offset in the (in-transaction) data file.
3261 3264 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
3263 3266 file to store that information: since sidedata can be rewritten to the
3264 3267 end of the data file within a transaction, you can have cases where, for
3265 3268 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3266 3269 to `n - 1`'s sidedata being written after `n`'s data.
3267 3270
3268 3271 TODO cache this in a docket file before getting out of experimental."""
3269 3272 if self._docket is None:
3270 3273 return self.end(prev)
3271 3274 else:
3272 3275 return self._docket.data_end
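
A toy rendition of the rule above, with a hypothetical docket object standing in for the real one:

class FakeDocket:
    def __init__(self, data_end):
        self.data_end = data_end

def get_data_offset(docket, end_of_prev_rev):
    if docket is None:
        # revlog v0/v1: the next write position is simply where the
        # previous revision's data ends.
        return end_of_prev_rev
    # revlog v2: sidedata may have been appended past that point, so
    # only the docket knows the real end of the data file.
    return docket.data_end

assert get_data_offset(None, 1024) == 1024
assert get_data_offset(FakeDocket(data_end=4096), 1024) == 4096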
3273 3276
3274 3277 def _writeentry(
3275 3278 self,
3276 3279 transaction,
3277 3280 entry,
3278 3281 data,
3279 3282 link,
3280 3283 offset,
3281 3284 sidedata,
3282 3285 sidedata_offset,
3283 3286 ):
3284 3287 # Files opened in a+ mode have inconsistent behavior on various
3285 3288 # platforms. Windows requires that a file positioning call be made
3286 3289 # when the file handle transitions between reads and writes. See
3287 3290 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3288 3291 # platforms, Python or the platform itself can be buggy. Some versions
3289 3292 # of Solaris have been observed to not append at the end of the file
3290 3293 # if the file was seeked to before the end. See issue4943 for more.
3291 3294 #
3292 3295 # We work around this issue by inserting a seek() before writing.
3293 3296 # Note: This is likely not necessary on Python 3. However, because
3294 3297 # the file handle is reused for reads and may be seeked there, we need
3295 3298 # to be careful before changing this.
3296 3299 index_end = data_end = sidedata_end = None
3297 3300 if self._docket is not None:
3298 3301 index_end = self._docket.index_end
3299 3302 data_end = self._docket.data_end
3300 3303 sidedata_end = self._docket.sidedata_end
3301 3304
3302 3305 files_end = self._inner.write_entry(
3303 3306 transaction,
3304 3307 entry,
3305 3308 data,
3306 3309 link,
3307 3310 offset,
3308 3311 sidedata,
3309 3312 sidedata_offset,
3310 3313 index_end,
3311 3314 data_end,
3312 3315 sidedata_end,
3313 3316 )
3314 3317 self._enforceinlinesize(transaction)
3315 3318 if self._docket is not None:
3316 3319 self._docket.index_end = files_end[0]
3317 3320 self._docket.data_end = files_end[1]
3318 3321 self._docket.sidedata_end = files_end[2]
3319 3322
3320 3323 nodemaputil.setup_persistent_nodemap(transaction, self)
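
The a+ workaround described at the top of this method amounts to "seek to EOF before writing". A standalone demonstration of the defensive pattern (plain Python, no Mercurial involved):

import io
import os
import tempfile

fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, "wb") as f:
    f.write(b"existing")

with open(path, "a+b") as f:
    f.seek(0)
    assert f.read() == b"existing"  # reading moves the position...
    f.seek(0, io.SEEK_END)          # ...so seek back to EOF explicitly
    f.write(b"-appended")           # before every append

with open(path, "rb") as f:
    assert f.read() == b"existing-appended"
os.unlink(path)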
3321 3324
3322 3325 def addgroup(
3323 3326 self,
3324 3327 deltas,
3325 3328 linkmapper,
3326 3329 transaction,
3327 3330 alwayscache=False,
3328 3331 addrevisioncb=None,
3329 3332 duplicaterevisioncb=None,
3330 3333 debug_info=None,
3331 3334 delta_base_reuse_policy=None,
3332 3335 ):
3333 3336 """
3334 3337 add a delta group
3335 3338
3336 3339 Given a set of deltas, add them to the revision log. The
3337 3340 first delta is against its parent, which should already be in
3338 3341 our log; the rest are against the previous delta (sketched below).
3339 3342
3340 3343 If ``addrevisioncb`` is defined, it will be called with arguments of
3341 3344 this revlog and the node that was added.
3342 3345 """
3343 3346
3344 3347 if self._adding_group:
3345 3348 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3346 3349
3347 3350 # read the default delta-base reuse policy from revlog config if the
3348 3351 # group did not specify one.
3349 3352 if delta_base_reuse_policy is None:
3350 3353 if (
3351 3354 self.delta_config.general_delta
3352 3355 and self.delta_config.lazy_delta_base
3353 3356 ):
3354 3357 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3355 3358 else:
3356 3359 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3357 3360
3358 3361 self._adding_group = True
3359 3362 empty = True
3360 3363 try:
3361 3364 with self._writing(transaction):
3362 3365 write_debug = None
3363 3366 if self.delta_config.debug_delta:
3364 3367 write_debug = transaction._report
3365 3368 deltacomputer = deltautil.deltacomputer(
3366 3369 self,
3367 3370 write_debug=write_debug,
3368 3371 debug_info=debug_info,
3369 3372 )
3370 3373 # loop through our set of deltas
3371 3374 for data in deltas:
3372 3375 (
3373 3376 node,
3374 3377 p1,
3375 3378 p2,
3376 3379 linknode,
3377 3380 deltabase,
3378 3381 delta,
3379 3382 flags,
3380 3383 sidedata,
3381 3384 ) = data
3382 3385 link = linkmapper(linknode)
3383 3386 flags = flags or REVIDX_DEFAULT_FLAGS
3384 3387
3385 3388 rev = self.index.get_rev(node)
3386 3389 if rev is not None:
3387 3390 # this can happen if two branches make the same change
3388 3391 self._nodeduplicatecallback(transaction, rev)
3389 3392 if duplicaterevisioncb:
3390 3393 duplicaterevisioncb(self, rev)
3391 3394 empty = False
3392 3395 continue
3393 3396
3394 3397 for p in (p1, p2):
3395 3398 if not self.index.has_node(p):
3396 3399 raise error.LookupError(
3397 3400 p, self.radix, _(b'unknown parent')
3398 3401 )
3399 3402
3400 3403 if not self.index.has_node(deltabase):
3401 3404 raise error.LookupError(
3402 3405 deltabase, self.display_id, _(b'unknown delta base')
3403 3406 )
3404 3407
3405 3408 baserev = self.rev(deltabase)
3406 3409
3407 3410 if baserev != nullrev and self.iscensored(baserev):
3408 3411 # if base is censored, delta must be full replacement in a
3409 3412 # single patch operation
3410 3413 hlen = struct.calcsize(b">lll")
3411 3414 oldlen = self.rawsize(baserev)
3412 3415 newlen = len(delta) - hlen
3413 3416 if delta[:hlen] != mdiff.replacediffheader(
3414 3417 oldlen, newlen
3415 3418 ):
3416 3419 raise error.CensoredBaseError(
3417 3420 self.display_id, self.node(baserev)
3418 3421 )
3419 3422
3420 3423 if not flags and self._peek_iscensored(baserev, delta):
3421 3424 flags |= REVIDX_ISCENSORED
3422 3425
3423 3426 # We assume consumers of addrevisioncb will want to retrieve
3424 3427 # the added revision, which will require a call to
3425 3428 # revision(). revision() will fast path if there is a cache
3426 3429 # hit. So, we tell _addrevision() to always cache in this case.
3427 3430 # We're only using addgroup() in the context of changegroup
3428 3431 # generation so the revision data can always be handled as raw
3429 3432 # by the flagprocessor.
3430 3433 rev = self._addrevision(
3431 3434 node,
3432 3435 None,
3433 3436 transaction,
3434 3437 link,
3435 3438 p1,
3436 3439 p2,
3437 3440 flags,
3438 3441 (baserev, delta, delta_base_reuse_policy),
3439 3442 alwayscache=alwayscache,
3440 3443 deltacomputer=deltacomputer,
3441 3444 sidedata=sidedata,
3442 3445 )
3443 3446
3444 3447 if addrevisioncb:
3445 3448 addrevisioncb(self, rev)
3446 3449 empty = False
3447 3450 finally:
3448 3451 self._adding_group = False
3449 3452 return not empty
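
A rough model of the loop above, using a toy delta format (the "patch" here is just the replacement fulltext, where a real changegroup carries a binary diff); this only illustrates the duplicate and unknown-base handling:

def apply_group(store, deltas):
    # store: node -> fulltext; deltas: (node, deltabase, patch)
    added = []
    for node, deltabase, patch in deltas:
        if node in store:
            # duplicate: two branches made the same change
            continue
        if deltabase not in store:
            raise LookupError("unknown delta base: %r" % deltabase)
        store[node] = patch  # a real revlog patches the base text here
        added.append(node)
    return added

store = {b"root": b"v0"}
group = [(b"a", b"root", b"v1"), (b"b", b"a", b"v2")]
assert apply_group(store, group) == [b"a", b"b"]
assert apply_group(store, group) == []  # everything is a duplicate now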
3450 3453
3451 3454 def iscensored(self, rev):
3452 3455 """Check if a file revision is censored."""
3453 3456 if not self.feature_config.censorable:
3454 3457 return False
3455 3458
3456 3459 return self.flags(rev) & REVIDX_ISCENSORED
3457 3460
3458 3461 def _peek_iscensored(self, baserev, delta):
3459 3462 """Quickly check if a delta produces a censored revision."""
3460 3463 if not self.feature_config.censorable:
3461 3464 return False
3462 3465
3463 3466 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3464 3467
3465 3468 def getstrippoint(self, minlink):
3466 3469 """find the minimum rev that must be stripped to strip the linkrev
3467 3470
3468 3471 Returns a tuple containing the minimum rev and a set of all revs that
3469 3472 have linkrevs that will be broken by this strip.
3470 3473 """
3471 3474 return storageutil.resolvestripinfo(
3472 3475 minlink,
3473 3476 len(self) - 1,
3474 3477 self.headrevs(),
3475 3478 self.linkrev,
3476 3479 self.parentrevs,
3477 3480 )
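
For intuition, a naive version of this computation (the real storageutil.resolvestripinfo walks heads and ancestors instead of scanning every rev):

def naive_strip_info(linkrevs, minlink):
    # Every rev whose linkrev targets a changeset >= minlink loses its
    # target, so it has to go; truncation can only remove a suffix, so
    # the strip point is the smallest such rev.
    broken = {rev for rev, lr in enumerate(linkrevs) if lr >= minlink}
    strippoint = min(broken, default=len(linkrevs))
    return strippoint, broken

# revs 0..4 with non-monotonic linkrevs, stripping changelog rev >= 3:
assert naive_strip_info([0, 3, 2, 4, 4], minlink=3) == (1, {1, 3, 4})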
3478 3481
3479 3482 def strip(self, minlink, transaction):
3480 3483 """truncate the revlog on the first revision with a linkrev >= minlink
3481 3484
3482 3485 This function is called when we're stripping revision minlink and
3483 3486 its descendants from the repository.
3484 3487
3485 3488 We have to remove all revisions with linkrev >= minlink, because
3486 3489 the equivalent changelog revisions will be renumbered after the
3487 3490 strip.
3488 3491
3489 3492 So we truncate the revlog on the first of these revisions, and
3490 3493 trust that the caller has saved the revisions that shouldn't be
3491 3494 removed and that it'll re-add them after this truncation.
3492 3495 """
3493 3496 if len(self) == 0:
3494 3497 return
3495 3498
3496 3499 rev, _ = self.getstrippoint(minlink)
3497 3500 if rev == len(self):
3498 3501 return
3499 3502
3500 3503 # first truncate the files on disk
3501 3504 data_end = self.start(rev)
3502 3505 if not self._inline:
3503 3506 transaction.add(self._datafile, data_end)
3504 3507 end = rev * self.index.entry_size
3505 3508 else:
3506 3509 end = data_end + (rev * self.index.entry_size)
3507 3510
3508 3511 if self._sidedatafile:
3509 3512 sidedata_end = self.sidedata_cut_off(rev)
3510 3513 transaction.add(self._sidedatafile, sidedata_end)
3511 3514
3512 3515 transaction.add(self._indexfile, end)
3513 3516 if self._docket is not None:
3514 3517 # XXX we could leverage the docket while stripping. However, it is
3515 3518 # not powerful enough at the time of this comment
3516 3519 self._docket.index_end = end
3517 3520 self._docket.data_end = data_end
3518 3521 self._docket.sidedata_end = sidedata_end
3519 3522 self._docket.write(transaction, stripping=True)
3520 3523
3521 3524 # then reset internal state in memory to forget those revisions
3522 3525 self._chaininfocache = util.lrucachedict(500)
3523 3526 self._inner.clear_cache()
3524 3527
3525 3528 del self.index[rev:-1]
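
The truncation offsets computed above reduce to the following arithmetic (a sketch; rev is the first revision to remove and data_start is where its data begins):

def truncation_points(rev, data_start, entry_size, inline):
    data_end = data_start
    if inline:
        # data and index share the .i file, so the offsets add up
        index_end = data_end + rev * entry_size
    else:
        # the index holds fixed-size entries, the .d file is separate
        index_end = rev * entry_size
    return index_end, data_end

# strip from rev 10, whose data starts at byte 5000, with 64-byte entries:
assert truncation_points(10, 5000, 64, inline=False) == (640, 5000)
assert truncation_points(10, 5000, 64, inline=True) == (5640, 5000)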
3526 3529
3527 3530 def checksize(self):
3528 3531 """Check size of index and data files
3529 3532
3530 3533 return a (dd, di) tuple.
3531 3534 - dd: extra bytes for the "data" file
3532 3535 - di: extra bytes for the "index" file
3533 3536
3534 3537 A healthy revlog will return (0, 0).
3535 3538 """
3536 3539 expected = 0
3537 3540 if len(self):
3538 3541 expected = max(0, self.end(len(self) - 1))
3539 3542
3540 3543 try:
3541 3544 with self._datafp() as f:
3542 3545 f.seek(0, io.SEEK_END)
3543 3546 actual = f.tell()
3544 3547 dd = actual - expected
3545 3548 except FileNotFoundError:
3546 3549 dd = 0
3547 3550
3548 3551 try:
3549 3552 f = self.opener(self._indexfile)
3550 3553 f.seek(0, io.SEEK_END)
3551 3554 actual = f.tell()
3552 3555 f.close()
3553 3556 s = self.index.entry_size
3554 3557 i = max(0, actual // s)
3555 3558 di = actual - (i * s)
3556 3559 if self._inline:
3557 3560 databytes = 0
3558 3561 for r in self:
3559 3562 databytes += max(0, self.length(r))
3560 3563 dd = 0
3561 3564 di = actual - len(self) * s - databytes
3562 3565 except FileNotFoundError:
3563 3566 di = 0
3564 3567
3565 3568 return (dd, di)
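
The di computation above is worth seeing in isolation: any bytes past the last whole fixed-size entry are "extra". A toy check:

def index_excess(actual_size, entry_size):
    whole_entries = max(0, actual_size // entry_size)
    return actual_size - whole_entries * entry_size

assert index_excess(640, 64) == 0   # healthy: exactly 10 entries
assert index_excess(650, 64) == 10  # 10 trailing bytes -> di == 10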
3566 3569
3567 3570 def files(self):
3568 3571 """return list of files that compose this revlog"""
3569 3572 res = [self._indexfile]
3570 3573 if self._docket_file is None:
3571 3574 if not self._inline:
3572 3575 res.append(self._datafile)
3573 3576 else:
3574 3577 res.append(self._docket_file)
3575 3578 res.extend(self._docket.old_index_filepaths(include_empty=False))
3576 3579 if self._docket.data_end:
3577 3580 res.append(self._datafile)
3578 3581 res.extend(self._docket.old_data_filepaths(include_empty=False))
3579 3582 if self._docket.sidedata_end:
3580 3583 res.append(self._sidedatafile)
3581 3584 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3582 3585 return res
3583 3586
3584 3587 def emitrevisions(
3585 3588 self,
3586 3589 nodes,
3587 3590 nodesorder=None,
3588 3591 revisiondata=False,
3589 3592 assumehaveparentrevisions=False,
3590 3593 deltamode=repository.CG_DELTAMODE_STD,
3591 3594 sidedata_helpers=None,
3592 3595 debug_info=None,
3593 3596 ):
3594 3597 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3595 3598 raise error.ProgrammingError(
3596 3599 b'unhandled value for nodesorder: %s' % nodesorder
3597 3600 )
3598 3601
3599 3602 if nodesorder is None and not self.delta_config.general_delta:
3600 3603 nodesorder = b'storage'
3601 3604
3602 3605 if (
3603 3606 not self._storedeltachains
3604 3607 and deltamode != repository.CG_DELTAMODE_PREV
3605 3608 ):
3606 3609 deltamode = repository.CG_DELTAMODE_FULL
3607 3610
3608 3611 return storageutil.emitrevisions(
3609 3612 self,
3610 3613 nodes,
3611 3614 nodesorder,
3612 3615 revlogrevisiondelta,
3613 3616 deltaparentfn=self.deltaparent,
3614 3617 candeltafn=self._candelta,
3615 3618 rawsizefn=self.rawsize,
3616 3619 revdifffn=self.revdiff,
3617 3620 flagsfn=self.flags,
3618 3621 deltamode=deltamode,
3619 3622 revisiondata=revisiondata,
3620 3623 assumehaveparentrevisions=assumehaveparentrevisions,
3621 3624 sidedata_helpers=sidedata_helpers,
3622 3625 debug_info=debug_info,
3623 3626 )
3624 3627
3625 3628 DELTAREUSEALWAYS = b'always'
3626 3629 DELTAREUSESAMEREVS = b'samerevs'
3627 3630 DELTAREUSENEVER = b'never'
3628 3631
3629 3632 DELTAREUSEFULLADD = b'fulladd'
3630 3633
3631 3634 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3632 3635
3633 3636 def clone(
3634 3637 self,
3635 3638 tr,
3636 3639 destrevlog,
3637 3640 addrevisioncb=None,
3638 3641 deltareuse=DELTAREUSESAMEREVS,
3639 3642 forcedeltabothparents=None,
3640 3643 sidedata_helpers=None,
3641 3644 ):
3642 3645 """Copy this revlog to another, possibly with format changes.
3643 3646
3644 3647 The destination revlog will contain the same revisions and nodes.
3645 3648 However, it may not be bit-for-bit identical due to e.g. delta encoding
3646 3649 differences.
3647 3650
3648 3651 The ``deltareuse`` argument controls how deltas from the existing revlog
3649 3652 are preserved in the destination revlog. The argument can have the
3650 3653 following values:
3651 3654
3652 3655 DELTAREUSEALWAYS
3653 3656 Deltas will always be reused (if possible), even if the destination
3654 3657 revlog would not select the same revisions for the delta. This is the
3655 3658 fastest mode of operation.
3656 3659 DELTAREUSESAMEREVS
3657 3660 Deltas will be reused if the destination revlog would pick the same
3658 3661 revisions for the delta. This mode strikes a balance between speed
3659 3662 and optimization.
3660 3663 DELTAREUSENEVER
3661 3664 Deltas will never be reused. This is the slowest mode of execution.
3662 3665 This mode can be used to recompute deltas (e.g. if the diff/delta
3663 3666 algorithm changes).
3664 3667 DELTAREUSEFULLADD
3665 3668 Revisions will be re-added as if they were new content. This is
3666 3669 slower than DELTAREUSEALWAYS but allows more mechanisms to kick
3667 3670 in, e.g. large file detection and handling (see sketch below).
3668 3671
3669 3672 Delta computation can be slow, so the choice of delta reuse policy can
3670 3673 significantly affect run time.
3671 3674
3672 3675 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3673 3676 two extremes. Deltas will be reused if they are appropriate. But if the
3674 3677 delta could choose a better revision, it will do so. This means if you
3675 3678 are converting a non-generaldelta revlog to a generaldelta revlog,
3676 3679 deltas will be recomputed if the delta's parent isn't a parent of the
3677 3680 revision.
3678 3681
3679 3682 In addition to the delta policy, the ``forcedeltabothparents``
3680 3683 argument controls whether to force computing deltas against both parents
3681 3684 for merges. If unset, the destination revlog's existing setting is used.
3682 3685
3683 3686 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3684 3687 `sidedata_helpers`.
3685 3688 """
3686 3689 if deltareuse not in self.DELTAREUSEALL:
3687 3690 raise ValueError(
3688 3691 _(b'value for deltareuse invalid: %s') % deltareuse
3689 3692 )
3690 3693
3691 3694 if len(destrevlog):
3692 3695 raise ValueError(_(b'destination revlog is not empty'))
3693 3696
3694 3697 if getattr(self, 'filteredrevs', None):
3695 3698 raise ValueError(_(b'source revlog has filtered revisions'))
3696 3699 if getattr(destrevlog, 'filteredrevs', None):
3697 3700 raise ValueError(_(b'destination revlog has filtered revisions'))
3698 3701
3699 3702 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3700 3703 # if possible.
3701 3704 old_delta_config = destrevlog.delta_config
3702 3705 destrevlog.delta_config = destrevlog.delta_config.copy()
3703 3706
3704 3707 try:
3705 3708 if deltareuse == self.DELTAREUSEALWAYS:
3706 3709 destrevlog.delta_config.lazy_delta_base = True
3707 3710 destrevlog.delta_config.lazy_delta = True
3708 3711 elif deltareuse == self.DELTAREUSESAMEREVS:
3709 3712 destrevlog.delta_config.lazy_delta_base = False
3710 3713 destrevlog.delta_config.lazy_delta = True
3711 3714 elif deltareuse == self.DELTAREUSENEVER:
3712 3715 destrevlog.delta_config.lazy_delta_base = False
3713 3716 destrevlog.delta_config.lazy_delta = False
3714 3717
3715 3718 delta_both_parents = (
3716 3719 forcedeltabothparents or old_delta_config.delta_both_parents
3717 3720 )
3718 3721 destrevlog.delta_config.delta_both_parents = delta_both_parents
3719 3722
3720 3723 with self.reading(), destrevlog._writing(tr):
3721 3724 self._clone(
3722 3725 tr,
3723 3726 destrevlog,
3724 3727 addrevisioncb,
3725 3728 deltareuse,
3726 3729 forcedeltabothparents,
3727 3730 sidedata_helpers,
3728 3731 )
3729 3732
3730 3733 finally:
3731 3734 destrevlog.delta_config = old_delta_config
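
The policy-to-flags mapping applied above, restated as a small table (illustrative; the real code mutates a copy of the destination's delta_config, and DELTAREUSEFULLADD leaves it untouched):

_POLICY_FLAGS = {
    b'always':   (True, True),    # (lazy_delta_base, lazy_delta)
    b'samerevs': (False, True),
    b'never':    (False, False),
}

def clone_delta_flags(policy, current_flags):
    return _POLICY_FLAGS.get(policy, current_flags)

assert clone_delta_flags(b'samerevs', (True, True)) == (False, True)
assert clone_delta_flags(b'fulladd', (True, True)) == (True, True)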
3732 3735
3733 3736 def _clone(
3734 3737 self,
3735 3738 tr,
3736 3739 destrevlog,
3737 3740 addrevisioncb,
3738 3741 deltareuse,
3739 3742 forcedeltabothparents,
3740 3743 sidedata_helpers,
3741 3744 ):
3742 3745 """perform the core duty of `revlog.clone` after parameter processing"""
3743 3746 write_debug = None
3744 3747 if self.delta_config.debug_delta:
3745 3748 write_debug = tr._report
3746 3749 deltacomputer = deltautil.deltacomputer(
3747 3750 destrevlog,
3748 3751 write_debug=write_debug,
3749 3752 )
3750 3753 index = self.index
3751 3754 for rev in self:
3752 3755 entry = index[rev]
3753 3756
3754 3757 # Some classes override linkrev to take filtered revs into
3755 3758 # account. Use raw entry from index.
3756 3759 flags = entry[0] & 0xFFFF
3757 3760 linkrev = entry[4]
3758 3761 p1 = index[entry[5]][7]
3759 3762 p2 = index[entry[6]][7]
3760 3763 node = entry[7]
3761 3764
3762 3765 # (Possibly) reuse the delta from the revlog if allowed and
3763 3766 # the revlog chunk is a delta.
3764 3767 cachedelta = None
3765 3768 rawtext = None
3766 3769 if deltareuse == self.DELTAREUSEFULLADD:
3767 3770 text = self._revisiondata(rev)
3768 3771 sidedata = self.sidedata(rev)
3769 3772
3770 3773 if sidedata_helpers is not None:
3771 3774 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3772 3775 self, sidedata_helpers, sidedata, rev
3773 3776 )
3774 3777 flags = flags | new_flags[0] & ~new_flags[1]
3775 3778
3776 3779 destrevlog.addrevision(
3777 3780 text,
3778 3781 tr,
3779 3782 linkrev,
3780 3783 p1,
3781 3784 p2,
3782 3785 cachedelta=cachedelta,
3783 3786 node=node,
3784 3787 flags=flags,
3785 3788 deltacomputer=deltacomputer,
3786 3789 sidedata=sidedata,
3787 3790 )
3788 3791 else:
3789 3792 if destrevlog.delta_config.lazy_delta:
3790 3793 dp = self.deltaparent(rev)
3791 3794 if dp != nullrev:
3792 3795 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3793 3796
3794 3797 sidedata = None
3795 3798 if not cachedelta:
3796 3799 try:
3797 3800 rawtext = self._revisiondata(rev)
3798 3801 except error.CensoredNodeError as censored:
3799 3802 assert flags & REVIDX_ISCENSORED
3800 3803 rawtext = censored.tombstone
3801 3804 sidedata = self.sidedata(rev)
3802 3805 if sidedata is None:
3803 3806 sidedata = self.sidedata(rev)
3804 3807
3805 3808 if sidedata_helpers is not None:
3806 3809 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3807 3810 self, sidedata_helpers, sidedata, rev
3808 3811 )
3809 3812 flags = flags | new_flags[0] & ~new_flags[1]
3810 3813
3811 3814 destrevlog._addrevision(
3812 3815 node,
3813 3816 rawtext,
3814 3817 tr,
3815 3818 linkrev,
3816 3819 p1,
3817 3820 p2,
3818 3821 flags,
3819 3822 cachedelta,
3820 3823 deltacomputer=deltacomputer,
3821 3824 sidedata=sidedata,
3822 3825 )
3823 3826
3824 3827 if addrevisioncb:
3825 3828 addrevisioncb(self, rev, node)
3826 3829
3827 3830 def censorrevision(self, tr, censornode, tombstone=b''):
3828 3831 if self._format_version == REVLOGV0:
3829 3832 raise error.RevlogError(
3830 3833 _(b'cannot censor with version %d revlogs')
3831 3834 % self._format_version
3832 3835 )
3833 3836 elif self._format_version == REVLOGV1:
3834 3837 rewrite.v1_censor(self, tr, censornode, tombstone)
3835 3838 else:
3836 3839 rewrite.v2_censor(self, tr, censornode, tombstone)
3837 3840
3838 3841 def verifyintegrity(self, state):
3839 3842 """Verifies the integrity of the revlog.
3840 3843
3841 3844 Yields ``revlogproblem`` instances describing problems that are
3842 3845 found.
3843 3846 """
3844 3847 dd, di = self.checksize()
3845 3848 if dd:
3846 3849 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3847 3850 if di:
3848 3851 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3849 3852
3850 3853 version = self._format_version
3851 3854
3852 3855 # The verifier tells us what revlog version we should be.
3853 3856 if version != state[b'expectedversion']:
3854 3857 yield revlogproblem(
3855 3858 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3856 3859 % (self.display_id, version, state[b'expectedversion'])
3857 3860 )
3858 3861
3859 3862 state[b'skipread'] = set()
3860 3863 state[b'safe_renamed'] = set()
3861 3864
3862 3865 for rev in self:
3863 3866 node = self.node(rev)
3864 3867
3865 3868 # Verify contents. 4 cases to care about:
3866 3869 #
3867 3870 # common: the most common case
3868 3871 # rename: with a rename
3869 3872 # meta: file content starts with b'\1\n', the metadata
3870 3873 # header defined in filelog.py, but without a rename
3871 3874 # ext: content stored externally
3872 3875 #
3873 3876 # More formally, their differences are shown below:
3874 3877 #
3875 3878 # | common | rename | meta | ext
3876 3879 # -------------------------------------------------------
3877 3880 # flags() | 0 | 0 | 0 | not 0
3878 3881 # renamed() | False | True | False | ?
3879 3882 # rawtext[0:2]=='\1\n'| False | True | True | ?
3880 3883 #
3881 3884 # "rawtext" means the raw text stored in revlog data, which
3882 3885 # could be retrieved by "rawdata(rev)". "text"
3883 3886 # mentioned below is "revision(rev)".
3884 3887 #
3885 3888 # There are 3 different lengths stored physically:
3886 3889 # 1. L1: rawsize, stored in revlog index
3887 3890 # 2. L2: len(rawtext), stored in revlog data
3888 3891 # 3. L3: len(text), stored in revlog data if flags==0, or
3889 3892 # possibly somewhere else if flags!=0
3890 3893 #
3891 3894 # L1 should be equal to L2. L3 could be different from them.
3892 3895 # "text" may or may not affect commit hash depending on flag
3893 3896 # processors (see flagutil.addflagprocessor).
3894 3897 #
3895 3898 # | common | rename | meta | ext
3896 3899 # -------------------------------------------------
3897 3900 # rawsize() | L1 | L1 | L1 | L1
3898 3901 # size() | L1 | L2-LM | L1(*) | L1 (?)
3899 3902 # len(rawtext) | L2 | L2 | L2 | L2
3900 3903 # len(text) | L2 | L2 | L2 | L3
3901 3904 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3902 3905 #
3903 3906 # LM: length of metadata, depending on rawtext
3904 3907 # (*): not ideal, see comment in filelog.size
3905 3908 # (?): could be "- len(meta)" if the resolved content has
3906 3909 # rename metadata
3907 3910 #
3908 3911 # Checks needed to be done:
3909 3912 # 1. length check: L1 == L2, in all cases.
3910 3913 # 2. hash check: depending on flag processor, we may need to
3911 3914 # use either "text" (external), or "rawtext" (in revlog).
3912 3915
3913 3916 try:
3914 3917 skipflags = state.get(b'skipflags', 0)
3915 3918 if skipflags:
3916 3919 skipflags &= self.flags(rev)
3917 3920
3918 3921 _verify_revision(self, skipflags, state, node)
3919 3922
3920 3923 l1 = self.rawsize(rev)
3921 3924 l2 = len(self.rawdata(node))
3922 3925
3923 3926 if l1 != l2:
3924 3927 yield revlogproblem(
3925 3928 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3926 3929 node=node,
3927 3930 )
3928 3931
3929 3932 except error.CensoredNodeError:
3930 3933 if state[b'erroroncensored']:
3931 3934 yield revlogproblem(
3932 3935 error=_(b'censored file data'), node=node
3933 3936 )
3934 3937 state[b'skipread'].add(node)
3935 3938 except Exception as e:
3936 3939 yield revlogproblem(
3937 3940 error=_(b'unpacking %s: %s')
3938 3941 % (short(node), stringutil.forcebytestr(e)),
3939 3942 node=node,
3940 3943 )
3941 3944 state[b'skipread'].add(node)
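
The core "length check" from the long comment above (L1 == L2) is easy to state standalone; a sketch over a fake store mapping rev -> (rawsize from the index, raw data bytes):

def length_problems(store):
    for rev, (l1, rawdata) in sorted(store.items()):
        l2 = len(rawdata)
        if l1 != l2:
            yield rev, b'unpacked size is %d, %d expected' % (l2, l1)

store = {0: (5, b"hello"), 1: (9, b"truncated")}
assert list(length_problems(store)) == []
store[2] = (10, b"short")
assert list(length_problems(store)) == [
    (2, b'unpacked size is 5, 10 expected')
]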
3942 3945
3943 3946 def storageinfo(
3944 3947 self,
3945 3948 exclusivefiles=False,
3946 3949 sharedfiles=False,
3947 3950 revisionscount=False,
3948 3951 trackedsize=False,
3949 3952 storedsize=False,
3950 3953 ):
3951 3954 d = {}
3952 3955
3953 3956 if exclusivefiles:
3954 3957 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3955 3958 if not self._inline:
3956 3959 d[b'exclusivefiles'].append((self.opener, self._datafile))
3957 3960
3958 3961 if sharedfiles:
3959 3962 d[b'sharedfiles'] = []
3960 3963
3961 3964 if revisionscount:
3962 3965 d[b'revisionscount'] = len(self)
3963 3966
3964 3967 if trackedsize:
3965 3968 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3966 3969
3967 3970 if storedsize:
3968 3971 d[b'storedsize'] = sum(
3969 3972 self.opener.stat(path).st_size for path in self.files()
3970 3973 )
3971 3974
3972 3975 return d
3973 3976
3974 3977 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3975 3978 if not self.feature_config.has_side_data:
3976 3979 return
3977 3980 # revlog formats with sidedata support do not support inline
3978 3981 assert not self._inline
3979 3982 if not helpers[1] and not helpers[2]:
3980 3983 # Nothing to generate or remove
3981 3984 return
3982 3985
3983 3986 new_entries = []
3984 3987 # append the new sidedata
3985 3988 with self._writing(transaction):
3986 3989 ifh, dfh, sdfh = self._inner._writinghandles
3987 3990 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3988 3991
3989 3992 current_offset = sdfh.tell()
3990 3993 for rev in range(startrev, endrev + 1):
3991 3994 entry = self.index[rev]
3992 3995 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3993 3996 store=self,
3994 3997 sidedata_helpers=helpers,
3995 3998 sidedata={},
3996 3999 rev=rev,
3997 4000 )
3998 4001
3999 4002 serialized_sidedata = sidedatautil.serialize_sidedata(
4000 4003 new_sidedata
4001 4004 )
4002 4005
4003 4006 sidedata_compression_mode = COMP_MODE_INLINE
4004 4007 if serialized_sidedata and self.feature_config.has_side_data:
4005 4008 sidedata_compression_mode = COMP_MODE_PLAIN
4006 4009 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4007 4010 if (
4008 4011 h != b'u'
4009 4012 and comp_sidedata[0] != b'\0'
4010 4013 and len(comp_sidedata) < len(serialized_sidedata)
4011 4014 ):
4012 4015 assert not h
4013 4016 if (
4014 4017 comp_sidedata[0]
4015 4018 == self._docket.default_compression_header
4016 4019 ):
4017 4020 sidedata_compression_mode = COMP_MODE_DEFAULT
4018 4021 serialized_sidedata = comp_sidedata
4019 4022 else:
4020 4023 sidedata_compression_mode = COMP_MODE_INLINE
4021 4024 serialized_sidedata = comp_sidedata
4022 4025 if entry[8] != 0 or entry[9] != 0:
4023 4026 # rewriting entries that already have sidedata is not
4024 4027 # supported yet, because it introduces garbage data in the
4025 4028 # revlog.
4026 4029 msg = b"rewriting existing sidedata is not supported yet"
4027 4030 raise error.Abort(msg)
4028 4031
4029 4032 # Apply (potential) flags to add and to remove after running
4030 4033 # the sidedata helpers
4031 4034 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4032 4035 entry_update = (
4033 4036 current_offset,
4034 4037 len(serialized_sidedata),
4035 4038 new_offset_flags,
4036 4039 sidedata_compression_mode,
4037 4040 )
4038 4041
4039 4042 # the sidedata computation might have moved the file cursors around
4040 4043 sdfh.seek(current_offset, os.SEEK_SET)
4041 4044 sdfh.write(serialized_sidedata)
4042 4045 new_entries.append(entry_update)
4043 4046 current_offset += len(serialized_sidedata)
4044 4047 self._docket.sidedata_end = sdfh.tell()
4045 4048
4046 4049 # rewrite the new index entries
4047 4050 ifh.seek(startrev * self.index.entry_size)
4048 4051 for i, e in enumerate(new_entries):
4049 4052 rev = startrev + i
4050 4053 self.index.replace_sidedata_info(rev, *e)
4051 4054 packed = self.index.entry_binary(rev)
4052 4055 if rev == 0 and self._docket is None:
4053 4056 header = self._format_flags | self._format_version
4054 4057 header = self.index.pack_header(header)
4055 4058 packed = header + packed
4056 4059 ifh.write(packed)
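
The compression-mode choice made both here and in _addrevision follows one rule: keep the compressed sidedata only when it is strictly smaller and unambiguous to read back. A hedged restatement (constant values mirror revlogutils.constants):

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def pick_sidedata_mode(raw, compressed, header, default_header):
    # header == b'u' means the compressor stored the data uncompressed;
    # a leading NUL byte is reserved; otherwise keep the compressed form
    # only when it is strictly smaller than the raw serialization.
    if (header != b'u'
            and compressed[0:1] != b'\0'
            and len(compressed) < len(raw)):
        if compressed[0:1] == default_header:
            # the reader can use the revlog-wide default decompressor
            return COMP_MODE_DEFAULT, compressed
        # the reader must sniff the engine from the chunk itself
        return COMP_MODE_INLINE, compressed
    return COMP_MODE_PLAIN, raw

mode, data = pick_sidedata_mode(b"x" * 100, b"\x28tiny", b"", b"\x28")
assert mode == COMP_MODE_DEFAULT and data == b"\x28tiny"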
@@ -1,38 +1,38 b''
1 1 // debugdata.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::repo::Repo;
9 use crate::requirements;
10 9 use crate::revlog::{Revlog, RevlogError};
11 10
12 11 /// Kind of data to debug
13 #[derive(Debug, Copy, Clone)]
12 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
14 13 pub enum DebugDataKind {
15 14 Changelog,
16 15 Manifest,
17 16 }
18 17
19 18 /// Dump the contents data of a revision.
20 19 pub fn debug_data(
21 20 repo: &Repo,
22 21 revset: &str,
23 22 kind: DebugDataKind,
24 23 ) -> Result<Vec<u8>, RevlogError> {
25 24 let index_file = match kind {
26 25 DebugDataKind::Changelog => "00changelog.i",
27 26 DebugDataKind::Manifest => "00manifest.i",
28 27 };
29 let use_nodemap = repo
30 .requirements()
31 .contains(requirements::NODEMAP_REQUIREMENT);
32 let revlog =
33 Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
28 let revlog = Revlog::open(
29 &repo.store_vfs(),
30 index_file,
31 None,
32 repo.default_revlog_options(kind == DebugDataKind::Changelog)?,
33 )?;
34 34 let rev =
35 35 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
36 36 let data = revlog.get_rev_data_for_checked_rev(rev)?;
37 37 Ok(data.into_owned())
38 38 }
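
The change above replaces the single use_nodemap boolean with an options value derived from the repository requirements (Repo::default_revlog_options). A rough Python sketch of that derivation; the requirement strings here are illustrative and the authoritative constants live in requirements.rs:

def default_revlog_options(requirements, for_changelog):
    options = {
        # hypothetical mirrors of the NODEMAP/GENERALDELTA requirements
        'use_nodemap': 'persistent-nodemap' in requirements,
        'generaldelta': 'generaldelta' in requirements,
    }
    if for_changelog and 'exp-changelog-v2' in requirements:
        options['version'] = 'changelog-v2'
    elif 'exp-revlogv2.2' in requirements:
        options['version'] = 'v2'
    else:
        options['version'] = 'v1'
    return options

opts = default_revlog_options({'generaldelta', 'persistent-nodemap'}, False)
assert opts['version'] == 'v1' and opts['use_nodemap']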
@@ -1,782 +1,820 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 7 use crate::errors::HgResultExt;
8 8 use crate::errors::{HgError, IoResultExt};
9 9 use crate::lock::{try_with_lock_no_wait, LockError};
10 10 use crate::manifest::{Manifest, Manifestlog};
11 use crate::requirements::{
12 CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT,
13 REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
14 };
11 15 use crate::revlog::filelog::Filelog;
12 16 use crate::revlog::RevlogError;
13 17 use crate::utils::debug::debug_wait_for_file_or_print;
14 18 use crate::utils::files::get_path_from_bytes;
15 19 use crate::utils::hg_path::HgPath;
16 20 use crate::utils::SliceExt;
17 21 use crate::vfs::{is_dir, is_file, Vfs};
18 use crate::DirstateError;
19 use crate::{requirements, NodePrefix, UncheckedRevision};
22 use crate::{
23 requirements, NodePrefix, RevlogVersionOptions, UncheckedRevision,
24 };
25 use crate::{DirstateError, RevlogOpenOptions};
20 26 use std::cell::{Ref, RefCell, RefMut};
21 27 use std::collections::HashSet;
22 28 use std::io::Seek;
23 29 use std::io::SeekFrom;
24 30 use std::io::Write as IoWrite;
25 31 use std::path::{Path, PathBuf};
26 32
27 33 const V2_MAX_READ_ATTEMPTS: usize = 5;
28 34
29 35 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
30 36
31 37 /// A repository on disk
32 38 pub struct Repo {
33 39 working_directory: PathBuf,
34 40 dot_hg: PathBuf,
35 41 store: PathBuf,
36 42 requirements: HashSet<String>,
37 43 config: Config,
38 44 dirstate_parents: LazyCell<DirstateParents>,
39 45 dirstate_map: LazyCell<OwningDirstateMap>,
40 46 changelog: LazyCell<Changelog>,
41 47 manifestlog: LazyCell<Manifestlog>,
42 48 }
43 49
44 50 #[derive(Debug, derive_more::From)]
45 51 pub enum RepoError {
46 52 NotFound {
47 53 at: PathBuf,
48 54 },
49 55 #[from]
50 56 ConfigParseError(ConfigParseError),
51 57 #[from]
52 58 Other(HgError),
53 59 }
54 60
55 61 impl From<ConfigError> for RepoError {
56 62 fn from(error: ConfigError) -> Self {
57 63 match error {
58 64 ConfigError::Parse(error) => error.into(),
59 65 ConfigError::Other(error) => error.into(),
60 66 }
61 67 }
62 68 }
63 69
64 70 impl Repo {
65 71 /// tries to find nearest repository root in current working directory or
66 72 /// its ancestors
67 73 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
68 74 let current_directory = crate::utils::current_dir()?;
69 75 // ancestors() is inclusive: it first yields `current_directory`
70 76 // as-is.
71 77 for ancestor in current_directory.ancestors() {
72 78 if is_dir(ancestor.join(".hg"))? {
73 79 return Ok(ancestor.to_path_buf());
74 80 }
75 81 }
76 82 Err(RepoError::NotFound {
77 83 at: current_directory,
78 84 })
79 85 }
80 86
81 87 /// Find a repository, either at the given path (which must contain a `.hg`
82 88 /// sub-directory) or by searching the current directory and its
83 89 /// ancestors.
84 90 ///
85 91 /// A method with two very different "modes" like this is usually a code smell;
86 92 /// better to make two methods instead. But in this case an `Option` is what rhg
87 93 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
88 94 /// Having two methods would just move that `if` to almost all callers.
89 95 pub fn find(
90 96 config: &Config,
91 97 explicit_path: Option<PathBuf>,
92 98 ) -> Result<Self, RepoError> {
93 99 if let Some(root) = explicit_path {
94 100 if is_dir(root.join(".hg"))? {
95 101 Self::new_at_path(root, config)
96 102 } else if is_file(&root)? {
97 103 Err(HgError::unsupported("bundle repository").into())
98 104 } else {
99 105 Err(RepoError::NotFound { at: root })
100 106 }
101 107 } else {
102 108 let root = Self::find_repo_root()?;
103 109 Self::new_at_path(root, config)
104 110 }
105 111 }
106 112
107 113 /// To be called after checking that `.hg` is a sub-directory
108 114 fn new_at_path(
109 115 working_directory: PathBuf,
110 116 config: &Config,
111 117 ) -> Result<Self, RepoError> {
112 118 let dot_hg = working_directory.join(".hg");
113 119
114 120 let mut repo_config_files =
115 121 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
116 122
117 123 let hg_vfs = Vfs { base: &dot_hg };
118 124 let mut reqs = requirements::load_if_exists(hg_vfs)?;
119 125 let relative =
120 126 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
121 127 let shared =
122 128 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
123 129
124 130 // From `mercurial/localrepo.py`:
125 131 //
126 132 // if .hg/requires contains the sharesafe requirement, it means
127 133 // there exists a `.hg/store/requires` too and we should read it
128 134 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
129 135 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
130 136 // is not present, refer checkrequirementscompat() for that
131 137 //
132 138 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
133 139 // repository was shared the old way. We check the share source
134 140 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
135 141 // current repository needs to be reshared
136 142 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
137 143
138 144 let store_path;
139 145 if !shared {
140 146 store_path = dot_hg.join("store");
141 147 } else {
142 148 let bytes = hg_vfs.read("sharedpath")?;
143 149 let mut shared_path =
144 150 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
145 151 .to_owned();
146 152 if relative {
147 153 shared_path = dot_hg.join(shared_path)
148 154 }
149 155 if !is_dir(&shared_path)? {
150 156 return Err(HgError::corrupted(format!(
151 157 ".hg/sharedpath points to nonexistent directory {}",
152 158 shared_path.display()
153 159 ))
154 160 .into());
155 161 }
156 162
157 163 store_path = shared_path.join("store");
158 164
159 165 let source_is_share_safe =
160 166 requirements::load(Vfs { base: &shared_path })?
161 167 .contains(requirements::SHARESAFE_REQUIREMENT);
162 168
163 169 if share_safe != source_is_share_safe {
164 170 return Err(HgError::unsupported("share-safe mismatch").into());
165 171 }
166 172
167 173 if share_safe {
168 174 repo_config_files.insert(0, shared_path.join("hgrc"))
169 175 }
170 176 }
171 177 if share_safe {
172 178 reqs.extend(requirements::load(Vfs { base: &store_path })?);
173 179 }
174 180
175 181 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
176 182 config.combine_with_repo(&repo_config_files)?
177 183 } else {
178 184 config.clone()
179 185 };
180 186
181 187 let repo = Self {
182 188 requirements: reqs,
183 189 working_directory,
184 190 store: store_path,
185 191 dot_hg,
186 192 config: repo_config,
187 193 dirstate_parents: LazyCell::new(),
188 194 dirstate_map: LazyCell::new(),
189 195 changelog: LazyCell::new(),
190 196 manifestlog: LazyCell::new(),
191 197 };
192 198
193 199 requirements::check(&repo)?;
194 200
195 201 Ok(repo)
196 202 }
197 203
198 204 pub fn working_directory_path(&self) -> &Path {
199 205 &self.working_directory
200 206 }
201 207
202 208 pub fn requirements(&self) -> &HashSet<String> {
203 209 &self.requirements
204 210 }
205 211
206 212 pub fn config(&self) -> &Config {
207 213 &self.config
208 214 }
209 215
210 216 /// For accessing repository files (in `.hg`), except for the store
211 217 /// (`.hg/store`).
212 218 pub fn hg_vfs(&self) -> Vfs<'_> {
213 219 Vfs { base: &self.dot_hg }
214 220 }
215 221
216 222 /// For accessing repository store files (in `.hg/store`)
217 223 pub fn store_vfs(&self) -> Vfs<'_> {
218 224 Vfs { base: &self.store }
219 225 }
220 226
221 227 /// For accessing the working copy
222 228 pub fn working_directory_vfs(&self) -> Vfs<'_> {
223 229 Vfs {
224 230 base: &self.working_directory,
225 231 }
226 232 }
227 233
228 234 pub fn try_with_wlock_no_wait<R>(
229 235 &self,
230 236 f: impl FnOnce() -> R,
231 237 ) -> Result<R, LockError> {
232 238 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
233 239 }
234 240
235 241 /// Whether this repo should use dirstate-v2.
236 242 /// The presence of `dirstate-v2` in the requirements does not mean that
237 243 /// the on-disk dirstate is necessarily in version 2. In most cases,
238 244 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
239 245 /// upgrade mechanism being cut short), the on-disk version will be a
240 246 /// v1 file.
241 247 /// Semantically, having a requirement only means that a client cannot
242 248 /// properly understand or properly update the repo if it lacks the support
243 249 /// for the required feature, not that the feature is actually used
244 250 /// on every occasion.
245 251 pub fn use_dirstate_v2(&self) -> bool {
246 252 self.requirements
247 253 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
248 254 }
249 255
250 256 pub fn has_sparse(&self) -> bool {
251 257 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
252 258 }
253 259
254 260 pub fn has_narrow(&self) -> bool {
255 261 self.requirements.contains(requirements::NARROW_REQUIREMENT)
256 262 }
257 263
258 264 pub fn has_nodemap(&self) -> bool {
259 265 self.requirements
260 266 .contains(requirements::NODEMAP_REQUIREMENT)
261 267 }
262 268
263 269 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
264 270 Ok(self
265 271 .hg_vfs()
266 272 .read("dirstate")
267 273 .io_not_found_as_none()?
268 274 .unwrap_or_default())
269 275 }
270 276
271 277 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
272 278 use std::os::unix::fs::MetadataExt;
273 279 Ok(self
274 280 .hg_vfs()
275 281 .symlink_metadata("dirstate")
276 282 .io_not_found_as_none()?
277 283 .map(|meta| meta.ino()))
278 284 }
279 285
280 286 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
281 287 Ok(*self
282 288 .dirstate_parents
283 289 .get_or_init(|| self.read_dirstate_parents())?)
284 290 }
285 291
286 292 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
287 293 let dirstate = self.dirstate_file_contents()?;
288 294 let parents = if dirstate.is_empty() {
289 295 DirstateParents::NULL
290 296 } else if self.use_dirstate_v2() {
291 297 let docket_res =
292 298 crate::dirstate_tree::on_disk::read_docket(&dirstate);
293 299 match docket_res {
294 300 Ok(docket) => docket.parents(),
295 301 Err(_) => {
296 302 log::info!(
297 303 "Parsing dirstate docket failed, \
298 304 falling back to dirstate-v1"
299 305 );
300 306 *crate::dirstate::parsers::parse_dirstate_parents(
301 307 &dirstate,
302 308 )?
303 309 }
304 310 }
305 311 } else {
306 312 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
307 313 };
308 314 self.dirstate_parents.set(parents);
309 315 Ok(parents)
310 316 }
311 317
312 318 /// Returns the information read from the dirstate docket necessary to
313 319 /// check if the data file has been updated/deleted by another process
314 320 /// since we last read the dirstate.
315 321 /// Namely, the inode, data file uuid and the data size.
316 322 fn get_dirstate_data_file_integrity(
317 323 &self,
318 324 ) -> Result<DirstateMapIdentity, HgError> {
319 325 assert!(
320 326 self.use_dirstate_v2(),
321 327 "accessing dirstate data file ID without dirstate-v2"
322 328 );
323 329 // Get the identity before the contents since we could have a race
324 330 // between the two. Having an identity that is too old is fine, but
325 331 // one that is younger than the content change is bad.
326 332 let identity = self.dirstate_identity()?;
327 333 let dirstate = self.dirstate_file_contents()?;
328 334 if dirstate.is_empty() {
329 335 self.dirstate_parents.set(DirstateParents::NULL);
330 336 Ok((identity, None, 0))
331 337 } else {
332 338 let docket_res =
333 339 crate::dirstate_tree::on_disk::read_docket(&dirstate);
334 340 match docket_res {
335 341 Ok(docket) => {
336 342 self.dirstate_parents.set(docket.parents());
337 343 Ok((
338 344 identity,
339 345 Some(docket.uuid.to_owned()),
340 346 docket.data_size(),
341 347 ))
342 348 }
343 349 Err(_) => {
344 350 log::info!(
345 351 "Parsing dirstate docket failed, \
346 352 falling back to dirstate-v1"
347 353 );
348 354 let parents =
349 355 *crate::dirstate::parsers::parse_dirstate_parents(
350 356 &dirstate,
351 357 )?;
352 358 self.dirstate_parents.set(parents);
353 359 Ok((identity, None, 0))
354 360 }
355 361 }
356 362 }
357 363 }
358 364
359 365 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
360 366 if self.use_dirstate_v2() {
361 367 // The v2 dirstate is split into a docket and a data file.
362 368 // Since we don't always take the `wlock` to read it
363 369 // (like in `hg status`), it is susceptible to races.
364 370 // A simple retry method should be enough since full rewrites
365 371 // only happen when too much garbage data is present and
366 372 // this race is unlikely.
367 373 let mut tries = 0;
368 374
369 375 while tries < V2_MAX_READ_ATTEMPTS {
370 376 tries += 1;
371 377 match self.read_docket_and_data_file() {
372 378 Ok(m) => {
373 379 return Ok(m);
374 380 }
375 381 Err(e) => match e {
376 382 DirstateError::Common(HgError::RaceDetected(
377 383 context,
378 384 )) => {
379 385 log::info!(
380 386 "dirstate read race detected {} (retry {}/{})",
381 387 context,
382 388 tries,
383 389 V2_MAX_READ_ATTEMPTS,
384 390 );
385 391 continue;
386 392 }
387 393 _ => {
388 394 log::info!(
389 395 "Reading dirstate v2 failed, \
390 396 falling back to v1"
391 397 );
392 398 return self.new_dirstate_map_v1();
393 399 }
394 400 },
395 401 }
396 402 }
397 403 let error = HgError::abort(
398 404 format!("dirstate read race happened {tries} times in a row"),
399 405 255,
400 406 None,
401 407 );
402 408 Err(DirstateError::Common(error))
403 409 } else {
404 410 self.new_dirstate_map_v1()
405 411 }
406 412 }
407 413
408 414 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
409 415 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
410 416 let identity = self.dirstate_identity()?;
411 417 let dirstate_file_contents = self.dirstate_file_contents()?;
412 418 if dirstate_file_contents.is_empty() {
413 419 self.dirstate_parents.set(DirstateParents::NULL);
414 420 Ok(OwningDirstateMap::new_empty(Vec::new()))
415 421 } else {
416 422 let (map, parents) =
417 423 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
418 424 self.dirstate_parents.set(parents);
419 425 Ok(map)
420 426 }
421 427 }
422 428
423 429 fn read_docket_and_data_file(
424 430 &self,
425 431 ) -> Result<OwningDirstateMap, DirstateError> {
426 432 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
427 433 let dirstate_file_contents = self.dirstate_file_contents()?;
428 434 let identity = self.dirstate_identity()?;
429 435 if dirstate_file_contents.is_empty() {
430 436 self.dirstate_parents.set(DirstateParents::NULL);
431 437 return Ok(OwningDirstateMap::new_empty(Vec::new()));
432 438 }
433 439 let docket = crate::dirstate_tree::on_disk::read_docket(
434 440 &dirstate_file_contents,
435 441 )?;
436 442 debug_wait_for_file_or_print(
437 443 self.config(),
438 444 "dirstate.post-docket-read-file",
439 445 );
440 446 self.dirstate_parents.set(docket.parents());
441 447 let uuid = docket.uuid.to_owned();
442 448 let data_size = docket.data_size();
443 449
444 450 let context = "between reading dirstate docket and data file";
445 451 let race_error = HgError::RaceDetected(context.into());
446 452 let metadata = docket.tree_metadata();
447 453
448 454 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
449 455 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
450 456 let contents = self.hg_vfs().read(docket.data_filename());
451 457 let contents = match contents {
452 458 Ok(c) => c,
453 459 Err(HgError::IoError { error, context }) => {
454 460 match error.raw_os_error().expect("real os error") {
455 461 // 2 = ENOENT, No such file or directory
456 462 // 116 = ESTALE, Stale NFS file handle
457 463 //
458 464 // TODO match on `error.kind()` when
459 465 // `ErrorKind::StaleNetworkFileHandle` is stable.
460 466 2 | 116 => {
461 467 // Race where the data file was deleted right after
462 468 // we read the docket, try again
463 469 return Err(race_error.into());
464 470 }
465 471 _ => {
466 472 return Err(
467 473 HgError::IoError { error, context }.into()
468 474 )
469 475 }
470 476 }
471 477 }
472 478 Err(e) => return Err(e.into()),
473 479 };
474 480 OwningDirstateMap::new_v2(
475 481 contents, data_size, metadata, uuid, identity,
476 482 )
477 483 } else {
478 484 match self
479 485 .hg_vfs()
480 486 .mmap_open(docket.data_filename())
481 487 .io_not_found_as_none()
482 488 {
483 489 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
484 490 data_mmap, data_size, metadata, uuid, identity,
485 491 ),
486 492 Ok(None) => {
487 493 // Race where the data file was deleted right after we
488 494 // read the docket, try again
489 495 return Err(race_error.into());
490 496 }
491 497 Err(e) => return Err(e.into()),
492 498 }
493 499 }?;
494 500
495 501 let write_mode_config = self
496 502 .config()
497 503 .get_str(b"devel", b"dirstate.v2.data_update_mode")
498 504 .unwrap_or(Some("auto"))
499 505 .unwrap_or("auto"); // don't bother for devel options
500 506 let write_mode = match write_mode_config {
501 507 "auto" => DirstateMapWriteMode::Auto,
502 508 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
503 509 "force-append" => DirstateMapWriteMode::ForceAppend,
504 510 _ => DirstateMapWriteMode::Auto,
505 511 };
506 512
507 513 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
508 514
509 515 Ok(map)
510 516 }
511 517
512 518 pub fn dirstate_map(
513 519 &self,
514 520 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
515 521 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
516 522 }
517 523
518 524 pub fn dirstate_map_mut(
519 525 &self,
520 526 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
521 527 self.dirstate_map
522 528 .get_mut_or_init(|| self.new_dirstate_map())
523 529 }
524 530
525 531 fn new_changelog(&self) -> Result<Changelog, HgError> {
526 Changelog::open(&self.store_vfs(), self.has_nodemap())
532 Changelog::open(&self.store_vfs(), self.default_revlog_options(true)?)
527 533 }
528 534
529 535 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
530 536 self.changelog.get_or_init(|| self.new_changelog())
531 537 }
532 538
533 539 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
534 540 self.changelog.get_mut_or_init(|| self.new_changelog())
535 541 }
536 542
537 543 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
544 Manifestlog::open(
545 &self.store_vfs(),
546 self.default_revlog_options(false)?,
547 )
539 548 }
540 549
541 550 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
542 551 self.manifestlog.get_or_init(|| self.new_manifestlog())
543 552 }
544 553
545 554 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
546 555 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
547 556 }
548 557
549 558 /// Returns the manifest of the *changeset* with the given node ID
550 559 pub fn manifest_for_node(
551 560 &self,
552 561 node: impl Into<NodePrefix>,
553 562 ) -> Result<Manifest, RevlogError> {
554 563 self.manifestlog()?.data_for_node(
555 564 self.changelog()?
556 565 .data_for_node(node.into())?
557 566 .manifest_node()?
558 567 .into(),
559 568 )
560 569 }
561 570
562 571 /// Returns the manifest of the *changeset* with the given revision number
563 572 pub fn manifest_for_rev(
564 573 &self,
565 574 revision: UncheckedRevision,
566 575 ) -> Result<Manifest, RevlogError> {
567 576 self.manifestlog()?.data_for_node(
568 577 self.changelog()?
569 578 .data_for_rev(revision)?
570 579 .manifest_node()?
571 580 .into(),
572 581 )
573 582 }
574 583
575 584 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 585 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 586 Ok(entry.tracked())
578 587 } else {
579 588 Ok(false)
580 589 }
581 590 }
582 591
583 592 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
584 Filelog::open(self, path)
593 Filelog::open(self, path, self.default_revlog_options(false)?)
585 594 }
586 595
587 596 /// Write to disk any updates that were made through `dirstate_map_mut`.
588 597 ///
589 598 /// The "wlock" must be held while calling this.
590 599 /// See for example `try_with_wlock_no_wait`.
591 600 ///
592 601 /// TODO: have a `WritableRepo` type only accessible while holding the
593 602 /// lock?
594 603 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
595 604 let map = self.dirstate_map()?;
596 605 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
597 606 // it’s unset
598 607 let parents = self.dirstate_parents()?;
599 608 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
600 609 let (identity, uuid, data_size) =
601 610 self.get_dirstate_data_file_integrity()?;
602 611 let identity_changed = identity != map.old_identity();
603 612 let uuid_changed = uuid.as_deref() != map.old_uuid();
604 613 let data_length_changed = data_size != map.old_data_size();
605 614
606 615 if identity_changed || uuid_changed || data_length_changed {
607 616 // If any of identity, uuid or length have changed since
608 617 // last disk read, don't write.
609 618 // This is fine because either we're in a command that doesn't
610 619 // write anything too important (like `hg status`), or we're in
611 620 // `hg add` and we're supposed to have taken the lock before
612 621 // reading anyway.
613 622 //
614 623 // TODO complain loudly if we've changed anything important
615 624 // without taking the lock.
616 625 // (see `hg help config.format.use-dirstate-tracked-hint`)
617 626 log::debug!(
618 627 "dirstate has changed since last read, not updating."
619 628 );
620 629 return Ok(());
621 630 }
622 631
623 632 let uuid_opt = map.old_uuid();
624 633 let write_mode = if uuid_opt.is_some() {
625 634 DirstateMapWriteMode::Auto
626 635 } else {
627 636 DirstateMapWriteMode::ForceNewDataFile
628 637 };
629 638 let (data, tree_metadata, append, old_data_size) =
630 639 map.pack_v2(write_mode)?;
631 640
632 641 // Reuse the uuid, or generate a new one, keeping the old for
633 642 // deletion.
634 643 let (uuid, old_uuid) = match uuid_opt {
635 644 Some(uuid) => {
636 645 let as_str = std::str::from_utf8(uuid)
637 646 .map_err(|_| {
638 647 HgError::corrupted(
639 648 "non-UTF-8 dirstate data file ID",
640 649 )
641 650 })?
642 651 .to_owned();
643 652 if append {
644 653 (as_str, None)
645 654 } else {
646 655 (DirstateDocket::new_uid(), Some(as_str))
647 656 }
648 657 }
649 658 None => (DirstateDocket::new_uid(), None),
650 659 };
651 660
652 661 let data_filename = format!("dirstate.{}", uuid);
653 662 let data_filename = self.hg_vfs().join(data_filename);
654 663 let mut options = std::fs::OpenOptions::new();
655 664 options.write(true);
656 665
657 666 // Why are we not using the O_APPEND flag when appending?
658 667 //
659 668 // - O_APPEND makes it trickier to deal with garbage at the end of
660 669 // the file, left by a previous uncommitted transaction. By
661 670 // starting the write at [old_data_size] we make sure we erase
662 671 // all such garbage.
663 672 //
664 673 // - O_APPEND requires special-casing 0-byte writes, whereas we
665 674 // don't need that.
666 675 //
667 676 // - Some OSes have bugs in their implementation of O_APPEND:
668 677 // revlog.py talks about a Solaris bug, but we also saw some ZFS
669 678 // bug: https://github.com/openzfs/zfs/pull/3124,
670 679 // https://github.com/openzfs/zfs/issues/13370
671 680 //
672 681 if !append {
673 682 log::trace!("creating a new dirstate data file");
674 683 options.create_new(true);
675 684 } else {
676 685 log::trace!("appending to the dirstate data file");
677 686 }
678 687
679 688 let data_size = (|| {
680 689 // TODO: loop and try another random ID if !append and this
681 690 // returns `ErrorKind::AlreadyExists`? Collision chance of two
682 691 // random IDs is one in 2**32
683 692 let mut file = options.open(&data_filename)?;
684 693 if append {
685 694 file.seek(SeekFrom::Start(old_data_size as u64))?;
686 695 }
687 696 file.write_all(&data)?;
688 697 file.flush()?;
689 698 file.stream_position()
690 699 })()
691 700 .when_writing_file(&data_filename)?;
692 701
693 702 let packed_dirstate = DirstateDocket::serialize(
694 703 parents,
695 704 tree_metadata,
696 705 data_size,
697 706 uuid.as_bytes(),
698 707 )
699 708 .map_err(|_: std::num::TryFromIntError| {
700 709 HgError::corrupted("overflow in dirstate docket serialization")
701 710 })?;
702 711
703 712 (packed_dirstate, old_uuid)
704 713 } else {
705 714 let identity = self.dirstate_identity()?;
706 715 if identity != map.old_identity() {
707 716 // If identity changed since last disk read, don't write.
708 717 // This is fine because either we're in a command that doesn't
709 718 // write anything too important (like `hg status`), or we're in
710 719 // `hg add` and we're supposed to have taken the lock before
711 720 // reading anyway.
712 721 //
713 722 // TODO complain loudly if we've changed anything important
714 723 // without taking the lock.
715 724 // (see `hg help config.format.use-dirstate-tracked-hint`)
716 725 log::debug!(
717 726 "dirstate has changed since last read, not updating."
718 727 );
719 728 return Ok(());
720 729 }
721 730 (map.pack_v1(parents)?, None)
722 731 };
723 732
724 733 let vfs = self.hg_vfs();
725 734 vfs.atomic_write("dirstate", &packed_dirstate)?;
726 735 if let Some(uuid) = old_uuid_to_remove {
727 736 // Remove the old data file after the new docket pointing to the
728 737 // new data file was written.
729 738 vfs.remove_file(format!("dirstate.{}", uuid))?;
730 739 }
731 740 Ok(())
732 741 }
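
The uuid handling in `write_dirstate` above boils down to one decision: reuse the old data-file ID only when appending, otherwise mint a fresh one and remember the old one for deletion. A minimal sketch of just that decision, assuming simplified `String` types; `pick_uuid` and `new_uid` are illustrative stand-ins, not names from this patch:

fn pick_uuid(
    old_uuid: Option<&str>,
    append: bool,
    new_uid: impl Fn() -> String,
) -> (String, Option<String>) {
    match old_uuid {
        // Appending: keep writing into the existing data file.
        Some(uuid) if append => (uuid.to_owned(), None),
        // Rewriting: new data file, remember the old one for deletion.
        Some(uuid) => (new_uid(), Some(uuid.to_owned())),
        // No previous data file at all.
        None => (new_uid(), None),
    }
}

fn main() {
    let fresh = || "b2c4new".to_owned(); // stand-in for DirstateDocket::new_uid()
    assert_eq!(pick_uuid(Some("a1b2"), true, fresh), ("a1b2".into(), None));
    let (new, old) = pick_uuid(Some("a1b2"), false, fresh);
    assert_eq!(old.as_deref(), Some("a1b2"));
    assert_ne!(new, "a1b2");
}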
742
743 pub fn default_revlog_options(
744 &self,
745 changelog: bool,
746 ) -> Result<RevlogOpenOptions, HgError> {
747 let requirements = self.requirements();
748 let version = if changelog
749 && requirements.contains(CHANGELOGV2_REQUIREMENT)
750 {
751 let compute_rank = self
752 .config()
753 .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
754 RevlogVersionOptions::ChangelogV2 { compute_rank }
755 } else if requirements.contains(REVLOGV2_REQUIREMENT) {
756 RevlogVersionOptions::V2
757 } else if requirements.contains(REVLOGV1_REQUIREMENT) {
758 RevlogVersionOptions::V1 {
759 generaldelta: requirements.contains(GENERALDELTA_REQUIREMENT),
760 }
761 } else {
762 RevlogVersionOptions::V0
763 };
764 Ok(RevlogOpenOptions {
765 version,
766 // We don't need to dance around the slow path like in the Python
767 // implementation since we know we have access to the fast code.
768 use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
769 })
770 }
733 771 }
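
The new `default_revlog_options` derives the revlog version from the repository requirements in strict priority order: changelog-v2 (for the changelog only), then revlog-v2, then v1 with an optional generaldelta flag, falling back to v0. A standalone sketch of that selection chain: the requirement strings match the constants in this patch, but `Version` and `pick_version` are illustrative stand-ins, and the `experimental.changelog-v2.compute-rank` config read is elided:

use std::collections::HashSet;

#[derive(Debug, PartialEq)]
enum Version {
    V0,
    V1 { generaldelta: bool },
    V2,
    ChangelogV2,
}

// Illustrative condensation of the selection in `default_revlog_options`.
fn pick_version(changelog: bool, reqs: &HashSet<&str>) -> Version {
    if changelog && reqs.contains("exp-changelog-v2") {
        // The real code additionally reads the compute-rank config knob here.
        Version::ChangelogV2
    } else if reqs.contains("exp-revlogv2.1") {
        Version::V2
    } else if reqs.contains("revlogv1") {
        Version::V1 {
            generaldelta: reqs.contains("generaldelta"),
        }
    } else {
        Version::V0
    }
}

fn main() {
    let reqs = HashSet::from(["revlogv1", "generaldelta", "persistent-nodemap"]);
    assert_eq!(
        pick_version(false, &reqs),
        Version::V1 { generaldelta: true }
    );
}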
734 772
735 773 /// Lazily-initialized component of `Repo` with interior mutability
736 774 ///
737 775 /// This differs from `OnceCell` in that the value can still be "deinitialized"
738 776 /// later by setting its inner `Option` to `None`. It also takes the
739 777 /// initialization function as an argument when the value is requested, not
740 778 /// when the instance is created.
741 779 struct LazyCell<T> {
742 780 value: RefCell<Option<T>>,
743 781 }
744 782
745 783 impl<T> LazyCell<T> {
746 784 fn new() -> Self {
747 785 Self {
748 786 value: RefCell::new(None),
749 787 }
750 788 }
751 789
752 790 fn set(&self, value: T) {
753 791 *self.value.borrow_mut() = Some(value)
754 792 }
755 793
756 794 fn get_or_init<E>(
757 795 &self,
758 796 init: impl Fn() -> Result<T, E>,
759 797 ) -> Result<Ref<T>, E> {
760 798 let mut borrowed = self.value.borrow();
761 799 if borrowed.is_none() {
762 800 drop(borrowed);
763 801 // Only use `borrow_mut` if it is really needed to avoid panic in
764 802 // case there is another outstanding borrow but mutation is not
765 803 // needed.
766 804 *self.value.borrow_mut() = Some(init()?);
767 805 borrowed = self.value.borrow()
768 806 }
769 807 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
770 808 }
771 809
772 810 fn get_mut_or_init<E>(
773 811 &self,
774 812 init: impl Fn() -> Result<T, E>,
775 813 ) -> Result<RefMut<T>, E> {
776 814 let mut borrowed = self.value.borrow_mut();
777 815 if borrowed.is_none() {
778 816 *borrowed = Some(init()?);
779 817 }
780 818 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
781 819 }
782 820 }
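
A hedged usage sketch for the `LazyCell` defined above: the first `get_or_init` call runs the fallible initializer, later calls hand back the cached value. `ConfigHolder` and the file name are invented for illustration; only `LazyCell` itself comes from this file:

use std::cell::Ref;

// Hypothetical component cached behind the `LazyCell` defined above.
struct ConfigHolder {
    config: LazyCell<String>,
}

impl ConfigHolder {
    fn config(&self) -> Result<Ref<String>, std::io::Error> {
        // First call reads the file; later calls reuse the cached value
        // until someone resets it with `set`.
        self.config
            .get_or_init(|| std::fs::read_to_string("app.conf"))
    }
}

fn load() -> Result<(), std::io::Error> {
    let holder = ConfigHolder { config: LazyCell::new() };
    let cfg = holder.config()?;
    println!("{} bytes of config", cfg.len());
    Ok(())
}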
@@ -1,173 +1,183 b''
1 1 use crate::errors::{HgError, HgResultExt};
2 2 use crate::repo::Repo;
3 3 use crate::utils::join_display;
4 4 use crate::vfs::Vfs;
5 5 use std::collections::HashSet;
6 6
7 7 fn parse(bytes: &[u8]) -> Result<HashSet<String>, HgError> {
8 8 // The Python code reading this file uses `str.splitlines`
9 9 // which looks for a number of line separators (even including a couple of
10 10 // non-ASCII ones), but Python code writing it always uses `\n`.
11 11 let lines = bytes.split(|&byte| byte == b'\n');
12 12
13 13 lines
14 14 .filter(|line| !line.is_empty())
15 15 .map(|line| {
16 16 // Python uses Unicode `str.isalnum` but feature names are all
17 17 // ASCII
18 18 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
19 19 Ok(String::from_utf8(line.into()).unwrap())
20 20 } else {
21 21 Err(HgError::corrupted("parse error in 'requires' file"))
22 22 }
23 23 })
24 24 .collect()
25 25 }
26 26
27 27 pub(crate) fn load(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
28 28 parse(&hg_vfs.read("requires")?)
29 29 }
30 30
31 31 pub(crate) fn load_if_exists(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
32 32 if let Some(bytes) = hg_vfs.read("requires").io_not_found_as_none()? {
33 33 parse(&bytes)
34 34 } else {
35 35 // Treat a missing file the same as an empty file.
36 36 // From `mercurial/localrepo.py`:
37 37 // > requires file contains a newline-delimited list of
38 38 // > features/capabilities the opener (us) must have in order to use
39 39 // > the repository. This file was introduced in Mercurial 0.9.2,
40 40 // > which means very old repositories may not have one. We assume
41 41 // > a missing file translates to no requirements.
42 42 Ok(HashSet::new())
43 43 }
44 44 }
45 45
46 46 pub(crate) fn check(repo: &Repo) -> Result<(), HgError> {
47 47 let unknown: Vec<_> = repo
48 48 .requirements()
49 49 .iter()
50 50 .map(String::as_str)
51 51 // .filter(|feature| !ALL_SUPPORTED.contains(feature.as_str()))
52 52 .filter(|feature| {
53 53 !REQUIRED.contains(feature) && !SUPPORTED.contains(feature)
54 54 })
55 55 .collect();
56 56 if !unknown.is_empty() {
57 57 return Err(HgError::unsupported(format!(
58 58 "repository requires feature unknown to this Mercurial: {}",
59 59 join_display(&unknown, ", ")
60 60 )));
61 61 }
62 62 let missing: Vec<_> = REQUIRED
63 63 .iter()
64 64 .filter(|&&feature| !repo.requirements().contains(feature))
65 65 .collect();
66 66 if !missing.is_empty() {
67 67 return Err(HgError::unsupported(format!(
68 68 "repository is missing feature required by this Mercurial: {}",
69 69 join_display(&missing, ", ")
70 70 )));
71 71 }
72 72 Ok(())
73 73 }
74 74
75 75 /// rhg does not support repositories that are *missing* any of these features
76 76 const REQUIRED: &[&str] = &["revlogv1", "store", "fncache", "dotencode"];
77 77
78 78 /// rhg supports repositories with or without these
79 79 const SUPPORTED: &[&str] = &[
80 "generaldelta",
80 GENERALDELTA_REQUIREMENT,
81 81 SHARED_REQUIREMENT,
82 82 SHARESAFE_REQUIREMENT,
83 83 SPARSEREVLOG_REQUIREMENT,
84 84 RELATIVE_SHARED_REQUIREMENT,
85 85 REVLOG_COMPRESSION_ZSTD,
86 86 DIRSTATE_V2_REQUIREMENT,
87 87 DIRSTATE_TRACKED_HINT_V1,
88 88 // As of this writing everything rhg does is read-only.
89 89 // When it starts writing to the repository, it’ll need to either keep the
90 90 // persistent nodemap up to date or remove this entry:
91 91 NODEMAP_REQUIREMENT,
92 92 // Not all commands support `sparse` and `narrow`. The commands that do
93 93 // not should opt out by checking `has_sparse` and `has_narrow`.
94 94 SPARSE_REQUIREMENT,
95 95 NARROW_REQUIREMENT,
96 96 // rhg doesn't care about bookmarks at all yet
97 97 BOOKMARKS_IN_STORE_REQUIREMENT,
98 98 ];
99 99
100 100 // Copied from mercurial/requirements.py:
101 101
102 102 pub const DIRSTATE_V2_REQUIREMENT: &str = "dirstate-v2";
103 pub const GENERALDELTA_REQUIREMENT: &str = "generaldelta";
103 104
104 105 /// A repository that uses the tracked hint dirstate file
105 106 #[allow(unused)]
106 107 pub const DIRSTATE_TRACKED_HINT_V1: &str = "dirstate-tracked-key-v1";
107 108
108 109 /// When narrowing is finalized and no longer subject to format changes,
109 110 /// we should move this to just "narrow" or similar.
110 111 #[allow(unused)]
111 112 pub const NARROW_REQUIREMENT: &str = "narrowhg-experimental";
112 113
113 114 /// Bookmarks must be stored in the `store` part of the repository and will be
114 115 /// share accross shares
115 116 #[allow(unused)]
116 117 pub const BOOKMARKS_IN_STORE_REQUIREMENT: &str = "bookmarksinstore";
117 118
118 119 /// Enables sparse working directory usage
119 120 #[allow(unused)]
120 121 pub const SPARSE_REQUIREMENT: &str = "exp-sparse";
121 122
122 123 /// Enables the internal phase which is used to hide changesets instead
123 124 /// of stripping them
124 125 #[allow(unused)]
125 126 pub const INTERNAL_PHASE_REQUIREMENT: &str = "internal-phase";
126 127
127 128 /// Stores manifest in Tree structure
128 129 #[allow(unused)]
129 130 pub const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";
130 131
132 /// Whether to use the "RevlogNG" or V1 of the revlog format
133 #[allow(unused)]
134 pub const REVLOGV1_REQUIREMENT: &str = "revlogv1";
135
131 136 /// Increment the sub-version when the revlog v2 format changes to lock out old
132 137 /// clients.
133 138 #[allow(unused)]
134 139 pub const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";
135 140
141 /// Increment the sub-version when the changelog v2 format changes to lock out old
142 /// clients.
143 #[allow(unused)]
144 pub const CHANGELOGV2_REQUIREMENT: &str = "exp-changelog-v2";
145
136 146 /// A repository with the sparserevlog feature will have delta chains that
137 147 /// can spread over a larger span. Sparse reading cuts these large spans into
138 148 /// pieces, so that each piece isn't too big.
139 149 /// Without the sparserevlog capability, reading from the repository could use
140 150 /// huge amounts of memory, because the whole span would be read at once,
141 151 /// including all the intermediate revisions that aren't pertinent for the
142 152 /// chain. This is why once a repository has enabled sparse-read, it becomes
143 153 /// required.
144 154 #[allow(unused)]
145 155 pub const SPARSEREVLOG_REQUIREMENT: &str = "sparserevlog";
146 156
147 157 /// A repository with the copies-sidedata-changeset requirement will store
148 158 /// copies-related information in the changesets' sidedata.
149 159 #[allow(unused)]
150 160 pub const COPIESSDC_REQUIREMENT: &str = "exp-copies-sidedata-changeset";
151 161
152 162 /// The repository uses a persistent nodemap for the changelog and the manifest.
153 163 #[allow(unused)]
154 164 pub const NODEMAP_REQUIREMENT: &str = "persistent-nodemap";
155 165
156 166 /// Denotes that the current repository is a share
157 167 #[allow(unused)]
158 168 pub const SHARED_REQUIREMENT: &str = "shared";
159 169
160 170 /// Denotes that the current repository is a share and the shared source path is
161 171 /// relative to the current repository root path
162 172 #[allow(unused)]
163 173 pub const RELATIVE_SHARED_REQUIREMENT: &str = "relshared";
164 174
165 175 /// A repository with share implemented safely. The repository has different
166 176 /// store and working copy requirements, i.e. both `.hg/requires` and
167 177 /// `.hg/store/requires` are present.
168 178 #[allow(unused)]
169 179 pub const SHARESAFE_REQUIREMENT: &str = "share-safe";
170 180
171 181 /// A repository that uses zstd compression inside its revlog
172 182 #[allow(unused)]
173 183 pub const REVLOG_COMPRESSION_ZSTD: &str = "revlog-compression-zstd";
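
The `check` function earlier in this file gates rhg on two lists: every requirement the repository declares must be known (in REQUIRED or SUPPORTED), and every REQUIRED feature must actually be present. A reduced sketch of that partition over plain string slices; the logic mirrors `check`, while the function name and error strings are illustrative:

fn check_requirements(
    repo_reqs: &[&str],
    required: &[&str],
    supported: &[&str],
) -> Result<(), String> {
    // Features the repo declares but this client knows nothing about.
    let unknown: Vec<&str> = repo_reqs
        .iter()
        .copied()
        .filter(|f| !required.contains(f) && !supported.contains(f))
        .collect();
    if !unknown.is_empty() {
        return Err(format!("unknown features: {}", unknown.join(", ")));
    }
    // Features this client insists on but the repo lacks.
    let missing: Vec<&str> = required
        .iter()
        .copied()
        .filter(|f| !repo_reqs.contains(f))
        .collect();
    if !missing.is_empty() {
        return Err(format!("missing features: {}", missing.join(", ")));
    }
    Ok(())
}

fn main() {
    let repo = ["revlogv1", "store", "fncache", "dotencode", "generaldelta"];
    let required = ["revlogv1", "store", "fncache", "dotencode"];
    let supported = ["generaldelta", "sparserevlog"];
    assert!(check_requirements(&repo, &required, &supported).is_ok());
    assert!(check_requirements(&["store"], &required, &supported).is_err());
}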
@@ -1,359 +1,363 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::Revision;
3 3 use crate::revlog::{Node, NodePrefix};
4 4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 5 use crate::utils::hg_path::HgPath;
6 6 use crate::vfs::Vfs;
7 use crate::{Graph, GraphError, UncheckedRevision};
7 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
8 8 use itertools::Itertools;
9 9 use std::ascii::escape_default;
10 10 use std::borrow::Cow;
11 11 use std::fmt::{Debug, Formatter};
12 12
13 13 /// A specialized `Revlog` to work with changelog data format.
14 14 pub struct Changelog {
15 15 /// The generic `revlog` format.
16 16 pub(crate) revlog: Revlog,
17 17 }
18 18
19 19 impl Changelog {
20 20 /// Open the `changelog` of a repository given by its root.
21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
22 let revlog =
23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
21 pub fn open(
22 store_vfs: &Vfs,
23 options: RevlogOpenOptions,
24 ) -> Result<Self, HgError> {
25 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
24 26 Ok(Self { revlog })
25 27 }
26 28
27 29 /// Return the `ChangelogRevisionData` for the given node ID.
28 30 pub fn data_for_node(
29 31 &self,
30 32 node: NodePrefix,
31 33 ) -> Result<ChangelogRevisionData, RevlogError> {
32 34 let rev = self.revlog.rev_from_node(node)?;
33 35 self.entry_for_checked_rev(rev)?.data()
34 36 }
35 37
36 38 /// Return the [`ChangelogEntry`] for the given revision number.
37 39 pub fn entry_for_rev(
38 40 &self,
39 41 rev: UncheckedRevision,
40 42 ) -> Result<ChangelogEntry, RevlogError> {
41 43 let revlog_entry = self.revlog.get_entry(rev)?;
42 44 Ok(ChangelogEntry { revlog_entry })
43 45 }
44 46
45 47 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 48 fn entry_for_checked_rev(
47 49 &self,
48 50 rev: Revision,
49 51 ) -> Result<ChangelogEntry, RevlogError> {
50 52 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
51 53 Ok(ChangelogEntry { revlog_entry })
52 54 }
53 55
54 56 /// Return the [`ChangelogRevisionData`] for the given revision number.
55 57 ///
56 58 /// This is a useful shortcut in case the caller does not need the
57 59 /// generic revlog information (parents, hashes etc). Otherwise
58 60 /// consider taking a [`ChangelogEntry`] with
59 61 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
60 62 pub fn data_for_rev(
61 63 &self,
62 64 rev: UncheckedRevision,
63 65 ) -> Result<ChangelogRevisionData, RevlogError> {
64 66 self.entry_for_rev(rev)?.data()
65 67 }
66 68
67 69 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
68 70 self.revlog.node_from_rev(rev)
69 71 }
70 72
71 73 pub fn rev_from_node(
72 74 &self,
73 75 node: NodePrefix,
74 76 ) -> Result<Revision, RevlogError> {
75 77 self.revlog.rev_from_node(node)
76 78 }
77 79 }
78 80
79 81 impl Graph for Changelog {
80 82 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
81 83 self.revlog.parents(rev)
82 84 }
83 85 }
84 86
85 87 /// A specialized `RevlogEntry` for `changelog` data format
86 88 ///
87 89 /// This is a `RevlogEntry` with the added semantics that the associated
88 90 /// data should meet the requirements for `changelog`, materialized by
89 91 /// the fact that `data()` constructs a `ChangelogRevisionData`.
90 92 /// In case that promise would be broken, the `data` method returns an error.
91 93 #[derive(Clone)]
92 94 pub struct ChangelogEntry<'changelog> {
93 95 /// Same data, as a generic `RevlogEntry`.
94 96 pub(crate) revlog_entry: RevlogEntry<'changelog>,
95 97 }
96 98
97 99 impl<'changelog> ChangelogEntry<'changelog> {
98 100 pub fn data<'a>(
99 101 &'a self,
100 102 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
101 103 let bytes = self.revlog_entry.data()?;
102 104 if bytes.is_empty() {
103 105 Ok(ChangelogRevisionData::null())
104 106 } else {
105 107 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
106 108 RevlogError::Other(HgError::CorruptedRepository(format!(
107 109 "Invalid changelog data for revision {}: {:?}",
108 110 self.revlog_entry.revision(),
109 111 err
110 112 )))
111 113 })?)
112 114 }
113 115 }
114 116
115 117 /// Obtain a reference to the underlying `RevlogEntry`.
116 118 ///
117 119 /// This allows the caller to access the information that is common
118 120 /// to all revlog entries: revision number, node id, parent revisions etc.
119 121 pub fn as_revlog_entry(&self) -> &RevlogEntry {
120 122 &self.revlog_entry
121 123 }
122 124
123 125 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
124 126 Ok(self
125 127 .revlog_entry
126 128 .p1_entry()?
127 129 .map(|revlog_entry| Self { revlog_entry }))
128 130 }
129 131
130 132 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 133 Ok(self
132 134 .revlog_entry
133 135 .p2_entry()?
134 136 .map(|revlog_entry| Self { revlog_entry }))
135 137 }
136 138 }
137 139
138 140 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
139 141 #[derive(PartialEq)]
140 142 pub struct ChangelogRevisionData<'changelog> {
141 143 /// The data bytes of the `changelog` entry.
142 144 bytes: Cow<'changelog, [u8]>,
143 145 /// The end offset for the hex manifest (not including the newline)
144 146 manifest_end: usize,
145 147 /// The end offset for the user+email (not including the newline)
146 148 user_end: usize,
147 149 /// The end offset for the timestamp+timezone+extras (not including the
148 150 /// newline)
149 151 timestamp_end: usize,
150 152 /// The end offset for the file list (not including the newline)
151 153 files_end: usize,
152 154 }
153 155
154 156 impl<'changelog> ChangelogRevisionData<'changelog> {
155 157 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
156 158 let mut line_iter = bytes.split(|b| b == &b'\n');
157 159 let manifest_end = line_iter
158 160 .next()
159 161 .expect("Empty iterator from split()?")
160 162 .len();
161 163 let user_slice = line_iter.next().ok_or_else(|| {
162 164 HgError::corrupted("Changeset data truncated after manifest line")
163 165 })?;
164 166 let user_end = manifest_end + 1 + user_slice.len();
165 167 let timestamp_slice = line_iter.next().ok_or_else(|| {
166 168 HgError::corrupted("Changeset data truncated after user line")
167 169 })?;
168 170 let timestamp_end = user_end + 1 + timestamp_slice.len();
169 171 let mut files_end = timestamp_end + 1;
170 172 loop {
171 173 let line = line_iter.next().ok_or_else(|| {
172 174 HgError::corrupted("Changeset data truncated in files list")
173 175 })?;
174 176 if line.is_empty() {
175 177 if files_end == bytes.len() {
176 178 // The list of files ended with a single newline (there
177 179 // should be two)
178 180 return Err(HgError::corrupted(
179 181 "Changeset data truncated after files list",
180 182 ));
181 183 }
182 184 files_end -= 1;
183 185 break;
184 186 }
185 187 files_end += line.len() + 1;
186 188 }
187 189
188 190 Ok(Self {
189 191 bytes,
190 192 manifest_end,
191 193 user_end,
192 194 timestamp_end,
193 195 files_end,
194 196 })
195 197 }
196 198
197 199 fn null() -> Self {
198 200 Self::new(Cow::Borrowed(
199 201 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
200 202 ))
201 203 .unwrap()
202 204 }
203 205
204 206 /// Return an iterator over the lines of the entry.
205 207 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
206 208 self.bytes.split(|b| b == &b'\n')
207 209 }
208 210
209 211 /// Return the node id of the `manifest` referenced by this `changelog`
210 212 /// entry.
211 213 pub fn manifest_node(&self) -> Result<Node, HgError> {
212 214 let manifest_node_hex = &self.bytes[..self.manifest_end];
213 215 Node::from_hex_for_repo(manifest_node_hex)
214 216 }
215 217
216 218 /// The full user string (usually a name followed by an email enclosed in
217 219 /// angle brackets)
218 220 pub fn user(&self) -> &[u8] {
219 221 &self.bytes[self.manifest_end + 1..self.user_end]
220 222 }
221 223
222 224 /// The full timestamp line (timestamp in seconds, offset in seconds, and
223 225 /// possibly extras)
224 226 // TODO: We should expose this in a more useful way
225 227 pub fn timestamp_line(&self) -> &[u8] {
226 228 &self.bytes[self.user_end + 1..self.timestamp_end]
227 229 }
228 230
229 231 /// The files changed in this revision.
230 232 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
231 233 self.bytes[self.timestamp_end + 1..self.files_end]
232 234 .split(|b| b == &b'\n')
233 235 .map(HgPath::new)
234 236 }
235 237
236 238 /// The change description.
237 239 pub fn description(&self) -> &[u8] {
238 240 &self.bytes[self.files_end + 2..]
239 241 }
240 242 }
241 243
242 244 impl Debug for ChangelogRevisionData<'_> {
243 245 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
244 246 f.debug_struct("ChangelogRevisionData")
245 247 .field("bytes", &debug_bytes(&self.bytes))
246 248 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
247 249 .field(
248 250 "user",
249 251 &debug_bytes(
250 252 &self.bytes[self.manifest_end + 1..self.user_end],
251 253 ),
252 254 )
253 255 .field(
254 256 "timestamp",
255 257 &debug_bytes(
256 258 &self.bytes[self.user_end + 1..self.timestamp_end],
257 259 ),
258 260 )
259 261 .field(
260 262 "files",
261 263 &debug_bytes(
262 264 &self.bytes[self.timestamp_end + 1..self.files_end],
263 265 ),
264 266 )
265 267 .field(
266 268 "description",
267 269 &debug_bytes(&self.bytes[self.files_end + 2..]),
268 270 )
269 271 .finish()
270 272 }
271 273 }
272 274
273 275 fn debug_bytes(bytes: &[u8]) -> String {
274 276 String::from_utf8_lossy(
275 277 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
276 278 )
277 279 .to_string()
278 280 }
279 281
280 282 #[cfg(test)]
281 283 mod tests {
282 284 use super::*;
283 285 use crate::vfs::Vfs;
284 286 use crate::NULL_REVISION;
285 287 use pretty_assertions::assert_eq;
286 288
287 289 #[test]
288 290 fn test_create_changelogrevisiondata_invalid() {
289 291 // Completely empty
290 292 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
291 293 // No newline after manifest
292 294 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
293 295 // No newline after user
294 296 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
295 297 // No newline after timestamp
296 298 assert!(
297 299 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
298 300 );
299 301 // Missing newline after files
300 302 assert!(ChangelogRevisionData::new(Cow::Borrowed(
301 303 b"abcd\n\n0 0\nfile1\nfile2"
302 304 ))
303 305 .is_err(),);
304 306 // Only one newline after files
305 307 assert!(ChangelogRevisionData::new(Cow::Borrowed(
306 308 b"abcd\n\n0 0\nfile1\nfile2\n"
307 309 ))
308 310 .is_err(),);
309 311 }
310 312
311 313 #[test]
312 314 fn test_create_changelogrevisiondata() {
313 315 let data = ChangelogRevisionData::new(Cow::Borrowed(
314 316 b"0123456789abcdef0123456789abcdef01234567
315 317 Some One <someone@example.com>
316 318 0 0
317 319 file1
318 320 file2
319 321
320 322 some
321 323 commit
322 324 message",
323 325 ))
324 326 .unwrap();
325 327 assert_eq!(
326 328 data.manifest_node().unwrap(),
327 329 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
328 330 .unwrap()
329 331 );
330 332 assert_eq!(data.user(), b"Some One <someone@example.com>");
331 333 assert_eq!(data.timestamp_line(), b"0 0");
332 334 assert_eq!(
333 335 data.files().collect_vec(),
334 336 vec![HgPath::new("file1"), HgPath::new("file2")]
335 337 );
336 338 assert_eq!(data.description(), b"some\ncommit\nmessage");
337 339 }
338 340
339 341 #[test]
340 342 fn test_data_from_rev_null() -> Result<(), RevlogError> {
341 343 // an empty revlog will be enough for this case
342 344 let temp = tempfile::tempdir().unwrap();
343 345 let vfs = Vfs { base: temp.path() };
344 346 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
345 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
347 let revlog =
348 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
349 .unwrap();
346 350
347 351 let changelog = Changelog { revlog };
348 352 assert_eq!(
349 353 changelog.data_for_rev(NULL_REVISION.into())?,
350 354 ChangelogRevisionData::null()
351 355 );
352 356 // same with the intermediate entry object
353 357 assert_eq!(
354 358 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
355 359 ChangelogRevisionData::null()
356 360 );
357 361 Ok(())
358 362 }
359 363 }
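
For reference, the byte layout that `ChangelogRevisionData::new` walks: the manifest node in hex on the first line, then the user line, the timestamp line, the file list, and finally a blank line followed by the description. A small sketch recomputing the first three end offsets by hand over a made-up entry; the sample data and arithmetic are illustrative only:

fn main() {
    let bytes: &[u8] = b"0123456789abcdef0123456789abcdef01234567\nSome One <someone@example.com>\n0 0\nfile1\nfile2\n\nsome message";
    let mut lines = bytes.split(|&b| b == b'\n');
    // End of the 40-character hex manifest node (no trailing newline).
    let manifest_end = lines.next().unwrap().len();
    // Each later end offset skips the '\n' that closed the previous line.
    let user_end = manifest_end + 1 + lines.next().unwrap().len();
    let timestamp_end = user_end + 1 + lines.next().unwrap().len();
    assert_eq!(manifest_end, 40);
    assert_eq!(
        &bytes[manifest_end + 1..user_end],
        b"Some One <someone@example.com>"
    );
    assert_eq!(&bytes[user_end + 1..timestamp_end], b"0 0");
}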
@@ -1,239 +1,245 b''
1 1 use crate::errors::HgError;
2 2 use crate::exit_codes;
3 3 use crate::repo::Repo;
4 4 use crate::revlog::path_encode::path_encode;
5 5 use crate::revlog::NodePrefix;
6 6 use crate::revlog::Revision;
7 7 use crate::revlog::RevlogEntry;
8 8 use crate::revlog::{Revlog, RevlogError};
9 9 use crate::utils::files::get_path_from_bytes;
10 10 use crate::utils::hg_path::HgPath;
11 11 use crate::utils::SliceExt;
12 12 use crate::Graph;
13 13 use crate::GraphError;
14 use crate::RevlogOpenOptions;
14 15 use crate::UncheckedRevision;
15 16 use std::path::PathBuf;
16 17
17 18 /// A specialized `Revlog` to work with file data logs.
18 19 pub struct Filelog {
19 20 /// The generic `revlog` format.
20 21 revlog: Revlog,
21 22 }
22 23
23 24 impl Graph for Filelog {
24 25 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
25 26 self.revlog.parents(rev)
26 27 }
27 28 }
28 29
29 30 impl Filelog {
30 31 pub fn open_vfs(
31 32 store_vfs: &crate::vfs::Vfs<'_>,
32 33 file_path: &HgPath,
34 options: RevlogOpenOptions,
33 35 ) -> Result<Self, HgError> {
34 36 let index_path = store_path(file_path, b".i");
35 37 let data_path = store_path(file_path, b".d");
36 38 let revlog =
37 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
39 Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
38 40 Ok(Self { revlog })
39 41 }
40 42
41 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
42 Self::open_vfs(&repo.store_vfs(), file_path)
43 pub fn open(
44 repo: &Repo,
45 file_path: &HgPath,
46 options: RevlogOpenOptions,
47 ) -> Result<Self, HgError> {
48 Self::open_vfs(&repo.store_vfs(), file_path, options)
43 49 }
44 50
45 51 /// The given node ID is that of the file as found in a filelog, not of a
46 52 /// changeset.
47 53 pub fn data_for_node(
48 54 &self,
49 55 file_node: impl Into<NodePrefix>,
50 56 ) -> Result<FilelogRevisionData, RevlogError> {
51 57 let file_rev = self.revlog.rev_from_node(file_node.into())?;
52 58 self.data_for_rev(file_rev.into())
53 59 }
54 60
55 61 /// The given revision is that of the file as found in a filelog, not of a
56 62 /// changeset.
57 63 pub fn data_for_rev(
58 64 &self,
59 65 file_rev: UncheckedRevision,
60 66 ) -> Result<FilelogRevisionData, RevlogError> {
61 67 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
62 68 Ok(FilelogRevisionData(data))
63 69 }
64 70
65 71 /// The given node ID is that of the file as found in a filelog, not of a
66 72 /// changeset.
67 73 pub fn entry_for_node(
68 74 &self,
69 75 file_node: impl Into<NodePrefix>,
70 76 ) -> Result<FilelogEntry, RevlogError> {
71 77 let file_rev = self.revlog.rev_from_node(file_node.into())?;
72 78 self.entry_for_checked_rev(file_rev)
73 79 }
74 80
75 81 /// The given revision is that of the file as found in a filelog, not of a
76 82 /// changeset.
77 83 pub fn entry_for_rev(
78 84 &self,
79 85 file_rev: UncheckedRevision,
80 86 ) -> Result<FilelogEntry, RevlogError> {
81 87 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
82 88 }
83 89
84 90 fn entry_for_checked_rev(
85 91 &self,
86 92 file_rev: Revision,
87 93 ) -> Result<FilelogEntry, RevlogError> {
88 94 Ok(FilelogEntry(
89 95 self.revlog.get_entry_for_checked_rev(file_rev)?,
90 96 ))
91 97 }
92 98 }
93 99
94 100 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
95 101 let encoded_bytes =
96 102 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
97 103 get_path_from_bytes(&encoded_bytes).into()
98 104 }
99 105
100 106 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
101 107
102 108 impl FilelogEntry<'_> {
103 109 /// `self.data()` can be expensive, with decompression and delta
104 110 /// resolution.
105 111 ///
106 112 /// *Without* paying this cost, based on revlog index information
107 113 /// including `RevlogEntry::uncompressed_len`:
108 114 ///
109 115 /// * Returns `true` if the length that `self.data().file_data().len()`
110 116 /// would return is definitely **not equal** to `other_len`.
111 117 /// * Returns `false` if available information is inconclusive.
112 118 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
113 119 // Relevant Python code that implements this behavior:
114 120 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
115 121 // revlog.size, revlog.rawsize
116 122
117 123 // Let’s call `file_data_len` what would be returned by
118 124 // `self.data().file_data().len()`.
119 125
120 126 if self.0.is_censored() {
121 127 let file_data_len = 0;
122 128 return other_len != file_data_len;
123 129 }
124 130
125 131 if self.0.has_length_affecting_flag_processor() {
126 132 // We can’t conclude anything about `file_data_len`.
127 133 return false;
128 134 }
129 135
130 136 // Revlog revisions (usually) have metadata for the size of
131 137 // their data after decompression and delta resolution
132 138 // as would be returned by `Revlog::get_rev_data`.
133 139 //
134 140 // For filelogs this is the file’s contents preceded by an optional
135 141 // metadata block.
136 142 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
137 143 l as u64
138 144 } else {
139 145 // The field was set to -1, the actual uncompressed len is unknown.
140 146 // We need to decompress to say more.
141 147 return false;
142 148 };
143 149 // `uncompressed_len = file_data_len + optional_metadata_len`,
144 150 // so `file_data_len <= uncompressed_len`.
145 151 if uncompressed_len < other_len {
146 152 // Transitively, `file_data_len < other_len`.
147 153 // So `other_len != file_data_len` definitely.
148 154 return true;
149 155 }
150 156
151 157 if uncompressed_len == other_len + 4 {
152 158 // It’s possible that `file_data_len == other_len` with an empty
153 159 // metadata block (2 start marker bytes + 2 end marker bytes).
154 160 // This happens when there wouldn’t otherwise be metadata, but
155 161 // the first 2 bytes of file data happen to match a start marker
156 162 // and would be ambiguous.
157 163 return false;
158 164 }
159 165
160 166 if !self.0.has_p1() {
161 167 // There may or may not be copy metadata, so we can’t deduce more
162 168 // about `file_data_len` without computing file data.
163 169 return false;
164 170 }
165 171
166 172 // Filelog ancestry is not meaningful in the way changelog ancestry is.
167 173 // It only provides hints to delta generation.
168 174 // p1 and p2 are set to null when making a copy or rename since
169 175 // contents are likely unrelated to what might have previously existed
170 176 // at the destination path.
171 177 //
172 178 // Conversely, since here p1 is non-null, there is no copy metadata.
173 179 // Note that this reasoning may be invalidated in the presence of
174 180 // merges made by some previous versions of Mercurial that
175 181 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
176 182 // and `tests/test-issue6528.t`.
177 183 //
178 184 // Since copy metadata is currently the only kind of metadata
179 185 // kept in revlog data of filelogs,
180 186 // this `FilelogEntry` does not have such metadata:
181 187 let file_data_len = uncompressed_len;
182 188
183 189 file_data_len != other_len
184 190 }
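
The decision ladder in `file_data_len_not_equal_to` can be condensed into a pure function over the facts it consults. A hedged sketch follows; the parameter names are invented, and the early returns track the commented reasoning above:

fn len_definitely_differs(
    censored: bool,
    has_len_flag_processor: bool,
    uncompressed_len: Option<u64>,
    has_p1: bool,
    other_len: u64,
) -> bool {
    if censored {
        // Censored revisions resolve to empty file data.
        return other_len != 0;
    }
    if has_len_flag_processor {
        return false; // inconclusive without running the processor
    }
    let uncompressed_len = match uncompressed_len {
        Some(l) => l,
        None => return false, // unknown without decompressing
    };
    if uncompressed_len < other_len {
        return true; // metadata can only add bytes, never remove them
    }
    if uncompressed_len == other_len + 4 {
        return false; // could be an empty 4-byte metadata block
    }
    if !has_p1 {
        return false; // copy metadata may be present
    }
    // Non-null p1 implies no copy metadata, so lengths compare directly.
    uncompressed_len != other_len
}

fn main() {
    // Uncompressed length smaller than the candidate: lengths must differ.
    assert!(len_definitely_differs(false, false, Some(3), true, 10));
    // Exactly 4 bytes larger: possibly an empty metadata block, inconclusive.
    assert!(!len_definitely_differs(false, false, Some(14), true, 10));
}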
185 191
186 192 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
187 193 let data = self.0.data();
188 194 match data {
189 195 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
190 196 // Errors other than `HgError` should not happen at this point
191 197 Err(e) => match e {
192 198 RevlogError::Other(hg_error) => Err(hg_error),
193 199 revlog_error => Err(HgError::abort(
194 200 revlog_error.to_string(),
195 201 exit_codes::ABORT,
196 202 None,
197 203 )),
198 204 },
199 205 }
200 206 }
201 207 }
202 208
203 209 /// The data for one revision in a filelog, uncompressed and delta-resolved.
204 210 pub struct FilelogRevisionData(Vec<u8>);
205 211
206 212 impl FilelogRevisionData {
207 213 /// Split into metadata and data
208 214 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
209 215 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
210 216
211 217 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
212 218 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
213 219 Ok((Some(metadata), data))
214 220 } else {
215 221 Err(HgError::corrupted(
216 222 "Missing metadata end delimiter in filelog entry",
217 223 ))
218 224 }
219 225 } else {
220 226 Ok((None, &self.0))
221 227 }
222 228 }
223 229
224 230 /// Returns the file contents at this revision, stripped of any metadata
225 231 pub fn file_data(&self) -> Result<&[u8], HgError> {
226 232 let (_metadata, data) = self.split()?;
227 233 Ok(data)
228 234 }
229 235
230 236 /// Consume the entry, and convert it into data, discarding any metadata,
231 237 /// if present.
232 238 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
233 239 if let (Some(_metadata), data) = self.split()? {
234 240 Ok(data.to_owned())
235 241 } else {
236 242 Ok(self.0)
237 243 }
238 244 }
239 245 }
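
`FilelogRevisionData::split` above keys off the `\x01\n` delimiter pair that fences optional copy metadata at the front of filelog data. A standalone re-implementation sketch, using `windows`/`position` in place of the crate's `split_2_by_slice` helper; `split_filelog_data` is an illustrative name:

const DELIMITER: &[u8] = b"\x01\n";

fn split_filelog_data(data: &[u8]) -> Result<(Option<&[u8]>, &[u8]), &'static str> {
    if let Some(rest) = data.strip_prefix(DELIMITER) {
        // Everything up to the end delimiter is metadata.
        let pos = rest
            .windows(DELIMITER.len())
            .position(|w| w == DELIMITER)
            .ok_or("missing metadata end delimiter")?;
        Ok((Some(&rest[..pos]), &rest[pos + DELIMITER.len()..]))
    } else {
        // No leading delimiter: the whole payload is file data.
        Ok((None, data))
    }
}

fn main() {
    let raw = b"\x01\ncopy: a.txt\n\x01\nfile contents";
    let (meta, data) = split_filelog_data(raw).unwrap();
    assert_eq!(meta.unwrap(), b"copy: a.txt\n");
    assert_eq!(data, b"file contents");
}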
@@ -1,775 +1,770 b''
1 1 use std::fmt::Debug;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5 use bytes_cast::{unaligned, BytesCast};
6 6
7 7 use super::REVIDX_KNOWN_FLAGS;
8 8 use crate::errors::HgError;
9 9 use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES};
10 10 use crate::revlog::node::Node;
11 11 use crate::revlog::{Revision, NULL_REVISION};
12 12 use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision};
13 13
14 14 pub const INDEX_ENTRY_SIZE: usize = 64;
15 15 pub const COMPRESSION_MODE_INLINE: u8 = 2;
16 16
17 17 pub struct IndexHeader {
18 header_bytes: [u8; 4],
18 pub(super) header_bytes: [u8; 4],
19 19 }
20 20
21 21 #[derive(Copy, Clone)]
22 22 pub struct IndexHeaderFlags {
23 23 flags: u16,
24 24 }
25 25
26 26 /// Corresponds to the high bits of `_format_flags` in python
27 27 impl IndexHeaderFlags {
28 28 /// Corresponds to FLAG_INLINE_DATA in python
29 29 pub fn is_inline(self) -> bool {
30 30 self.flags & 1 != 0
31 31 }
32 32 /// Corresponds to FLAG_GENERALDELTA in python
33 33 pub fn uses_generaldelta(self) -> bool {
34 34 self.flags & 2 != 0
35 35 }
36 36 }
37 37
38 38 /// Corresponds to the INDEX_HEADER structure,
39 39 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
40 40 impl IndexHeader {
41 41 fn format_flags(&self) -> IndexHeaderFlags {
42 42 // No "unknown flags" check here, unlike in python. Maybe there should
43 43 // be.
44 44 IndexHeaderFlags {
45 45 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
46 46 }
47 47 }
48 48
49 49 /// The only revlog version currently supported by rhg.
50 50 const REVLOGV1: u16 = 1;
51 51
52 52 /// Corresponds to `_format_version` in Python.
53 53 fn format_version(&self) -> u16 {
54 54 BigEndian::read_u16(&self.header_bytes[2..4])
55 55 }
56 56
57 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
58 // We treat an empty file as a valid index with no entries.
59 // Here we make an arbitrary choice of what we assume the format of the
60 // index to be (V1, using generaldelta).
61 // This doesn't matter too much, since we're only doing read-only
62 // access, but the value corresponds to the `new_header` variable in
63 // `revlog.py`, `_loadindex`
64 header_bytes: [0, 3, 0, 1],
65 };
66
67 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
57 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
68 58 if index_bytes.is_empty() {
69 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
59 return Ok(None);
70 60 }
71 61 if index_bytes.len() < 4 {
72 62 return Err(HgError::corrupted(
73 63 "corrupted revlog: can't read the index format header",
74 64 ));
75 65 }
76 Ok(IndexHeader {
66 Ok(Some(IndexHeader {
77 67 header_bytes: {
78 68 let bytes: [u8; 4] =
79 69 index_bytes[0..4].try_into().expect("impossible");
80 70 bytes
81 71 },
82 })
72 }))
83 73 }
84 74 }
85 75
86 76 /// Abstracts the access to the index bytes since they can be spread between
87 77 /// the immutable (bytes) part and the mutable (added) part if any appends
88 78 /// happened. This makes it transparent for the callers.
89 79 struct IndexData {
90 80 /// Immutable bytes, most likely taken from disk
91 81 bytes: Box<dyn Deref<Target = [u8]> + Send>,
92 82 /// Bytes that were added after reading the index
93 83 added: Vec<u8>,
94 84 }
95 85
96 86 impl IndexData {
97 87 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
98 88 Self {
99 89 bytes,
100 90 added: vec![],
101 91 }
102 92 }
103 93
104 94 pub fn len(&self) -> usize {
105 95 self.bytes.len() + self.added.len()
106 96 }
107 97 }
108 98
109 99 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
110 100 type Output = [u8];
111 101
112 102 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
113 103 let start = index.start;
114 104 let end = index.end;
115 105 let immutable_len = self.bytes.len();
116 106 if start < immutable_len {
117 107 if end > immutable_len {
118 108 panic!("index data cannot span existing and added ranges");
119 109 }
120 110 &self.bytes[index]
121 111 } else {
122 112 &self.added[start - immutable_len..end - immutable_len]
123 113 }
124 114 }
125 115 }
126 116
127 117 pub struct RevisionDataParams {
128 118 flags: u16,
129 119 data_offset: u64,
130 120 data_compressed_length: i32,
131 121 data_uncompressed_length: i32,
132 122 data_delta_base: i32,
133 123 link_rev: i32,
134 124 parent_rev_1: i32,
135 125 parent_rev_2: i32,
136 126 node_id: [u8; NODE_BYTES_LENGTH],
137 127 _sidedata_offset: u64,
138 128 _sidedata_compressed_length: i32,
139 129 data_compression_mode: u8,
140 130 _sidedata_compression_mode: u8,
141 131 _rank: i32,
142 132 }
143 133
144 134 #[derive(BytesCast)]
145 135 #[repr(C)]
146 136 pub struct RevisionDataV1 {
147 137 data_offset_or_flags: unaligned::U64Be,
148 138 data_compressed_length: unaligned::I32Be,
149 139 data_uncompressed_length: unaligned::I32Be,
150 140 data_delta_base: unaligned::I32Be,
151 141 link_rev: unaligned::I32Be,
152 142 parent_rev_1: unaligned::I32Be,
153 143 parent_rev_2: unaligned::I32Be,
154 144 node_id: [u8; STORED_NODE_ID_BYTES],
155 145 }
156 146
157 147 fn _static_assert_size_of_revision_data_v1() {
158 148 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
159 149 }
160 150
161 151 impl RevisionDataParams {
162 152 pub fn validate(&self) -> Result<(), RevlogError> {
163 153 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
164 154 return Err(RevlogError::corrupted(format!(
165 155 "unknown revlog index flags: {}",
166 156 self.flags
167 157 )));
168 158 }
169 159 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
170 160 return Err(RevlogError::corrupted(format!(
171 161 "invalid data compression mode: {}",
172 162 self.data_compression_mode
173 163 )));
174 164 }
175 165 // FIXME isn't this only for v2 or changelog v2?
176 166 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
177 167 return Err(RevlogError::corrupted(format!(
178 168 "invalid sidedata compression mode: {}",
179 169 self._sidedata_compression_mode
180 170 )));
181 171 }
182 172 Ok(())
183 173 }
184 174
185 175 pub fn into_v1(self) -> RevisionDataV1 {
186 176 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
187 177 let mut node_id = [0; STORED_NODE_ID_BYTES];
188 178 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
189 179 RevisionDataV1 {
190 180 data_offset_or_flags: data_offset_or_flags.into(),
191 181 data_compressed_length: self.data_compressed_length.into(),
192 182 data_uncompressed_length: self.data_uncompressed_length.into(),
193 183 data_delta_base: self.data_delta_base.into(),
194 184 link_rev: self.link_rev.into(),
195 185 parent_rev_1: self.parent_rev_1.into(),
196 186 parent_rev_2: self.parent_rev_2.into(),
197 187 node_id,
198 188 }
199 189 }
200 190 }
201 191
202 192 /// A Revlog index
203 193 pub struct Index {
204 194 bytes: IndexData,
205 195 /// Offsets of starts of index blocks.
206 196 /// Only needed when the index is interleaved with data.
207 197 offsets: Option<Vec<usize>>,
208 198 uses_generaldelta: bool,
209 199 }
210 200
211 201 impl Debug for Index {
212 202 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
213 203 f.debug_struct("Index")
214 204 .field("offsets", &self.offsets)
215 205 .field("uses_generaldelta", &self.uses_generaldelta)
216 206 .finish()
217 207 }
218 208 }
219 209
220 210 impl Graph for Index {
221 211 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
222 212 let err = || GraphError::ParentOutOfRange(rev);
223 213 match self.get_entry(rev) {
224 214 Some(entry) => {
225 215 // The C implementation checks that the parents are valid
226 216 // before returning
227 217 Ok([
228 218 self.check_revision(entry.p1()).ok_or_else(err)?,
229 219 self.check_revision(entry.p2()).ok_or_else(err)?,
230 220 ])
231 221 }
232 222 None => Ok([NULL_REVISION, NULL_REVISION]),
233 223 }
234 224 }
235 225 }
236 226
237 227 impl Index {
238 228 /// Create an index from bytes.
239 229 /// Calculate the start of each entry when is_inline is true.
240 230 pub fn new(
241 231 bytes: Box<dyn Deref<Target = [u8]> + Send>,
232 default_header: IndexHeader,
242 233 ) -> Result<Self, HgError> {
243 let header = IndexHeader::parse(bytes.as_ref())?;
234 let header =
235 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
244 236
245 237 if header.format_version() != IndexHeader::REVLOGV1 {
246 238 // A proper new version should have had a repo/store
247 239 // requirement.
248 240 return Err(HgError::corrupted("unsupported revlog version"));
249 241 }
250 242
251 243 // This is only correct because we know version is REVLOGV1.
252 244 // In v2 we always use generaldelta, while in v0 we never use
253 245 // generaldelta. Similar for [is_inline] (it's only used in v1).
254 246 let uses_generaldelta = header.format_flags().uses_generaldelta();
255 247
256 248 if header.format_flags().is_inline() {
257 249 let mut offset: usize = 0;
258 250 let mut offsets = Vec::new();
259 251
260 252 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
261 253 offsets.push(offset);
262 254 let end = offset + INDEX_ENTRY_SIZE;
263 255 let entry = IndexEntry {
264 256 bytes: &bytes[offset..end],
265 257 offset_override: None,
266 258 };
267 259
268 260 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
269 261 }
270 262
271 263 if offset == bytes.len() {
272 264 Ok(Self {
273 265 bytes: IndexData::new(bytes),
274 266 offsets: Some(offsets),
275 267 uses_generaldelta,
276 268 })
277 269 } else {
278 270 Err(HgError::corrupted("unexpected inline revlog length"))
279 271 }
280 272 } else {
281 273 Ok(Self {
282 274 bytes: IndexData::new(bytes),
283 275 offsets: None,
284 276 uses_generaldelta,
285 277 })
286 278 }
287 279 }
288 280
289 281 pub fn uses_generaldelta(&self) -> bool {
290 282 self.uses_generaldelta
291 283 }
292 284
293 285 /// Value of the inline flag.
294 286 pub fn is_inline(&self) -> bool {
295 287 self.offsets.is_some()
296 288 }
297 289
298 290 /// Return a slice of bytes if `revlog` is inline. Panic if not.
299 291 pub fn data(&self, start: usize, end: usize) -> &[u8] {
300 292 if !self.is_inline() {
301 293 panic!("tried to access data in the index of a revlog that is not inline");
302 294 }
303 295 &self.bytes[start..end]
304 296 }
305 297
306 298 /// Return number of entries of the revlog index.
307 299 pub fn len(&self) -> usize {
308 300 if let Some(offsets) = &self.offsets {
309 301 offsets.len()
310 302 } else {
311 303 self.bytes.len() / INDEX_ENTRY_SIZE
312 304 }
313 305 }
314 306
315 307 /// Returns `true` if the `Index` has zero `entries`.
316 308 pub fn is_empty(&self) -> bool {
317 309 self.len() == 0
318 310 }
319 311
320 312 /// Return the index entry corresponding to the given revision if it
321 313 /// exists.
322 314 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
323 315 if rev == NULL_REVISION {
324 316 return None;
325 317 }
326 318 Some(if let Some(offsets) = &self.offsets {
327 319 self.get_entry_inline(rev, offsets)
328 320 } else {
329 321 self.get_entry_separated(rev)
330 322 })
331 323 }
332 324
333 325 fn get_entry_inline(
334 326 &self,
335 327 rev: Revision,
336 328 offsets: &[usize],
337 329 ) -> IndexEntry {
338 330 let start = offsets[rev.0 as usize];
339 331 let end = start + INDEX_ENTRY_SIZE;
340 332 let bytes = &self.bytes[start..end];
341 333
342 334 // See IndexEntry for an explanation of this override.
343 335 let offset_override = Some(end);
344 336
345 337 IndexEntry {
346 338 bytes,
347 339 offset_override,
348 340 }
349 341 }
350 342
351 343 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
352 344 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
353 345 let end = start + INDEX_ENTRY_SIZE;
354 346 let bytes = &self.bytes[start..end];
355 347
356 348 // Override the offset of the first revision as its bytes are used
357 349 // for the index's metadata (saving space because it is always 0)
358 350 let offset_override = if rev == Revision(0) { Some(0) } else { None };
359 351
360 352 IndexEntry {
361 353 bytes,
362 354 offset_override,
363 355 }
364 356 }
365 357
366 358 /// TODO move this to the trait probably, along with other things
367 359 pub fn append(
368 360 &mut self,
369 361 revision_data: RevisionDataParams,
370 362 ) -> Result<(), RevlogError> {
371 363 revision_data.validate()?;
372 364 let new_offset = self.bytes.len();
373 365 if let Some(offsets) = self.offsets.as_mut() {
374 366 offsets.push(new_offset)
375 367 }
376 368 self.bytes.added.extend(revision_data.into_v1().as_bytes());
377 369 Ok(())
378 370 }
379 371 }
380 372
381 373 impl super::RevlogIndex for Index {
382 374 fn len(&self) -> usize {
383 375 self.len()
384 376 }
385 377
386 378 fn node(&self, rev: Revision) -> Option<&Node> {
387 379 self.get_entry(rev).map(|entry| entry.hash())
388 380 }
389 381 }
390 382
391 383 #[derive(Debug)]
392 384 pub struct IndexEntry<'a> {
393 385 bytes: &'a [u8],
394 386 /// Allows to override the offset value of the entry.
395 387 ///
396 388 /// For interleaved index and data, the offset stored in the index
397 389 /// corresponds to the separated data offset.
398 390 /// It has to be overridden with the actual offset in the interleaved
399 391 /// index which is just after the index block.
400 392 ///
401 393 /// For separated index and data, the offset stored in the first index
402 394 /// entry is mixed with the index headers.
403 395 /// It has to be overridden with 0.
404 396 offset_override: Option<usize>,
405 397 }
406 398
407 399 impl<'a> IndexEntry<'a> {
408 400 /// Return the offset of the data.
409 401 pub fn offset(&self) -> usize {
410 402 if let Some(offset_override) = self.offset_override {
411 403 offset_override
412 404 } else {
413 405 let mut bytes = [0; 8];
414 406 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
415 407 BigEndian::read_u64(&bytes[..]) as usize
416 408 }
417 409 }
418 410
419 411 pub fn flags(&self) -> u16 {
420 412 BigEndian::read_u16(&self.bytes[6..=7])
421 413 }
422 414
423 415 /// Return the compressed length of the data.
424 416 pub fn compressed_len(&self) -> u32 {
425 417 BigEndian::read_u32(&self.bytes[8..=11])
426 418 }
427 419
428 420 /// Return the uncompressed length of the data.
429 421 pub fn uncompressed_len(&self) -> i32 {
430 422 BigEndian::read_i32(&self.bytes[12..=15])
431 423 }
432 424
433 425 /// Return the revision upon which the data has been derived.
434 426 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
435 427 // TODO Maybe return an Option when base_revision == rev?
436 428 // Requires to add rev to IndexEntry
437 429
438 430 BigEndian::read_i32(&self.bytes[16..]).into()
439 431 }
440 432
441 433 pub fn link_revision(&self) -> UncheckedRevision {
442 434 BigEndian::read_i32(&self.bytes[20..]).into()
443 435 }
444 436
445 437 pub fn p1(&self) -> UncheckedRevision {
446 438 BigEndian::read_i32(&self.bytes[24..]).into()
447 439 }
448 440
449 441 pub fn p2(&self) -> UncheckedRevision {
450 442 BigEndian::read_i32(&self.bytes[28..]).into()
451 443 }
452 444
453 445 /// Return the hash of revision's full text.
454 446 ///
455 447 /// Currently, SHA-1 is used and only the first 20 bytes of this field
456 448 /// are used.
457 449 pub fn hash(&self) -> &'a Node {
458 450 (&self.bytes[32..52]).try_into().unwrap()
459 451 }
460 452 }
461 453
462 454 #[cfg(test)]
463 455 mod tests {
464 456 use super::*;
465 457 use crate::node::NULL_NODE;
466 458
467 459 #[cfg(test)]
468 460 #[derive(Debug, Copy, Clone)]
469 461 pub struct IndexEntryBuilder {
470 462 is_first: bool,
471 463 is_inline: bool,
472 464 is_general_delta: bool,
473 465 version: u16,
474 466 offset: usize,
475 467 compressed_len: usize,
476 468 uncompressed_len: usize,
477 469 base_revision_or_base_of_delta_chain: Revision,
478 470 link_revision: Revision,
479 471 p1: Revision,
480 472 p2: Revision,
481 473 node: Node,
482 474 }
483 475
484 476 #[cfg(test)]
485 477 impl IndexEntryBuilder {
486 478 #[allow(clippy::new_without_default)]
487 479 pub fn new() -> Self {
488 480 Self {
489 481 is_first: false,
490 482 is_inline: false,
491 483 is_general_delta: true,
492 484 version: 1,
493 485 offset: 0,
494 486 compressed_len: 0,
495 487 uncompressed_len: 0,
496 488 base_revision_or_base_of_delta_chain: Revision(0),
497 489 link_revision: Revision(0),
498 490 p1: NULL_REVISION,
499 491 p2: NULL_REVISION,
500 492 node: NULL_NODE,
501 493 }
502 494 }
503 495
504 496 pub fn is_first(&mut self, value: bool) -> &mut Self {
505 497 self.is_first = value;
506 498 self
507 499 }
508 500
509 501 pub fn with_inline(&mut self, value: bool) -> &mut Self {
510 502 self.is_inline = value;
511 503 self
512 504 }
513 505
514 506 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
515 507 self.is_general_delta = value;
516 508 self
517 509 }
518 510
519 511 pub fn with_version(&mut self, value: u16) -> &mut Self {
520 512 self.version = value;
521 513 self
522 514 }
523 515
524 516 pub fn with_offset(&mut self, value: usize) -> &mut Self {
525 517 self.offset = value;
526 518 self
527 519 }
528 520
529 521 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
530 522 self.compressed_len = value;
531 523 self
532 524 }
533 525
534 526 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
535 527 self.uncompressed_len = value;
536 528 self
537 529 }
538 530
539 531 pub fn with_base_revision_or_base_of_delta_chain(
540 532 &mut self,
541 533 value: Revision,
542 534 ) -> &mut Self {
543 535 self.base_revision_or_base_of_delta_chain = value;
544 536 self
545 537 }
546 538
547 539 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
548 540 self.link_revision = value;
549 541 self
550 542 }
551 543
552 544 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
553 545 self.p1 = value;
554 546 self
555 547 }
556 548
557 549 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
558 550 self.p2 = value;
559 551 self
560 552 }
561 553
562 554 pub fn with_node(&mut self, value: Node) -> &mut Self {
563 555 self.node = value;
564 556 self
565 557 }
566 558
567 559 pub fn build(&self) -> Vec<u8> {
568 560 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
569 561 if self.is_first {
570 562 bytes.extend(&match (self.is_general_delta, self.is_inline) {
571 563 (false, false) => [0u8, 0],
572 564 (false, true) => [0u8, 1],
573 565 (true, false) => [0u8, 2],
574 566 (true, true) => [0u8, 3],
575 567 });
576 568 bytes.extend(&self.version.to_be_bytes());
577 569 // Remaining offset bytes.
578 570 bytes.extend(&[0u8; 2]);
579 571 } else {
580 572 // Offset stored on 48 bits (6 bytes)
581 573 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
582 574 }
583 575 bytes.extend(&[0u8; 2]); // Revision flags.
584 576 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
585 577 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
586 578 bytes.extend(
587 579 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
588 580 );
589 581 bytes.extend(&self.link_revision.0.to_be_bytes());
590 582 bytes.extend(&self.p1.0.to_be_bytes());
591 583 bytes.extend(&self.p2.0.to_be_bytes());
592 584 bytes.extend(self.node.as_bytes());
593 585 bytes.extend(vec![0u8; 12]);
594 586 bytes
595 587 }
596 588 }
597 589
598 590 pub fn is_inline(index_bytes: &[u8]) -> bool {
599 591 IndexHeader::parse(index_bytes)
600 592 .expect("too short")
593 .unwrap()
601 594 .format_flags()
602 595 .is_inline()
603 596 }
604 597
605 598 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
606 599 IndexHeader::parse(index_bytes)
607 600 .expect("too short")
601 .unwrap()
608 602 .format_flags()
609 603 .uses_generaldelta()
610 604 }
611 605
612 606 pub fn get_version(index_bytes: &[u8]) -> u16 {
613 607 IndexHeader::parse(index_bytes)
614 608 .expect("too short")
609 .unwrap()
615 610 .format_version()
616 611 }
617 612
618 613 #[test]
619 614 fn flags_when_no_inline_flag_test() {
620 615 let bytes = IndexEntryBuilder::new()
621 616 .is_first(true)
622 617 .with_general_delta(false)
623 618 .with_inline(false)
624 619 .build();
625 620
626 621 assert!(!is_inline(&bytes));
627 622 assert!(!uses_generaldelta(&bytes));
628 623 }
629 624
630 625 #[test]
631 626 fn flags_when_inline_flag_test() {
632 627 let bytes = IndexEntryBuilder::new()
633 628 .is_first(true)
634 629 .with_general_delta(false)
635 630 .with_inline(true)
636 631 .build();
637 632
638 633 assert!(is_inline(&bytes));
639 634 assert!(!uses_generaldelta(&bytes));
640 635 }
641 636
642 637 #[test]
643 638 fn flags_when_inline_and_generaldelta_flags_test() {
644 639 let bytes = IndexEntryBuilder::new()
645 640 .is_first(true)
646 641 .with_general_delta(true)
647 642 .with_inline(true)
648 643 .build();
649 644
650 645 assert!(is_inline(&bytes));
651 646 assert!(uses_generaldelta(&bytes));
652 647 }
653 648
654 649 #[test]
655 650 fn test_offset() {
656 651 let bytes = IndexEntryBuilder::new().with_offset(1).build();
657 652 let entry = IndexEntry {
658 653 bytes: &bytes,
659 654 offset_override: None,
660 655 };
661 656
662 657 assert_eq!(entry.offset(), 1)
663 658 }
664 659
665 660 #[test]
666 661 fn test_with_overridden_offset() {
667 662 let bytes = IndexEntryBuilder::new().with_offset(1).build();
668 663 let entry = IndexEntry {
669 664 bytes: &bytes,
670 665 offset_override: Some(2),
671 666 };
672 667
673 668 assert_eq!(entry.offset(), 2)
674 669 }
675 670
676 671 #[test]
677 672 fn test_compressed_len() {
678 673 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
679 674 let entry = IndexEntry {
680 675 bytes: &bytes,
681 676 offset_override: None,
682 677 };
683 678
684 679 assert_eq!(entry.compressed_len(), 1)
685 680 }
686 681
687 682 #[test]
688 683 fn test_uncompressed_len() {
689 684 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
690 685 let entry = IndexEntry {
691 686 bytes: &bytes,
692 687 offset_override: None,
693 688 };
694 689
695 690 assert_eq!(entry.uncompressed_len(), 1)
696 691 }
697 692
698 693 #[test]
699 694 fn test_base_revision_or_base_of_delta_chain() {
700 695 let bytes = IndexEntryBuilder::new()
701 696 .with_base_revision_or_base_of_delta_chain(Revision(1))
702 697 .build();
703 698 let entry = IndexEntry {
704 699 bytes: &bytes,
705 700 offset_override: None,
706 701 };
707 702
708 703 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
709 704 }
710 705
711 706 #[test]
712 707 fn link_revision_test() {
713 708 let bytes = IndexEntryBuilder::new()
714 709 .with_link_revision(Revision(123))
715 710 .build();
716 711
717 712 let entry = IndexEntry {
718 713 bytes: &bytes,
719 714 offset_override: None,
720 715 };
721 716
722 717 assert_eq!(entry.link_revision(), 123.into());
723 718 }
724 719
725 720 #[test]
726 721 fn p1_test() {
727 722 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
728 723
729 724 let entry = IndexEntry {
730 725 bytes: &bytes,
731 726 offset_override: None,
732 727 };
733 728
734 729 assert_eq!(entry.p1(), 123.into());
735 730 }
736 731
737 732 #[test]
738 733 fn p2_test() {
739 734 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
740 735
741 736 let entry = IndexEntry {
742 737 bytes: &bytes,
743 738 offset_override: None,
744 739 };
745 740
746 741 assert_eq!(entry.p2(), 123.into());
747 742 }
748 743
749 744 #[test]
750 745 fn node_test() {
751 746 let node = Node::from_hex("0123456789012345678901234567890123456789")
752 747 .unwrap();
753 748 let bytes = IndexEntryBuilder::new().with_node(node).build();
754 749
755 750 let entry = IndexEntry {
756 751 bytes: &bytes,
757 752 offset_override: None,
758 753 };
759 754
760 755 assert_eq!(*entry.hash(), node);
761 756 }
762 757
763 758 #[test]
764 759 fn version_test() {
765 760 let bytes = IndexEntryBuilder::new()
766 761 .is_first(true)
767 762 .with_version(2)
768 763 .build();
769 764
770 765 assert_eq!(get_version(&bytes), 2)
771 766 }
772 767 }
773 768
774 769 #[cfg(test)]
775 770 pub use tests::IndexEntryBuilder;
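For illustration (not part of the patch), a minimal sketch of the header round trip these helpers rely on: the first index entry doubles as the header, so `build()` writes two flag bytes (1 = inline, 2 = generaldelta) followed by the version as a big-endian u16, and `is_inline`/`uses_generaldelta`/`get_version` read them back. Assumes it lives in the same `tests` module as `IndexEntryBuilder`:

    #[test]
    fn header_bytes_roundtrip_sketch() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true) // only the first entry carries the header
            .with_version(1)
            .with_general_delta(true)
            .with_inline(true)
            .build();
        // Flag bytes [0, 3] encode inline (1) | generaldelta (2),
        // then version 1 as a big-endian u16.
        assert_eq!(bytes[..4], [0u8, 3, 0, 1]);
        assert!(is_inline(&bytes));
        assert!(uses_generaldelta(&bytes));
        assert_eq!(get_version(&bytes), 1);
    }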
@@ -1,209 +1,213 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::{Node, NodePrefix};
3 3 use crate::revlog::{Revlog, RevlogError};
4 4 use crate::utils::hg_path::HgPath;
5 5 use crate::utils::SliceExt;
6 6 use crate::vfs::Vfs;
7 use crate::{Graph, GraphError, Revision, UncheckedRevision};
7 use crate::{
8 Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision,
9 };
8 10
9 11 /// A specialized `Revlog` to work with `manifest` data format.
10 12 pub struct Manifestlog {
11 13 /// The generic `revlog` format.
12 revlog: Revlog,
14 pub(crate) revlog: Revlog,
13 15 }
14 16
15 17 impl Graph for Manifestlog {
16 18 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
17 19 self.revlog.parents(rev)
18 20 }
19 21 }
20 22
21 23 impl Manifestlog {
22 24 /// Open the `manifest` of a repository given by its root.
23 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
24 let revlog =
25 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
25 pub fn open(
26 store_vfs: &Vfs,
27 options: RevlogOpenOptions,
28 ) -> Result<Self, HgError> {
29 let revlog = Revlog::open(store_vfs, "00manifest.i", None, options)?;
26 30 Ok(Self { revlog })
27 31 }
28 32
29 33 /// Return the `Manifest` for the given node ID.
30 34 ///
31 35 /// Note: this is a node ID in the manifestlog, typically found through
32 36 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
33 37 /// changeset.
34 38 ///
35 39 /// See also `Repo::manifest_for_node`
36 40 pub fn data_for_node(
37 41 &self,
38 42 node: NodePrefix,
39 43 ) -> Result<Manifest, RevlogError> {
40 44 let rev = self.revlog.rev_from_node(node)?;
41 45 self.data_for_checked_rev(rev)
42 46 }
43 47
44 48 /// Return the `Manifest` of a given revision number.
45 49 ///
46 50 /// Note: this is a revision number in the manifestlog, *not* of any
47 51 /// changeset.
48 52 ///
49 53 /// See also `Repo::manifest_for_rev`
50 54 pub fn data_for_rev(
51 55 &self,
52 56 rev: UncheckedRevision,
53 57 ) -> Result<Manifest, RevlogError> {
54 58 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
55 59 Ok(Manifest { bytes })
56 60 }
57 61
58 62 pub fn data_for_checked_rev(
59 63 &self,
60 64 rev: Revision,
61 65 ) -> Result<Manifest, RevlogError> {
62 66 let bytes =
63 67 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
64 68 Ok(Manifest { bytes })
65 69 }
66 70 }
67 71
68 72 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
69 73 #[derive(Debug)]
70 74 pub struct Manifest {
71 75 /// Format for a manifest: flat sequence of variable-size entries,
72 76 /// sorted by path, each as:
73 77 ///
74 78 /// ```text
75 79 /// <path> \0 <hex_node_id> <flags> \n
76 80 /// ```
77 81 ///
78 82 /// The last entry is also terminated by a newline character.
79 83 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
80 84 bytes: Vec<u8>,
81 85 }
82 86
83 87 impl Manifest {
84 88 pub fn iter(
85 89 &self,
86 90 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
87 91 self.bytes
88 92 .split(|b| b == &b'\n')
89 93 .filter(|line| !line.is_empty())
90 94 .map(ManifestEntry::from_raw)
91 95 }
92 96
93 97 /// If the given path is in this manifest, return its filelog node ID
94 98 pub fn find_by_path(
95 99 &self,
96 100 path: &HgPath,
97 101 ) -> Result<Option<ManifestEntry>, HgError> {
98 102 use std::cmp::Ordering::*;
99 103 let path = path.as_bytes();
100 104 // Both boundaries of this `&[u8]` slice are always at the boundary of
101 105 // an entry
102 106 let mut bytes = &*self.bytes;
103 107
104 108 // Binary search algorithm derived from `[T]::binary_search_by`
105 109 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
106 110 // except we don’t have a slice of entries. Instead we jump to the
107 111 // middle of the byte slice and look around for entry delimiters
108 112 // (newlines).
109 113 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
110 114 let (entry_path, rest) =
111 115 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
112 116 let cmp = entry_path.cmp(path);
113 117 if cmp == Less {
114 118 let after_newline = entry_range.end + 1;
115 119 bytes = &bytes[after_newline..];
116 120 } else if cmp == Greater {
117 121 bytes = &bytes[..entry_range.start];
118 122 } else {
119 123 return Ok(Some(ManifestEntry::from_path_and_rest(
120 124 entry_path, rest,
121 125 )));
122 126 }
123 127 }
124 128 Ok(None)
125 129 }
126 130
127 131 /// If there is at least one, return the byte range of an entry *excluding*
128 132 /// the final newline.
129 133 fn find_entry_near_middle_of(
130 134 bytes: &[u8],
131 135 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
132 136 let len = bytes.len();
133 137 if len > 0 {
134 138 let middle = bytes.len() / 2;
135 139 // Integer division rounds down, so `middle < len`.
136 140 let (before, after) = bytes.split_at(middle);
137 141 let is_newline = |&byte: &u8| byte == b'\n';
138 142 let entry_start = match before.iter().rposition(is_newline) {
139 143 Some(i) => i + 1,
140 144 None => 0, // We choose the first entry in `bytes`
141 145 };
142 146 let entry_end = match after.iter().position(is_newline) {
143 147 Some(i) => {
144 148 // No `+ 1` here to exclude this newline from the range
145 149 middle + i
146 150 }
147 151 None => {
148 152 // In a well-formed manifest:
149 153 //
150 154 // * Since `len > 0`, `bytes` contains at least one entry
151 155 // * Every entry ends with a newline
152 156 // * Since `middle < len`, `after` contains at least the
153 157 // newline at the end of the last entry of `bytes`.
154 158 //
155 159 // We didn’t find a newline, so this manifest is not
156 160 // well-formed.
157 161 return Err(HgError::corrupted(
158 162 "manifest entry without \\n delimiter",
159 163 ));
160 164 }
161 165 };
162 166 Ok(Some(entry_start..entry_end))
163 167 } else {
164 168 // len == 0
165 169 Ok(None)
166 170 }
167 171 }
168 172 }
169 173
170 174 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
171 175 #[derive(Debug)]
172 176 pub struct ManifestEntry<'manifest> {
173 177 pub path: &'manifest HgPath,
174 178 pub hex_node_id: &'manifest [u8],
175 179
176 180 /// `Some` values are b'x', b'l', or b't'
177 181 pub flags: Option<u8>,
178 182 }
179 183
180 184 impl<'a> ManifestEntry<'a> {
181 185 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
182 186 bytes.split_2(b'\0').ok_or_else(|| {
183 187 HgError::corrupted("manifest entry without \\0 delimiter")
184 188 })
185 189 }
186 190
187 191 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
188 192 let (hex_node_id, flags) = match rest.split_last() {
189 193 Some((&b'x', rest)) => (rest, Some(b'x')),
190 194 Some((&b'l', rest)) => (rest, Some(b'l')),
191 195 Some((&b't', rest)) => (rest, Some(b't')),
192 196 _ => (rest, None),
193 197 };
194 198 Self {
195 199 path: HgPath::new(path),
196 200 hex_node_id,
197 201 flags,
198 202 }
199 203 }
200 204
201 205 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
202 206 let (path, rest) = Self::split_path(bytes)?;
203 207 Ok(Self::from_path_and_rest(path, rest))
204 208 }
205 209
206 210 pub fn node_id(&self) -> Result<Node, HgError> {
207 211 Node::from_hex_for_repo(self.hex_node_id)
208 212 }
209 213 }
@@ -1,965 +1,1030 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
36 36 /// As noted in revlog.c, revision numbers are actually encoded in
37 37 /// 4 bytes, and are liberally converted to ints, whence the i32
38 38 pub type BaseRevision = i32;
39 39
40 40 /// Mercurial revision numbers
41 41 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
42 42 /// in the sense that they should only be used for revisions that are
43 43 /// valid for a given index (i.e. in bounds).
44 44 #[derive(
45 45 Debug,
46 46 derive_more::Display,
47 47 Clone,
48 48 Copy,
49 49 Hash,
50 50 PartialEq,
51 51 Eq,
52 52 PartialOrd,
53 53 Ord,
54 54 )]
55 55 pub struct Revision(pub BaseRevision);
56 56
57 57 impl format_bytes::DisplayBytes for Revision {
58 58 fn display_bytes(
59 59 &self,
60 60 output: &mut dyn std::io::Write,
61 61 ) -> std::io::Result<()> {
62 62 self.0.display_bytes(output)
63 63 }
64 64 }
65 65
66 66 /// Unchecked Mercurial revision numbers.
67 67 ///
68 68 /// Values of this type have no guarantee of being a valid revision number
69 69 /// in any context. Use method `check_revision` to get a valid revision within
70 70 /// the appropriate index object.
71 71 #[derive(
72 72 Debug,
73 73 derive_more::Display,
74 74 Clone,
75 75 Copy,
76 76 Hash,
77 77 PartialEq,
78 78 Eq,
79 79 PartialOrd,
80 80 Ord,
81 81 )]
82 82 pub struct UncheckedRevision(pub BaseRevision);
83 83
84 84 impl format_bytes::DisplayBytes for UncheckedRevision {
85 85 fn display_bytes(
86 86 &self,
87 87 output: &mut dyn std::io::Write,
88 88 ) -> std::io::Result<()> {
89 89 self.0.display_bytes(output)
90 90 }
91 91 }
92 92
93 93 impl From<Revision> for UncheckedRevision {
94 94 fn from(value: Revision) -> Self {
95 95 Self(value.0)
96 96 }
97 97 }
98 98
99 99 impl From<BaseRevision> for UncheckedRevision {
100 100 fn from(value: BaseRevision) -> Self {
101 101 Self(value)
102 102 }
103 103 }
104 104
105 105 /// Marker expressing the absence of a parent
106 106 ///
107 107 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
108 108 /// to be smaller than all existing revisions.
109 109 pub const NULL_REVISION: Revision = Revision(-1);
110 110
111 111 /// Same as `mercurial.node.wdirrev`
112 112 ///
113 113 /// This is also equal to `i32::max_value()`, but it's better to spell
114 114 /// it out explicitly, same as in `mercurial.node`
115 115 #[allow(clippy::unreadable_literal)]
116 116 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
117 117 UncheckedRevision(0x7fffffff);
118 118
119 119 pub const WORKING_DIRECTORY_HEX: &str =
120 120 "ffffffffffffffffffffffffffffffffffffffff";
121 121
122 122 /// The simplest expression of what we need of Mercurial DAGs.
123 123 pub trait Graph {
124 124 /// Return the two parents of the given `Revision`.
125 125 ///
126 126 /// Each of the parents can be independently `NULL_REVISION`
127 127 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
128 128 }
129 129
130 130 #[derive(Clone, Debug, PartialEq)]
131 131 pub enum GraphError {
132 132 ParentOutOfRange(Revision),
133 133 }
134 134
135 135 /// The Mercurial Revlog Index
136 136 ///
137 137 /// This is currently limited to the minimal interface that is needed for
138 138 /// the [`nodemap`](nodemap/index.html) module
139 139 pub trait RevlogIndex {
140 140 /// Total number of Revisions referenced in this index
141 141 fn len(&self) -> usize;
142 142
143 143 fn is_empty(&self) -> bool {
144 144 self.len() == 0
145 145 }
146 146
147 147 /// Return a reference to the Node or `None` for `NULL_REVISION`
148 148 fn node(&self, rev: Revision) -> Option<&Node>;
149 149
150 150 /// Return a [`Revision`] if `rev` is a valid revision number for this
151 151 /// index
152 152 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
153 153 let rev = rev.0;
154 154
155 155 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
156 156 {
157 157 Some(Revision(rev))
158 158 } else {
159 159 None
160 160 }
161 161 }
162 162 }
163 163
164 164 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
165 165 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
166 166 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
167 167 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
168 168
169 169 // Keep this in sync with REVIDX_KNOWN_FLAGS in
170 170 // mercurial/revlogutils/flagutil.py
171 171 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
172 172 | REVISION_FLAG_ELLIPSIS
173 173 | REVISION_FLAG_EXTSTORED
174 174 | REVISION_FLAG_HASCOPIESINFO;
175 175
176 176 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
177 177
178 178 #[derive(Debug, derive_more::From, derive_more::Display)]
179 179 pub enum RevlogError {
180 180 InvalidRevision,
181 181 /// Working directory is not supported
182 182 WDirUnsupported,
183 183 /// Found more than one entry whose ID match the requested prefix
184 184 AmbiguousPrefix,
185 185 #[from]
186 186 Other(HgError),
187 187 }
188 188
189 189 impl From<NodeMapError> for RevlogError {
190 190 fn from(error: NodeMapError) -> Self {
191 191 match error {
192 192 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
193 193 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
194 194 format!("nodemap point to revision {} not in index", rev),
195 195 ),
196 196 }
197 197 }
198 198 }
199 199
200 200 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
201 201 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
202 202 }
203 203
204 204 impl RevlogError {
205 205 fn corrupted<S: AsRef<str>>(context: S) -> Self {
206 206 RevlogError::Other(corrupted(context))
207 207 }
208 208 }
209 209
210 210 /// Read only implementation of revlog.
211 211 pub struct Revlog {
212 212 /// When index and data are not interleaved: bytes of the revlog index.
213 213 /// When index and data are interleaved: bytes of the revlog index and
214 214 /// data.
215 215 index: Index,
216 216 /// When index and data are not interleaved: bytes of the revlog data
217 217 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
218 218 /// When present on disk: the persistent nodemap for this revlog
219 219 nodemap: Option<nodemap::NodeTree>,
220 220 }
221 221
222 222 impl Graph for Revlog {
223 223 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
224 224 self.index.parents(rev)
225 225 }
226 226 }
227 227
228 #[derive(Debug, Copy, Clone)]
229 pub enum RevlogVersionOptions {
230 V0,
231 V1 { generaldelta: bool },
232 V2,
233 ChangelogV2 { compute_rank: bool },
234 }
235
236 /// Options to govern how a revlog should be opened, usually from the
237 /// repository configuration or requirements.
238 #[derive(Debug, Copy, Clone)]
239 pub struct RevlogOpenOptions {
240 /// The revlog version, along with any option specific to this version
241 pub version: RevlogVersionOptions,
242 /// Whether the revlog uses a persistent nodemap.
243 pub use_nodemap: bool,
244 // TODO other non-header/version options,
245 }
246
247 impl RevlogOpenOptions {
248 pub fn new() -> Self {
249 Self {
250 version: RevlogVersionOptions::V1 { generaldelta: true },
251 use_nodemap: false,
252 }
253 }
254
255 fn default_index_header(&self) -> index::IndexHeader {
256 index::IndexHeader {
257 header_bytes: match self.version {
258 RevlogVersionOptions::V0 => [0, 0, 0, 0],
259 RevlogVersionOptions::V1 { generaldelta } => {
260 [0, if generaldelta { 3 } else { 1 }, 0, 1]
261 }
262 RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
263 RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
264 0xD34Du32.to_be_bytes()
265 }
266 },
267 }
268 }
269 }
270
271 impl Default for RevlogOpenOptions {
272 fn default() -> Self {
273 Self::new()
274 }
275 }
276
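For illustration (not part of the patch), a minimal sketch of wiring these options into the `Revlog::open` defined just below; the import paths and option values here are assumptions, since a real caller derives them from the store requirements:

    use hg::errors::HgError;
    use hg::revlog::Revlog;
    use hg::vfs::Vfs;
    use hg::{RevlogOpenOptions, RevlogVersionOptions};

    fn open_changelog_sketch(store_vfs: &Vfs) -> Result<Revlog, HgError> {
        let options = RevlogOpenOptions {
            version: RevlogVersionOptions::V1 { generaldelta: true },
            use_nodemap: true, // only honored for non-inline revlogs
        };
        Revlog::open(store_vfs, "00changelog.i", None, options)
    }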
228 277 impl Revlog {
229 278 /// Open a revlog index file.
230 279 ///
231 280 /// It will also open the associated data file if index and data are not
232 281 /// interleaved.
233 282 pub fn open(
234 283 store_vfs: &Vfs,
235 284 index_path: impl AsRef<Path>,
236 285 data_path: Option<&Path>,
237 use_nodemap: bool,
286 options: RevlogOpenOptions,
238 287 ) -> Result<Self, HgError> {
239 Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
288 Self::open_gen(store_vfs, index_path, data_path, options, None)
240 289 }
241 290
242 291 fn open_gen(
243 292 store_vfs: &Vfs,
244 293 index_path: impl AsRef<Path>,
245 294 data_path: Option<&Path>,
246 use_nodemap: bool,
295 options: RevlogOpenOptions,
247 296 nodemap_for_test: Option<nodemap::NodeTree>,
248 297 ) -> Result<Self, HgError> {
249 298 let index_path = index_path.as_ref();
250 299 let index = {
251 300 match store_vfs.mmap_open_opt(index_path)? {
252 None => Index::new(Box::<Vec<_>>::default()),
301 None => Index::new(
302 Box::<Vec<_>>::default(),
303 options.default_index_header(),
304 ),
253 305 Some(index_mmap) => {
254 let index = Index::new(Box::new(index_mmap))?;
306 let index = Index::new(
307 Box::new(index_mmap),
308 options.default_index_header(),
309 )?;
255 310 Ok(index)
256 311 }
257 312 }
258 313 }?;
259 314
260 315 let default_data_path = index_path.with_extension("d");
261 316
262 317 // type annotation required
263 318 // won't recognize Mmap as Deref<Target = [u8]>
264 319 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
265 320 if index.is_inline() {
266 321 None
267 322 } else {
268 323 let data_path = data_path.unwrap_or(&default_data_path);
269 324 let data_mmap = store_vfs.mmap_open(data_path)?;
270 325 Some(Box::new(data_mmap))
271 326 };
272 327
273 let nodemap = if index.is_inline() || !use_nodemap {
328 let nodemap = if index.is_inline() || !options.use_nodemap {
274 329 None
275 330 } else {
276 331 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
277 332 |(docket, data)| {
278 333 nodemap::NodeTree::load_bytes(
279 334 Box::new(data),
280 335 docket.data_length,
281 336 )
282 337 },
283 338 )
284 339 };
285 340
286 341 let nodemap = nodemap_for_test.or(nodemap);
287 342
288 343 Ok(Revlog {
289 344 index,
290 345 data_bytes,
291 346 nodemap,
292 347 })
293 348 }
294 349
295 350 /// Return number of entries of the `Revlog`.
296 351 pub fn len(&self) -> usize {
297 352 self.index.len()
298 353 }
299 354
300 355 /// Returns `true` if the `Revlog` has zero `entries`.
301 356 pub fn is_empty(&self) -> bool {
302 357 self.index.is_empty()
303 358 }
304 359
305 360 /// Returns the node ID for the given revision number, if it exists in this
306 361 /// revlog
307 362 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
308 363 if rev == NULL_REVISION.into() {
309 364 return Some(&NULL_NODE);
310 365 }
311 366 let rev = self.index.check_revision(rev)?;
312 367 Some(self.index.get_entry(rev)?.hash())
313 368 }
314 369
315 370 /// Return the revision number for the given node ID, if it exists in this
316 371 /// revlog
317 372 pub fn rev_from_node(
318 373 &self,
319 374 node: NodePrefix,
320 375 ) -> Result<Revision, RevlogError> {
321 376 if let Some(nodemap) = &self.nodemap {
322 377 nodemap
323 378 .find_bin(&self.index, node)?
324 379 .ok_or(RevlogError::InvalidRevision)
325 380 } else {
326 381 self.rev_from_node_no_persistent_nodemap(node)
327 382 }
328 383 }
329 384
330 385 /// Same as `rev_from_node`, without using a persistent nodemap
331 386 ///
332 387 /// This is used as fallback when a persistent nodemap is not present.
333 388 /// This happens when the persistent-nodemap experimental feature is not
334 389 /// enabled, or for small revlogs.
335 390 fn rev_from_node_no_persistent_nodemap(
336 391 &self,
337 392 node: NodePrefix,
338 393 ) -> Result<Revision, RevlogError> {
339 394 // Linear scan of the revlog
340 395 // TODO: consider building a non-persistent nodemap in memory to
341 396 // optimize these cases.
342 397 let mut found_by_prefix = None;
343 398 for rev in (-1..self.len() as BaseRevision).rev() {
344 399 let rev = Revision(rev as BaseRevision);
345 400 let candidate_node = if rev == Revision(-1) {
346 401 NULL_NODE
347 402 } else {
348 403 let index_entry =
349 404 self.index.get_entry(rev).ok_or_else(|| {
350 405 HgError::corrupted(
351 406 "revlog references a revision not in the index",
352 407 )
353 408 })?;
354 409 *index_entry.hash()
355 410 };
356 411 if node == candidate_node {
357 412 return Ok(rev);
358 413 }
359 414 if node.is_prefix_of(&candidate_node) {
360 415 if found_by_prefix.is_some() {
361 416 return Err(RevlogError::AmbiguousPrefix);
362 417 }
363 418 found_by_prefix = Some(rev)
364 419 }
365 420 }
366 421 found_by_prefix.ok_or(RevlogError::InvalidRevision)
367 422 }
368 423
369 424 /// Returns whether the given revision exists in this revlog.
370 425 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
371 426 self.index.check_revision(rev).is_some()
372 427 }
373 428
374 429 /// Return the full data associated with a revision.
375 430 ///
376 431 /// All entries required to build the final data out of deltas will be
377 432 /// retrieved as needed, and the deltas will be applied to the initial
378 433 /// snapshot to rebuild the final data.
379 434 pub fn get_rev_data(
380 435 &self,
381 436 rev: UncheckedRevision,
382 437 ) -> Result<Cow<[u8]>, RevlogError> {
383 438 if rev == NULL_REVISION.into() {
384 439 return Ok(Cow::Borrowed(&[]));
385 440 };
386 441 self.get_entry(rev)?.data()
387 442 }
388 443
389 444 /// [`Self::get_rev_data`] for checked revisions.
390 445 pub fn get_rev_data_for_checked_rev(
391 446 &self,
392 447 rev: Revision,
393 448 ) -> Result<Cow<[u8]>, RevlogError> {
394 449 if rev == NULL_REVISION {
395 450 return Ok(Cow::Borrowed(&[]));
396 451 };
397 452 self.get_entry_for_checked_rev(rev)?.data()
398 453 }
399 454
400 455 /// Check the hash of some given data against the recorded hash.
401 456 pub fn check_hash(
402 457 &self,
403 458 p1: Revision,
404 459 p2: Revision,
405 460 expected: &[u8],
406 461 data: &[u8],
407 462 ) -> bool {
408 463 let e1 = self.index.get_entry(p1);
409 464 let h1 = match e1 {
410 465 Some(ref entry) => entry.hash(),
411 466 None => &NULL_NODE,
412 467 };
413 468 let e2 = self.index.get_entry(p2);
414 469 let h2 = match e2 {
415 470 Some(ref entry) => entry.hash(),
416 471 None => &NULL_NODE,
417 472 };
418 473
419 474 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
420 475 }
421 476
422 477 /// Build the full data of a revision out of its snapshot
423 478 /// and its deltas.
424 479 fn build_data_from_deltas(
425 480 snapshot: RevlogEntry,
426 481 deltas: &[RevlogEntry],
427 482 ) -> Result<Vec<u8>, HgError> {
428 483 let snapshot = snapshot.data_chunk()?;
429 484 let deltas = deltas
430 485 .iter()
431 486 .rev()
432 487 .map(RevlogEntry::data_chunk)
433 488 .collect::<Result<Vec<_>, _>>()?;
434 489 let patches: Vec<_> =
435 490 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
436 491 let patch = patch::fold_patch_lists(&patches);
437 492 Ok(patch.apply(&snapshot))
438 493 }
439 494
440 495 /// Return the revlog data.
441 496 fn data(&self) -> &[u8] {
442 497 match &self.data_bytes {
443 498 Some(data_bytes) => data_bytes,
444 499 None => panic!(
445 500 "forgot to load the data or trying to access inline data"
446 501 ),
447 502 }
448 503 }
449 504
450 505 pub fn make_null_entry(&self) -> RevlogEntry {
451 506 RevlogEntry {
452 507 revlog: self,
453 508 rev: NULL_REVISION,
454 509 bytes: b"",
455 510 compressed_len: 0,
456 511 uncompressed_len: 0,
457 512 base_rev_or_base_of_delta_chain: None,
458 513 p1: NULL_REVISION,
459 514 p2: NULL_REVISION,
460 515 flags: NULL_REVLOG_ENTRY_FLAGS,
461 516 hash: NULL_NODE,
462 517 }
463 518 }
464 519
465 520 fn get_entry_for_checked_rev(
466 521 &self,
467 522 rev: Revision,
468 523 ) -> Result<RevlogEntry, RevlogError> {
469 524 if rev == NULL_REVISION {
470 525 return Ok(self.make_null_entry());
471 526 }
472 527 let index_entry = self
473 528 .index
474 529 .get_entry(rev)
475 530 .ok_or(RevlogError::InvalidRevision)?;
476 531 let start = index_entry.offset();
477 532 let end = start + index_entry.compressed_len() as usize;
478 533 let data = if self.index.is_inline() {
479 534 self.index.data(start, end)
480 535 } else {
481 536 &self.data()[start..end]
482 537 };
483 538 let base_rev = self
484 539 .index
485 540 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
486 541 .ok_or_else(|| {
487 542 RevlogError::corrupted(format!(
488 543 "base revision for rev {} is invalid",
489 544 rev
490 545 ))
491 546 })?;
492 547 let p1 =
493 548 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
494 549 RevlogError::corrupted(format!(
495 550 "p1 for rev {} is invalid",
496 551 rev
497 552 ))
498 553 })?;
499 554 let p2 =
500 555 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
501 556 RevlogError::corrupted(format!(
502 557 "p2 for rev {} is invalid",
503 558 rev
504 559 ))
505 560 })?;
506 561 let entry = RevlogEntry {
507 562 revlog: self,
508 563 rev,
509 564 bytes: data,
510 565 compressed_len: index_entry.compressed_len(),
511 566 uncompressed_len: index_entry.uncompressed_len(),
512 567 base_rev_or_base_of_delta_chain: if base_rev == rev {
513 568 None
514 569 } else {
515 570 Some(base_rev)
516 571 },
517 572 p1,
518 573 p2,
519 574 flags: index_entry.flags(),
520 575 hash: *index_entry.hash(),
521 576 };
522 577 Ok(entry)
523 578 }
524 579
525 580 /// Get an entry of the revlog.
526 581 pub fn get_entry(
527 582 &self,
528 583 rev: UncheckedRevision,
529 584 ) -> Result<RevlogEntry, RevlogError> {
530 585 if rev == NULL_REVISION.into() {
531 586 return Ok(self.make_null_entry());
532 587 }
533 588 let rev = self.index.check_revision(rev).ok_or_else(|| {
534 589 RevlogError::corrupted(format!("rev {} is invalid", rev))
535 590 })?;
536 591 self.get_entry_for_checked_rev(rev)
537 592 }
538 593 }
539 594
540 595 /// The revlog entry's bytes and the necessary information to extract
541 596 /// the entry's data.
542 597 #[derive(Clone)]
543 598 pub struct RevlogEntry<'revlog> {
544 599 revlog: &'revlog Revlog,
545 600 rev: Revision,
546 601 bytes: &'revlog [u8],
547 602 compressed_len: u32,
548 603 uncompressed_len: i32,
549 604 base_rev_or_base_of_delta_chain: Option<Revision>,
550 605 p1: Revision,
551 606 p2: Revision,
552 607 flags: u16,
553 608 hash: Node,
554 609 }
555 610
556 611 thread_local! {
557 612 // seems fine to [unwrap] here: this can only fail due to memory allocation
558 613 // failing, and it's normal for that to cause panic.
559 614 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
560 615 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
561 616 }
562 617
563 618 fn zstd_decompress_to_buffer(
564 619 bytes: &[u8],
565 620 buf: &mut Vec<u8>,
566 621 ) -> Result<usize, std::io::Error> {
567 622 ZSTD_DECODER
568 623 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
569 624 }
570 625
571 626 impl<'revlog> RevlogEntry<'revlog> {
572 627 pub fn revision(&self) -> Revision {
573 628 self.rev
574 629 }
575 630
576 631 pub fn node(&self) -> &Node {
577 632 &self.hash
578 633 }
579 634
580 635 pub fn uncompressed_len(&self) -> Option<u32> {
581 636 u32::try_from(self.uncompressed_len).ok()
582 637 }
583 638
584 639 pub fn has_p1(&self) -> bool {
585 640 self.p1 != NULL_REVISION
586 641 }
587 642
588 643 pub fn p1_entry(
589 644 &self,
590 645 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
591 646 if self.p1 == NULL_REVISION {
592 647 Ok(None)
593 648 } else {
594 649 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
595 650 }
596 651 }
597 652
598 653 pub fn p2_entry(
599 654 &self,
600 655 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
601 656 if self.p2 == NULL_REVISION {
602 657 Ok(None)
603 658 } else {
604 659 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
605 660 }
606 661 }
607 662
608 663 pub fn p1(&self) -> Option<Revision> {
609 664 if self.p1 == NULL_REVISION {
610 665 None
611 666 } else {
612 667 Some(self.p1)
613 668 }
614 669 }
615 670
616 671 pub fn p2(&self) -> Option<Revision> {
617 672 if self.p2 == NULL_REVISION {
618 673 None
619 674 } else {
620 675 Some(self.p2)
621 676 }
622 677 }
623 678
624 679 pub fn is_censored(&self) -> bool {
625 680 (self.flags & REVISION_FLAG_CENSORED) != 0
626 681 }
627 682
628 683 pub fn has_length_affecting_flag_processor(&self) -> bool {
629 684 // Relevant Python code: revlog.size()
630 685 // note: ELLIPSIS is known to not change the content
631 686 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
632 687 }
633 688
634 689 /// The data for this entry, after resolving deltas if any.
635 690 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
636 691 let mut entry = self.clone();
637 692 let mut delta_chain = vec![];
638 693
639 694 // The meaning of `base_rev_or_base_of_delta_chain` depends on
640 695 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
641 696 // `mercurial/revlogutils/constants.py` and the code in
642 697 // [_chaininfo] and in [index_deltachain].
643 698 let uses_generaldelta = self.revlog.index.uses_generaldelta();
644 699 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
645 700 entry = if uses_generaldelta {
646 701 delta_chain.push(entry);
647 702 self.revlog.get_entry_for_checked_rev(base_rev)?
648 703 } else {
649 704 let base_rev = UncheckedRevision(entry.rev.0 - 1);
650 705 delta_chain.push(entry);
651 706 self.revlog.get_entry(base_rev)?
652 707 };
653 708 }
654 709
655 710 let data = if delta_chain.is_empty() {
656 711 entry.data_chunk()?
657 712 } else {
658 713 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
659 714 };
660 715
661 716 Ok(data)
662 717 }
663 718
664 719 fn check_data(
665 720 &self,
666 721 data: Cow<'revlog, [u8]>,
667 722 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
668 723 if self.revlog.check_hash(
669 724 self.p1,
670 725 self.p2,
671 726 self.hash.as_bytes(),
672 727 &data,
673 728 ) {
674 729 Ok(data)
675 730 } else {
676 731 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
677 732 return Err(HgError::unsupported(
678 733 "ellipsis revisions are not supported by rhg",
679 734 )
680 735 .into());
681 736 }
682 737 Err(corrupted(format!(
683 738 "hash check failed for revision {}",
684 739 self.rev
685 740 ))
686 741 .into())
687 742 }
688 743 }
689 744
690 745 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
691 746 let data = self.rawdata()?;
692 747 if self.rev == NULL_REVISION {
693 748 return Ok(data);
694 749 }
695 750 if self.is_censored() {
696 751 return Err(HgError::CensoredNodeError.into());
697 752 }
698 753 self.check_data(data)
699 754 }
700 755
701 756 /// Extract the data contained in the entry.
702 757 /// This may be a delta. (See `is_delta`.)
703 758 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
704 759 if self.bytes.is_empty() {
705 760 return Ok(Cow::Borrowed(&[]));
706 761 }
707 762 match self.bytes[0] {
708 763 // Revision data is the entirety of the entry, including this
709 764 // header.
710 765 b'\0' => Ok(Cow::Borrowed(self.bytes)),
711 766 // Raw revision data follows.
712 767 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
713 768 // zlib (RFC 1950) data.
714 769 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
715 770 // zstd data.
716 771 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
717 772 // A proper new format should have had a repo/store requirement.
718 773 format_type => Err(corrupted(format!(
719 774 "unknown compression header '{}'",
720 775 format_type
721 776 ))),
722 777 }
723 778 }
724 779
725 780 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
726 781 let mut decoder = ZlibDecoder::new(self.bytes);
727 782 if self.is_delta() {
728 783 let mut buf = Vec::with_capacity(self.compressed_len as usize);
729 784 decoder
730 785 .read_to_end(&mut buf)
731 786 .map_err(|e| corrupted(e.to_string()))?;
732 787 Ok(buf)
733 788 } else {
734 789 let cap = self.uncompressed_len.max(0) as usize;
735 790 let mut buf = vec![0; cap];
736 791 decoder
737 792 .read_exact(&mut buf)
738 793 .map_err(|e| corrupted(e.to_string()))?;
739 794 Ok(buf)
740 795 }
741 796 }
742 797
743 798 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
744 799 let cap = self.uncompressed_len.max(0) as usize;
745 800 if self.is_delta() {
746 801 // [cap] is usually an over-estimate of the space needed because
747 802 // it's the length of delta-decoded data, but we're interested
748 803 // in the size of the delta.
749 804 // This means we have to [shrink_to_fit] to avoid holding on
750 805 // to a large chunk of memory, but it also means we must have a
751 806 // fallback branch, for the case when the delta is longer than
752 807 // the original data (surprisingly, this does happen in practice)
753 808 let mut buf = Vec::with_capacity(cap);
754 809 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
755 810 Ok(_) => buf.shrink_to_fit(),
756 811 Err(_) => {
757 812 buf.clear();
758 813 zstd::stream::copy_decode(self.bytes, &mut buf)
759 814 .map_err(|e| corrupted(e.to_string()))?;
760 815 }
761 816 };
762 817 Ok(buf)
763 818 } else {
764 819 let mut buf = Vec::with_capacity(cap);
765 820 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
766 821 .map_err(|e| corrupted(e.to_string()))?;
767 822 if len != self.uncompressed_len as usize {
768 823 Err(corrupted("uncompressed length does not match"))
769 824 } else {
770 825 Ok(buf)
771 826 }
772 827 }
773 828 }
774 829
775 830 /// Tell if the entry is a snapshot or a delta
776 831 /// (this influences decompression).
777 832 fn is_delta(&self) -> bool {
778 833 self.base_rev_or_base_of_delta_chain.is_some()
779 834 }
780 835 }
781 836
782 837 /// Calculate the hash of a revision given its data and its parents.
783 838 fn hash(
784 839 data: &[u8],
785 840 p1_hash: &[u8],
786 841 p2_hash: &[u8],
787 842 ) -> [u8; NODE_BYTES_LENGTH] {
788 843 let mut hasher = Sha1::new();
789 844 let (a, b) = (p1_hash, p2_hash);
790 845 if a > b {
791 846 hasher.update(b);
792 847 hasher.update(a);
793 848 } else {
794 849 hasher.update(a);
795 850 hasher.update(b);
796 851 }
797 852 hasher.update(data);
798 853 *hasher.finalize().as_ref()
799 854 }
800 855
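For illustration (not part of the patch), the parent sort in `hash` above makes node ids independent of parent order. A minimal sketch of a test, assuming it sits where `hash` and `NULL_NODE` are visible (e.g. in this module's tests):

    #[test]
    fn node_hash_is_parent_order_independent_sketch() {
        let data = b"some revision data";
        let p1 = *b"0123456789012345678x"; // 20 arbitrary bytes
        let p2 = NULL_NODE.as_bytes();
        // `hash` feeds the smaller parent hash to SHA-1 first,
        // so swapping the parents yields the same node id.
        assert_eq!(hash(data, &p1, p2), hash(data, p2, &p1));
    }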
801 856 #[cfg(test)]
802 857 mod tests {
803 858 use super::*;
804 859 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
805 860 use itertools::Itertools;
806 861
807 862 #[test]
808 863 fn test_empty() {
809 864 let temp = tempfile::tempdir().unwrap();
810 865 let vfs = Vfs { base: temp.path() };
811 866 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
867 let revlog =
868 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
869 .unwrap();
813 870 assert!(revlog.is_empty());
814 871 assert_eq!(revlog.len(), 0);
815 872 assert!(revlog.get_entry(0.into()).is_err());
816 873 assert!(!revlog.has_rev(0.into()));
817 874 assert_eq!(
818 875 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
819 876 NULL_REVISION
820 877 );
821 878 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
822 879 assert_eq!(null_entry.revision(), NULL_REVISION);
823 880 assert!(null_entry.data().unwrap().is_empty());
824 881 }
825 882
826 883 #[test]
827 884 fn test_inline() {
828 885 let temp = tempfile::tempdir().unwrap();
829 886 let vfs = Vfs { base: temp.path() };
830 887 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
831 888 .unwrap();
832 889 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
833 890 .unwrap();
834 891 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
835 892 .unwrap();
836 893 let entry0_bytes = IndexEntryBuilder::new()
837 894 .is_first(true)
838 895 .with_version(1)
839 896 .with_inline(true)
840 897 .with_offset(INDEX_ENTRY_SIZE)
841 898 .with_node(node0)
842 899 .build();
843 900 let entry1_bytes = IndexEntryBuilder::new()
844 901 .with_offset(INDEX_ENTRY_SIZE)
845 902 .with_node(node1)
846 903 .build();
847 904 let entry2_bytes = IndexEntryBuilder::new()
848 905 .with_offset(INDEX_ENTRY_SIZE)
849 906 .with_p1(Revision(0))
850 907 .with_p2(Revision(1))
851 908 .with_node(node2)
852 909 .build();
853 910 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
854 911 .into_iter()
855 912 .flatten()
856 913 .collect_vec();
857 914 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
858 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
915 let revlog =
916 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
917 .unwrap();
859 918
860 919 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
861 920 assert_eq!(entry0.revision(), Revision(0));
862 921 assert_eq!(*entry0.node(), node0);
863 922 assert!(!entry0.has_p1());
864 923 assert_eq!(entry0.p1(), None);
865 924 assert_eq!(entry0.p2(), None);
866 925 let p1_entry = entry0.p1_entry().unwrap();
867 926 assert!(p1_entry.is_none());
868 927 let p2_entry = entry0.p2_entry().unwrap();
869 928 assert!(p2_entry.is_none());
870 929
871 930 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
872 931 assert_eq!(entry1.revision(), Revision(1));
873 932 assert_eq!(*entry1.node(), node1);
874 933 assert!(!entry1.has_p1());
875 934 assert_eq!(entry1.p1(), None);
876 935 assert_eq!(entry1.p2(), None);
877 936 let p1_entry = entry1.p1_entry().unwrap();
878 937 assert!(p1_entry.is_none());
879 938 let p2_entry = entry1.p2_entry().unwrap();
880 939 assert!(p2_entry.is_none());
881 940
882 941 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
883 942 assert_eq!(entry2.revision(), Revision(2));
884 943 assert_eq!(*entry2.node(), node2);
885 944 assert!(entry2.has_p1());
886 945 assert_eq!(entry2.p1(), Some(Revision(0)));
887 946 assert_eq!(entry2.p2(), Some(Revision(1)));
888 947 let p1_entry = entry2.p1_entry().unwrap();
889 948 assert!(p1_entry.is_some());
890 949 assert_eq!(p1_entry.unwrap().revision(), Revision(0));
891 950 let p2_entry = entry2.p2_entry().unwrap();
892 951 assert!(p2_entry.is_some());
893 952 assert_eq!(p2_entry.unwrap().revision(), Revision(1));
894 953 }
895 954
896 955 #[test]
897 956 fn test_nodemap() {
898 957 let temp = tempfile::tempdir().unwrap();
899 958 let vfs = Vfs { base: temp.path() };
900 959
901 960 // building a revlog with a forced Node starting with zeros
902 961 // This is a corruption, but it does not preclude using the nodemap
903 962 // if we don't try to access the data
904 963 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
905 964 .unwrap();
906 965 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
907 966 .unwrap();
908 967 let entry0_bytes = IndexEntryBuilder::new()
909 968 .is_first(true)
910 969 .with_version(1)
911 970 .with_inline(true)
912 971 .with_offset(INDEX_ENTRY_SIZE)
913 972 .with_node(node0)
914 973 .build();
915 974 let entry1_bytes = IndexEntryBuilder::new()
916 975 .with_offset(INDEX_ENTRY_SIZE)
917 976 .with_node(node1)
918 977 .build();
919 978 let contents = vec![entry0_bytes, entry1_bytes]
920 979 .into_iter()
921 980 .flatten()
922 981 .collect_vec();
923 982 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
924 983
925 984 let mut idx = nodemap::tests::TestNtIndex::new();
926 985 idx.insert_node(Revision(0), node0).unwrap();
927 986 idx.insert_node(Revision(1), node1).unwrap();
928 987
929 let revlog =
930 Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
988 let revlog = Revlog::open_gen(
989 &vfs,
990 "foo.i",
991 None,
992 RevlogOpenOptions::new(),
993 Some(idx.nt),
994 )
995 .unwrap();
931 996
932 997 // accessing the data shows the corruption
933 998 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
934 999
935 1000 assert_eq!(
936 1001 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
937 1002 Revision(-1)
938 1003 );
939 1004 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
940 1005 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
941 1006 assert_eq!(
942 1007 revlog
943 1008 .rev_from_node(NodePrefix::from_hex("000").unwrap())
944 1009 .unwrap(),
945 1010 Revision(-1)
946 1011 );
947 1012 assert_eq!(
948 1013 revlog
949 1014 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
950 1015 .unwrap(),
951 1016 Revision(1)
952 1017 );
953 1018 // RevlogError does not implement PartialEq
954 1019 // (ultimately because io::Error does not)
955 1020 match revlog
956 1021 .rev_from_node(NodePrefix::from_hex("00").unwrap())
957 1022 .expect_err("Expected to give AmbiguousPrefix error")
958 1023 {
959 1024 RevlogError::AmbiguousPrefix => (),
960 1025 e => {
961 1026 panic!("Got another error than AmbiguousPrefix: {:?}", e);
962 1027 }
963 1028 };
964 1029 }
965 1030 }
@@ -1,561 +1,572 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 18 };
19 19 use hg::{
20 index::IndexHeader,
20 21 nodemap::{Block, NodeMapError, NodeTree},
21 22 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 23 BaseRevision, Revision, UncheckedRevision,
23 24 };
24 25 use std::cell::RefCell;
25 26
26 27 /// Return a Struct implementing the Graph trait
27 28 pub(crate) fn pyindex_to_graph(
28 29 py: Python,
29 30 index: PyObject,
30 31 ) -> PyResult<cindex::Index> {
31 32 match index.extract::<MixedIndex>(py) {
32 33 Ok(midx) => Ok(midx.clone_cindex(py)),
33 34 Err(_) => cindex::Index::new(py, index),
34 35 }
35 36 }
36 37
37 38 py_class!(pub class MixedIndex |py| {
38 39 data cindex: RefCell<cindex::Index>;
39 40 data index: RefCell<hg::index::Index>;
40 41 data nt: RefCell<Option<NodeTree>>;
41 42 data docket: RefCell<Option<PyObject>>;
42 43 // Holds a reference to the mmap'ed persistent nodemap data
43 44 data nodemap_mmap: RefCell<Option<PyBuffer>>;
44 45 // Holds a reference to the mmap'ed persistent index data
45 46 data index_mmap: RefCell<Option<PyBuffer>>;
46 47
47 48 def __new__(
48 49 _cls,
49 50 cindex: PyObject,
50 data: PyObject
51 data: PyObject,
52 default_header: u32,
51 53 ) -> PyResult<MixedIndex> {
52 Self::new(py, cindex, data)
54 Self::new(py, cindex, data, default_header)
53 55 }
54 56
55 57 /// Compatibility layer used for Python consumers needing access to the C index
56 58 ///
57 59 /// The only use case so far is `scmutil.shortesthexnodeidprefix`,
58 60 /// which may need to build a custom `nodetree`, based on a specified revset.
59 61 /// With a Rust implementation of the nodemap, we will be able to get rid of
60 62 /// this, by exposing our own standalone nodemap class,
61 63 /// ready to accept `MixedIndex`.
62 64 def get_cindex(&self) -> PyResult<PyObject> {
63 65 Ok(self.cindex(py).borrow().inner().clone_ref(py))
64 66 }
65 67
66 68 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
67 69
68 70 /// Return Revision if found, raises a bare `error.RevlogError`
69 71 /// in case of ambiguity, same as C version does
70 72 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
71 73 let opt = self.get_nodetree(py)?.borrow();
72 74 let nt = opt.as_ref().unwrap();
73 75 let idx = &*self.cindex(py).borrow();
74 76 let node = node_from_py_bytes(py, &node)?;
75 77 let res = nt.find_bin(idx, node.into());
76 78 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
77 79 }
78 80
79 81 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
80 82 /// is not found.
81 83 ///
82 84 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
83 85 /// will catch and rewrap with it
84 86 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
85 87 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
86 88 }
87 89
89 91 /// return True if the node exists in the index
89 91 def has_node(&self, node: PyBytes) -> PyResult<bool> {
90 92 self.get_rev(py, node).map(|opt| opt.is_some())
91 93 }
92 94
93 95 /// find length of shortest hex nodeid of a binary ID
94 96 def shortest(&self, node: PyBytes) -> PyResult<usize> {
95 97 let opt = self.get_nodetree(py)?.borrow();
96 98 let nt = opt.as_ref().unwrap();
97 99 let idx = &*self.cindex(py).borrow();
98 100 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
99 101 {
100 102 Ok(Some(l)) => Ok(l),
101 103 Ok(None) => Err(revlog_error(py)),
102 104 Err(e) => Err(nodemap_error(py, e)),
103 105 }
104 106 }
105 107
106 108 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
107 109 let opt = self.get_nodetree(py)?.borrow();
108 110 let nt = opt.as_ref().unwrap();
109 111 let idx = &*self.cindex(py).borrow();
110 112
111 113 let node_as_string = if cfg!(feature = "python3-sys") {
112 114 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
113 115 }
114 116 else {
115 117 let node = node.extract::<PyBytes>(py)?;
116 118 String::from_utf8_lossy(node.data(py)).to_string()
117 119 };
118 120
119 121 let prefix = NodePrefix::from_hex(&node_as_string)
120 122 .map_err(|_| PyErr::new::<ValueError, _>(
121 123 py, format!("Invalid node or prefix '{}'", node_as_string))
122 124 )?;
123 125
124 126 nt.find_bin(idx, prefix)
125 127 // TODO make an inner API returning the node directly
126 128 .map(|opt| opt.map(
127 129 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
128 130 .map_err(|e| nodemap_error(py, e))
129 131
130 132 }
131 133
132 134 /// append an index entry
133 135 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
134 136 if tup.len(py) < 8 {
135 137 // this is better than the panic promised by tup.get_item()
136 138 return Err(
137 139 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
138 140 }
139 141 let node_bytes = tup.get_item(py, 7).extract(py)?;
140 142 let node = node_from_py_object(py, &node_bytes)?;
141 143
142 144 let mut idx = self.cindex(py).borrow_mut();
143 145
144 146 // This is ok since we will just add the revision to the index
145 147 let rev = Revision(idx.len() as BaseRevision);
146 148 idx.append(py, tup)?;
147 149
148 150 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
149 151 .insert(&*idx, &node, rev)
150 152 .map_err(|e| nodemap_error(py, e))?;
151 153 Ok(py.None())
152 154 }
153 155
154 156 def __delitem__(&self, key: PyObject) -> PyResult<()> {
155 157 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
156 158 self.cindex(py).borrow().inner().del_item(py, key)?;
157 159 let mut opt = self.get_nodetree(py)?.borrow_mut();
158 160 let nt = opt.as_mut().unwrap();
159 161 nt.invalidate_all();
160 162 self.fill_nodemap(py, nt)?;
161 163 Ok(())
162 164 }
163 165
164 166 //
165 167 // Reforwarded C index API
166 168 //
167 169
168 170 // index_methods (tp_methods). Same ordering as in revlog.c
169 171
170 172 /// return the gca set of the given revs
171 173 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
172 174 self.call_cindex(py, "ancestors", args, kw)
173 175 }
174 176
175 177 /// return the heads of the common ancestors of the given revs
176 178 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
177 179 self.call_cindex(py, "commonancestorsheads", args, kw)
178 180 }
179 181
180 182 /// Clear the index caches and inner py_class data.
181 183 /// It is Python's responsibility to call `update_nodemap_data` again.
182 184 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
183 185 self.nt(py).borrow_mut().take();
184 186 self.docket(py).borrow_mut().take();
185 187 self.nodemap_mmap(py).borrow_mut().take();
186 188 self.call_cindex(py, "clearcaches", args, kw)
187 189 }
188 190
189 191 /// return the raw binary string representing a revision
190 192 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
191 193 self.call_cindex(py, "entry_binary", args, kw)
192 194 }
193 195
194 196 /// return a binary packed version of the header
195 197 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
196 198 self.call_cindex(py, "pack_header", args, kw)
197 199 }
198 200
199 201 /// get an index entry
200 202 def get(&self, *args, **kw) -> PyResult<PyObject> {
201 203 self.call_cindex(py, "get", args, kw)
202 204 }
203 205
204 206 /// compute phases
205 207 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
206 208 self.call_cindex(py, "computephasesmapsets", args, kw)
207 209 }
208 210
209 211 /// reachableroots
210 212 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
211 213 self.call_cindex(py, "reachableroots2", args, kw)
212 214 }
213 215
214 216 /// get head revisions
215 217 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
216 218 self.call_cindex(py, "headrevs", args, kw)
217 219 }
218 220
219 221 /// get filtered head revisions
220 222 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
221 223 self.call_cindex(py, "headrevsfiltered", args, kw)
222 224 }
223 225
224 226 /// True if the object is a snapshot
225 227 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
226 228 self.call_cindex(py, "issnapshot", args, kw)
227 229 }
228 230
229 231 /// Gather snapshot data in a cache dict
230 232 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
231 233 self.call_cindex(py, "findsnapshots", args, kw)
232 234 }
233 235
234 236 /// determine revisions with deltas to reconstruct fulltext
235 237 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
236 238 self.call_cindex(py, "deltachain", args, kw)
237 239 }
238 240
239 241 /// slice planned chunk read to reach a density threshold
240 242 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
241 243 self.call_cindex(py, "slicechunktodensity", args, kw)
242 244 }
243 245
244 246 /// stats for the index
245 247 def stats(&self, *args, **kw) -> PyResult<PyObject> {
246 248 self.call_cindex(py, "stats", args, kw)
247 249 }
248 250
249 251 // index_sequence_methods and index_mapping_methods.
250 252 //
251 253 // Since we call back through the high level Python API,
252 254 // there's no point making a distinction between index_get
253 255 // and index_getitem.
254 256
255 257 def __len__(&self) -> PyResult<usize> {
256 258 self.cindex(py).borrow().inner().len(py)
257 259 }
258 260
259 261 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
260 262 // this conversion seems needless, but that's actually because
261 263 // `index_getitem` does not handle conversion from PyLong,
262 264 // which expressions such as [e for e in index] internally use.
263 265 // Note that we don't seem to have a direct way to call
264 266 // PySequence_GetItem (does the job), which would possibly be better
265 267 // for performance
266 268 let key = match key.extract::<i32>(py) {
267 269 Ok(rev) => rev.to_py_object(py).into_object(),
268 270 Err(_) => key,
269 271 };
270 272 self.cindex(py).borrow().inner().get_item(py, key)
271 273 }
272 274
273 275 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
274 276 self.cindex(py).borrow().inner().set_item(py, key, value)
275 277 }
276 278
277 279 def __contains__(&self, item: PyObject) -> PyResult<bool> {
278 280 // ObjectProtocol does not seem to provide contains(), so
279 281 // this is an equivalent implementation of the index_contains()
280 282 // defined in revlog.c
281 283 let cindex = self.cindex(py).borrow();
282 284 match item.extract::<i32>(py) {
283 285 Ok(rev) => {
284 286 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
285 287 }
286 288 Err(_) => {
287 289 cindex.inner().call_method(
288 290 py,
289 291 "has_node",
290 292 PyTuple::new(py, &[item]),
291 293 None)?
292 294 .extract(py)
293 295 }
294 296 }
295 297 }
296 298
297 299 def nodemap_data_all(&self) -> PyResult<PyBytes> {
298 300 self.inner_nodemap_data_all(py)
299 301 }
300 302
301 303 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
302 304 self.inner_nodemap_data_incremental(py)
303 305 }
304 306 def update_nodemap_data(
305 307 &self,
306 308 docket: PyObject,
307 309 nm_data: PyObject
308 310 ) -> PyResult<PyObject> {
309 311 self.inner_update_nodemap_data(py, docket, nm_data)
310 312 }
311 313
312 314 @property
313 315 def entry_size(&self) -> PyResult<PyInt> {
314 316 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
315 317 }
316 318
317 319 @property
318 320 def rust_ext_compat(&self) -> PyResult<PyInt> {
319 321 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
320 322 }
321 323
322 324 });
323 325
324 326 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
325 327 /// buffer along with the Rust slice into said buffer. We need to keep the
326 328 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
327 329 /// is freed from Python's side.
328 330 ///
329 331 /// # Safety
330 332 ///
331 333 /// The caller must make sure that the buffer is kept around for at least as
332 334 /// long as the slice.
333 335 #[deny(unsafe_op_in_unsafe_fn)]
334 336 unsafe fn mmap_keeparound(
335 337 py: Python,
336 338 data: PyObject,
337 339 ) -> PyResult<(
338 340 PyBuffer,
339 341 Box<dyn std::ops::Deref<Target = [u8]> + Send + 'static>,
340 342 )> {
341 343 let buf = PyBuffer::get(py, &data)?;
342 344 let len = buf.item_count();
343 345
344 346 // Build a slice from the mmap'ed buffer data
345 347 let cbuf = buf.buf_ptr();
346 348 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
347 349 && buf.is_c_contiguous()
348 350 && u8::is_compatible_format(buf.format())
349 351 {
350 352 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
351 353 } else {
352 354 return Err(PyErr::new::<ValueError, _>(
353 355 py,
354 356 "Nodemap data buffer has an invalid memory representation"
355 357 .to_string(),
356 358 ));
357 359 };
358 360
359 361 Ok((buf, Box::new(bytes)))
360 362 }
361 363
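The ownership contract documented above can be shown in plain Rust without any Python types. Below is a minimal analogue (the `Owner` struct is a hypothetical stand-in for `PyBuffer`, illustration only): returning the owner together with the raw view is what keeps the pointer valid.

    use std::slice;

    // Hypothetical stand-in for the Python `PyBuffer`: it owns the bytes.
    struct Owner(Vec<u8>);

    // Mirror of the keep-around idea: hand the owner back alongside a raw
    // view into its data, so the caller controls both lifetimes together.
    fn keep_around(data: Vec<u8>) -> (Owner, *const u8, usize) {
        let owner = Owner(data);
        let (ptr, len) = (owner.0.as_ptr(), owner.0.len());
        // Moving `owner` moves the Vec handle, not its heap allocation,
        // so `ptr` stays valid for as long as `owner` is kept alive.
        (owner, ptr, len)
    }

    fn main() {
        let (_owner, ptr, len) = keep_around(vec![1, 2, 3]);
        // Safety: `_owner` is still alive, so the allocation behind `ptr`
        // is too; this is the invariant `mmap_keeparound` relies on.
        let bytes = unsafe { slice::from_raw_parts(ptr, len) };
        assert_eq!(bytes, &[1, 2, 3]);
    }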
362 364 impl MixedIndex {
363 365 fn new(
364 366 py: Python,
365 367 cindex: PyObject,
366 368 data: PyObject,
369 header: u32,
367 370 ) -> PyResult<MixedIndex> {
368 371 // Safety: we keep the buffer around inside the class as `index_mmap`
369 372 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
370 373
371 374 Self::create_instance(
372 375 py,
373 376 RefCell::new(cindex::Index::new(py, cindex)?),
374 RefCell::new(hg::index::Index::new(bytes).unwrap()),
377 RefCell::new(
378 hg::index::Index::new(
379 bytes,
380 IndexHeader::parse(&header.to_be_bytes())
381 .expect("default header is broken")
382 .unwrap(),
383 )
384 .unwrap(),
385 ),
375 386 RefCell::new(None),
376 387 RefCell::new(None),
377 388 RefCell::new(None),
378 389 RefCell::new(Some(buf)),
379 390 )
380 391 }
381 392
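The new `header` argument is the first four bytes of the index file read as a big-endian `u32`: the constructor re-serializes it with `to_be_bytes()` for `IndexHeader::parse`, and the Python test further down builds the same word with `struct.unpack(">I", ...)`. A hedged sketch of the round trip, assuming the usual revlog layout in which the high 16 bits of the word carry feature flags and the low 16 bits the format version (`split_header` is a hypothetical helper, not the hg API):

    // Hypothetical helper mirroring what `IndexHeader::parse` is given.
    fn split_header(word: u32) -> (u16, u16) {
        let bytes = word.to_be_bytes(); // as passed by `MixedIndex::new`
        let word = u32::from_be_bytes(bytes); // as read back by the parser
        ((word >> 16) as u16, (word & 0xFFFF) as u16)
    }

    fn main() {
        // 0x0001_0001 would be "inline data" flag plus version 1, under
        // the flags-high / version-low assumption stated above.
        let (flags, version) = split_header(0x0001_0001);
        assert_eq!((flags, version), (1, 1));
    }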
382 393 /// This is scaffolding at this point, but it could also become
383 394 /// a way to start a persistent nodemap or perform a
384 395 /// vacuum / repack operation
385 396 fn fill_nodemap(
386 397 &self,
387 398 py: Python,
388 399 nt: &mut NodeTree,
389 400 ) -> PyResult<PyObject> {
390 401 let index = self.cindex(py).borrow();
391 402 for r in 0..index.len() {
392 403 let rev = Revision(r as BaseRevision);
393 404 // in this case node() won't ever return None
394 405 nt.insert(&*index, index.node(rev).unwrap(), rev)
395 406 .map_err(|e| nodemap_error(py, e))?
396 407 }
397 408 Ok(py.None())
398 409 }
399 410
400 411 fn get_nodetree<'a>(
401 412 &'a self,
402 413 py: Python<'a>,
403 414 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
404 415 if self.nt(py).borrow().is_none() {
405 416 let readonly = Box::<Vec<_>>::default();
406 417 let mut nt = NodeTree::load_bytes(readonly, 0);
407 418 self.fill_nodemap(py, &mut nt)?;
408 419 self.nt(py).borrow_mut().replace(nt);
409 420 }
410 421 Ok(self.nt(py))
411 422 }
412 423
413 424 /// forward a method call to the underlying C index
414 425 fn call_cindex(
415 426 &self,
416 427 py: Python,
417 428 name: &str,
418 429 args: &PyTuple,
419 430 kwargs: Option<&PyDict>,
420 431 ) -> PyResult<PyObject> {
421 432 self.cindex(py)
422 433 .borrow()
423 434 .inner()
424 435 .call_method(py, name, args, kwargs)
425 436 }
426 437
427 438 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
428 439 self.cindex(py).borrow().clone_ref(py)
429 440 }
430 441
431 442 /// Returns the full nodemap bytes to be written as-is to disk
432 443 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
433 444 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
434 445 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
435 446
436 447 // If there's anything readonly, we need to build the data again from
437 448 // scratch
438 449 let bytes = if readonly.len() > 0 {
439 450 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
440 451 self.fill_nodemap(py, &mut nt)?;
441 452
442 453 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
443 454 assert_eq!(readonly.len(), 0);
444 455
445 456 bytes
446 457 } else {
447 458 bytes
448 459 };
449 460
450 461 let bytes = PyBytes::new(py, &bytes);
451 462 Ok(bytes)
452 463 }
453 464
454 465 /// Returns the last saved docket along with the size of any changed data
455 466 /// (in bytes, counted as whole blocks), and said data as bytes.
456 467 fn inner_nodemap_data_incremental(
457 468 &self,
458 469 py: Python,
459 470 ) -> PyResult<PyObject> {
460 471 let docket = self.docket(py).borrow();
461 472 let docket = match docket.as_ref() {
462 473 Some(d) => d,
463 474 None => return Ok(py.None()),
464 475 };
465 476
466 477 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
467 478 let masked_blocks = node_tree.masked_readonly_blocks();
468 479 let (_, data) = node_tree.into_readonly_and_added_bytes();
469 480 let changed = masked_blocks * std::mem::size_of::<Block>();
470 481
471 482 Ok((docket, changed, PyBytes::new(py, &data))
472 483 .to_py_object(py)
473 484 .into_object())
474 485 }
475 486
476 487 /// Update the nodemap from the new (mmaped) data.
477 488 /// The docket is kept as a reference for later incremental calls.
478 489 fn inner_update_nodemap_data(
479 490 &self,
480 491 py: Python,
481 492 docket: PyObject,
482 493 nm_data: PyObject,
483 494 ) -> PyResult<PyObject> {
484 495 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
485 496 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
486 497 let len = buf.item_count();
487 498 self.nodemap_mmap(py).borrow_mut().replace(buf);
488 499
489 500 let mut nt = NodeTree::load_bytes(bytes, len);
490 501
491 502 let data_tip = docket
492 503 .getattr(py, "tip_rev")?
493 504 .extract::<BaseRevision>(py)?
494 505 .into();
495 506 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
496 507 let idx = self.cindex(py).borrow();
497 508 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
498 509 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
499 510 })?;
500 511 let current_tip = idx.len();
501 512
502 513 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
503 514 let rev = Revision(r);
504 515 // in this case node() won't ever return None
505 516 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
506 517 .map_err(|e| nodemap_error(py, e))?
507 518 }
508 519
509 520 *self.nt(py).borrow_mut() = Some(nt);
510 521
511 522 Ok(py.None())
512 523 }
513 524 }
514 525
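Note how `inner_update_nodemap_data` above only inserts the revisions that appeared after the persisted docket tip, rather than rebuilding the whole tree. A minimal sketch of that catch-up loop, with a plain `HashMap` standing in for the `NodeTree` (a simplifying assumption, illustration only):

    use std::collections::HashMap;

    // `nodes[rev]` is the node hash of revision `rev`; `persisted_tip` is
    // the last revision already covered by the on-disk nodemap data.
    fn catch_up(
        nodemap: &mut HashMap<&'static str, u32>,
        nodes: &[&'static str],
        persisted_tip: u32,
    ) {
        for rev in (persisted_tip + 1)..nodes.len() as u32 {
            // Only revisions newer than the docket tip need inserting.
            nodemap.insert(nodes[rev as usize], rev);
        }
    }

    fn main() {
        let nodes = ["aaa", "bbb", "ccc", "ddd"];
        let mut nodemap: HashMap<&'static str, u32> =
            [("aaa", 0), ("bbb", 1)].into_iter().collect();
        catch_up(&mut nodemap, &nodes, 1); // docket tip was revision 1
        assert_eq!(nodemap["ddd"], 3);
    }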
515 526 fn revlog_error(py: Python) -> PyErr {
516 527 match py
517 528 .import("mercurial.error")
518 529 .and_then(|m| m.get(py, "RevlogError"))
519 530 {
520 531 Err(e) => e,
521 532 Ok(cls) => PyErr::from_instance(
522 533 py,
523 534 cls.call(py, (py.None(),), None).ok().into_py_object(py),
524 535 ),
525 536 }
526 537 }
527 538
528 539 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
529 540 PyErr::new::<ValueError, _>(
530 541 py,
531 542 format!(
532 543 "Inconsistency: Revision {} found in nodemap \
533 544 is not in revlog index",
534 545 rev
535 546 ),
536 547 )
537 548 }
538 549
539 550 /// Standard treatment of NodeMapError
540 551 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
541 552 match err {
542 553 NodeMapError::MultipleResults => revlog_error(py),
543 554 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
544 555 }
545 556 }
546 557
547 558 /// Create the module, with __package__ given from parent
548 559 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
549 560 let dotted_name = &format!("{}.revlog", package);
550 561 let m = PyModule::new(py, dotted_name)?;
551 562 m.add(py, "__package__", package)?;
552 563 m.add(py, "__doc__", "RevLog - Rust implementations")?;
553 564
554 565 m.add_class::<MixedIndex>(py)?;
555 566
556 567 let sys = PyModule::import(py, "sys")?;
557 568 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
558 569 sys_modules.set_item(py, dotted_name, &m)?;
559 570
560 571 Ok(m)
561 572 }
@@ -1,815 +1,822 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::error::CommandError;
9 9 use crate::ui::{
10 10 format_pattern_file_warning, print_narrow_sparse_warnings, relative_paths,
11 11 RelativePaths, Ui,
12 12 };
13 13 use crate::utils::path_utils::RelativizePaths;
14 14 use clap::Arg;
15 15 use format_bytes::format_bytes;
16 16 use hg::config::Config;
17 17 use hg::dirstate::has_exec_bit;
18 18 use hg::dirstate::status::StatusPath;
19 19 use hg::dirstate::TruncatedTimestamp;
20 20 use hg::errors::{HgError, IoResultExt};
21 21 use hg::filepatterns::parse_pattern_args;
22 22 use hg::lock::LockError;
23 23 use hg::manifest::Manifest;
24 24 use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
25 25 use hg::repo::Repo;
26 26 use hg::utils::debug::debug_wait_for_file;
27 27 use hg::utils::files::{
28 28 get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
29 29 };
30 30 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
31 use hg::DirstateStatus;
32 31 use hg::PatternFileWarning;
33 32 use hg::Revision;
34 33 use hg::StatusError;
35 34 use hg::StatusOptions;
36 35 use hg::{self, narrow, sparse};
36 use hg::{DirstateStatus, RevlogOpenOptions};
37 37 use log::info;
38 38 use rayon::prelude::*;
39 39 use std::borrow::Cow;
40 40 use std::io;
41 41 use std::mem::take;
42 42 use std::path::PathBuf;
43 43
44 44 pub const HELP_TEXT: &str = "
45 45 Show changed files in the working directory
46 46
47 47 This is a pure Rust version of `hg status`.
48 48
49 49 Some options might be missing; check the list below.
50 50 ";
51 51
52 52 pub fn args() -> clap::Command {
53 53 clap::command!("status")
54 54 .alias("st")
55 55 .about(HELP_TEXT)
56 56 .arg(
57 57 Arg::new("file")
58 58 .value_parser(clap::value_parser!(std::ffi::OsString))
59 59 .help("show only these files")
60 60 .action(clap::ArgAction::Append),
61 61 )
62 62 .arg(
63 63 Arg::new("all")
64 64 .help("show status of all files")
65 65 .short('A')
66 66 .action(clap::ArgAction::SetTrue)
67 67 .long("all"),
68 68 )
69 69 .arg(
70 70 Arg::new("modified")
71 71 .help("show only modified files")
72 72 .short('m')
73 73 .action(clap::ArgAction::SetTrue)
74 74 .long("modified"),
75 75 )
76 76 .arg(
77 77 Arg::new("added")
78 78 .help("show only added files")
79 79 .short('a')
80 80 .action(clap::ArgAction::SetTrue)
81 81 .long("added"),
82 82 )
83 83 .arg(
84 84 Arg::new("removed")
85 85 .help("show only removed files")
86 86 .short('r')
87 87 .action(clap::ArgAction::SetTrue)
88 88 .long("removed"),
89 89 )
90 90 .arg(
91 91 Arg::new("clean")
92 92 .help("show only clean files")
93 93 .short('c')
94 94 .action(clap::ArgAction::SetTrue)
95 95 .long("clean"),
96 96 )
97 97 .arg(
98 98 Arg::new("deleted")
99 99 .help("show only deleted files")
100 100 .short('d')
101 101 .action(clap::ArgAction::SetTrue)
102 102 .long("deleted"),
103 103 )
104 104 .arg(
105 105 Arg::new("unknown")
106 106 .help("show only unknown (not tracked) files")
107 107 .short('u')
108 108 .action(clap::ArgAction::SetTrue)
109 109 .long("unknown"),
110 110 )
111 111 .arg(
112 112 Arg::new("ignored")
113 113 .help("show only ignored files")
114 114 .short('i')
115 115 .action(clap::ArgAction::SetTrue)
116 116 .long("ignored"),
117 117 )
118 118 .arg(
119 119 Arg::new("copies")
120 120 .help("show source of copied files (DEFAULT: ui.statuscopies)")
121 121 .short('C')
122 122 .action(clap::ArgAction::SetTrue)
123 123 .long("copies"),
124 124 )
125 125 .arg(
126 126 Arg::new("print0")
127 127 .help("end filenames with NUL, for use with xargs")
128 128 .short('0')
129 129 .action(clap::ArgAction::SetTrue)
130 130 .long("print0"),
131 131 )
132 132 .arg(
133 133 Arg::new("no-status")
134 134 .help("hide status prefix")
135 135 .short('n')
136 136 .action(clap::ArgAction::SetTrue)
137 137 .long("no-status"),
138 138 )
139 139 .arg(
140 140 Arg::new("verbose")
141 141 .help("enable additional output")
142 142 .short('v')
143 143 .action(clap::ArgAction::SetTrue)
144 144 .long("verbose"),
145 145 )
146 146 .arg(
147 147 Arg::new("rev")
148 148 .help("show difference from/to revision")
149 149 .long("rev")
150 150 .num_args(1)
151 151 .action(clap::ArgAction::Append)
152 152 .value_name("REV"),
153 153 )
154 154 }
155 155
156 156 fn parse_revpair(
157 157 repo: &Repo,
158 158 revs: Option<Vec<String>>,
159 159 ) -> Result<Option<(Revision, Revision)>, CommandError> {
160 160 let revs = match revs {
161 161 None => return Ok(None),
162 162 Some(revs) => revs,
163 163 };
164 164 if revs.is_empty() {
165 165 return Ok(None);
166 166 }
167 167 if revs.len() != 2 {
168 168 return Err(CommandError::unsupported("expected 0 or 2 --rev flags"));
169 169 }
170 170
171 171 let rev1 = &revs[0];
172 172 let rev2 = &revs[1];
173 173 let rev1 = hg::revset::resolve_single(rev1, repo)
174 174 .map_err(|e| (e, rev1.as_str()))?;
175 175 let rev2 = hg::revset::resolve_single(rev2, repo)
176 176 .map_err(|e| (e, rev2.as_str()))?;
177 177 Ok(Some((rev1, rev2)))
178 178 }
179 179
180 180 /// Pure data type allowing the caller to specify file states to display
181 181 #[derive(Copy, Clone, Debug)]
182 182 pub struct DisplayStates {
183 183 pub modified: bool,
184 184 pub added: bool,
185 185 pub removed: bool,
186 186 pub clean: bool,
187 187 pub deleted: bool,
188 188 pub unknown: bool,
189 189 pub ignored: bool,
190 190 }
191 191
192 192 pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
193 193 modified: true,
194 194 added: true,
195 195 removed: true,
196 196 clean: false,
197 197 deleted: true,
198 198 unknown: true,
199 199 ignored: false,
200 200 };
201 201
202 202 pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
203 203 modified: true,
204 204 added: true,
205 205 removed: true,
206 206 clean: true,
207 207 deleted: true,
208 208 unknown: true,
209 209 ignored: true,
210 210 };
211 211
212 212 impl DisplayStates {
213 213 pub fn is_empty(&self) -> bool {
214 214 !(self.modified
215 215 || self.added
216 216 || self.removed
217 217 || self.clean
218 218 || self.deleted
219 219 || self.unknown
220 220 || self.ignored)
221 221 }
222 222 }
223 223
224 224 fn has_unfinished_merge(repo: &Repo) -> Result<bool, CommandError> {
225 225 Ok(repo.dirstate_parents()?.is_merge())
226 226 }
227 227
228 228 fn has_unfinished_state(repo: &Repo) -> Result<bool, CommandError> {
229 229 // These are all the known values for the [fname] argument of
230 230 // the [addunfinished] function in [state.py]
231 231 let known_state_files: &[&str] = &[
232 232 "bisect.state",
233 233 "graftstate",
234 234 "histedit-state",
235 235 "rebasestate",
236 236 "shelvedstate",
237 237 "transplant/journal",
238 238 "updatestate",
239 239 ];
240 240 if has_unfinished_merge(repo)? {
241 241 return Ok(true);
242 242 };
243 243 for f in known_state_files {
244 244 if repo.hg_vfs().join(f).exists() {
245 245 return Ok(true);
246 246 }
247 247 }
248 248 Ok(false)
249 249 }
250 250
251 251 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
252 252 // TODO: lift these limitations
253 253 if invocation
254 254 .config
255 255 .get(b"commands", b"status.terse")
256 256 .is_some()
257 257 {
258 258 return Err(CommandError::unsupported(
259 259 "status.terse is not yet supported with rhg status",
260 260 ));
261 261 }
262 262
263 263 let ui = invocation.ui;
264 264 let config = invocation.config;
265 265 let args = invocation.subcommand_args;
266 266
267 267 let revs = args.get_many::<String>("rev");
268 268 let print0 = args.get_flag("print0");
269 269 let verbose = args.get_flag("verbose")
270 270 || config.get_bool(b"ui", b"verbose")?
271 271 || config.get_bool(b"commands", b"status.verbose")?;
272 272 let verbose = verbose && !print0;
273 273
274 274 let all = args.get_flag("all");
275 275 let display_states = if all {
276 276 // TODO when implementing `--quiet`: it excludes clean files
277 277 // from `--all`
278 278 ALL_DISPLAY_STATES
279 279 } else {
280 280 let requested = DisplayStates {
281 281 modified: args.get_flag("modified"),
282 282 added: args.get_flag("added"),
283 283 removed: args.get_flag("removed"),
284 284 clean: args.get_flag("clean"),
285 285 deleted: args.get_flag("deleted"),
286 286 unknown: args.get_flag("unknown"),
287 287 ignored: args.get_flag("ignored"),
288 288 };
289 289 if requested.is_empty() {
290 290 DEFAULT_DISPLAY_STATES
291 291 } else {
292 292 requested
293 293 }
294 294 };
295 295 let no_status = args.get_flag("no-status");
296 296 let list_copies = all
297 297 || args.get_flag("copies")
298 298 || config.get_bool(b"ui", b"statuscopies")?;
299 299
300 300 let repo = invocation.repo?;
301 301 let revpair = parse_revpair(repo, revs.map(|i| i.cloned().collect()))?;
302 302
303 303 if verbose && has_unfinished_state(repo)? {
304 304 return Err(CommandError::unsupported(
305 305 "verbose status output is not supported by rhg (and is needed because we're in an unfinished operation)",
306 306 ));
307 307 }
308 308
309 309 let mut dmap = repo.dirstate_map_mut()?;
310 310
311 311 let check_exec = hg::checkexec::check_exec(repo.working_directory_path());
312 312
313 313 let options = StatusOptions {
314 314 check_exec,
315 315 list_clean: display_states.clean,
316 316 list_unknown: display_states.unknown,
317 317 list_ignored: display_states.ignored,
318 318 list_copies,
319 319 collect_traversed_dirs: false,
320 320 };
321 321
322 322 type StatusResult<'a> =
323 323 Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>;
324 324
325 325 let relative_status = config
326 326 .get_option(b"commands", b"status.relative")?
327 327 .expect("commands.status.relative should have a default value");
328 328
329 329 let relativize_paths = relative_status || {
330 330 // See in Python code with `getuipathfn` usage in `commands.py`.
331 331 let legacy_relative_behavior = args.contains_id("file");
332 332 match relative_paths(invocation.config)? {
333 333 RelativePaths::Legacy => legacy_relative_behavior,
334 334 RelativePaths::Bool(v) => v,
335 335 }
336 336 };
337 337
338 338 let mut output = DisplayStatusPaths {
339 339 ui,
340 340 no_status,
341 341 relativize: if relativize_paths {
342 342 Some(RelativizePaths::new(repo)?)
343 343 } else {
344 344 None
345 345 },
346 346 print0,
347 347 };
348 348
349 349 let after_status = |res: StatusResult| -> Result<_, CommandError> {
350 350 let (mut ds_status, pattern_warnings) = res?;
351 351 for warning in pattern_warnings {
352 352 ui.write_stderr(&format_pattern_file_warning(&warning, repo))?;
353 353 }
354 354
355 355 for (path, error) in take(&mut ds_status.bad) {
356 356 let error = match error {
357 357 hg::BadMatch::OsError(code) => {
358 358 std::io::Error::from_raw_os_error(code).to_string()
359 359 }
360 360 hg::BadMatch::BadType(ty) => {
361 361 format!("unsupported file type (type is {})", ty)
362 362 }
363 363 };
364 364 ui.write_stderr(&format_bytes!(
365 365 b"{}: {}\n",
366 366 path.as_bytes(),
367 367 error.as_bytes()
368 368 ))?
369 369 }
370 370 if !ds_status.unsure.is_empty() {
371 371 info!(
372 372 "Files to be rechecked by retrieval from filelog: {:?}",
373 373 ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
374 374 );
375 375 }
376 376 let mut fixup = Vec::new();
377 377 if !ds_status.unsure.is_empty()
378 378 && (display_states.modified || display_states.clean)
379 379 {
380 380 let p1 = repo.dirstate_parents()?.p1;
381 381 let manifest = repo.manifest_for_node(p1).map_err(|e| {
382 382 CommandError::from((e, &*format!("{:x}", p1.short())))
383 383 })?;
384 384 let working_directory_vfs = repo.working_directory_vfs();
385 385 let store_vfs = repo.store_vfs();
386 let revlog_open_options = repo.default_revlog_options(false)?;
386 387 let res: Vec<_> = take(&mut ds_status.unsure)
387 388 .into_par_iter()
388 389 .map(|to_check| {
389 390 // The compiler seems to get a bit confused with complex
390 391 // inference when using a parallel iterator + map
391 392 // + map_err + collect, so let's just inline some of the
392 393 // logic.
393 394 match unsure_is_modified(
394 395 working_directory_vfs,
395 396 store_vfs,
396 397 check_exec,
397 398 &manifest,
398 399 &to_check.path,
400 revlog_open_options,
399 401 ) {
400 402 Err(HgError::IoError { .. }) => {
401 403 // IO errors most likely stem from the file being
402 404 // deleted even though we know it's in the
403 405 // dirstate.
404 406 Ok((to_check, UnsureOutcome::Deleted))
405 407 }
406 408 Ok(outcome) => Ok((to_check, outcome)),
407 409 Err(e) => Err(e),
408 410 }
409 411 })
410 412 .collect::<Result<_, _>>()?;
411 413 for (status_path, outcome) in res.into_iter() {
412 414 match outcome {
413 415 UnsureOutcome::Clean => {
414 416 if display_states.clean {
415 417 ds_status.clean.push(status_path.clone());
416 418 }
417 419 fixup.push(status_path.path.into_owned())
418 420 }
419 421 UnsureOutcome::Modified => {
420 422 if display_states.modified {
421 423 ds_status.modified.push(status_path);
422 424 }
423 425 }
424 426 UnsureOutcome::Deleted => {
425 427 if display_states.deleted {
426 428 ds_status.deleted.push(status_path);
427 429 }
428 430 }
429 431 }
430 432 }
431 433 }
432 434
433 435 let dirstate_write_needed = ds_status.dirty;
434 436 let filesystem_time_at_status_start =
435 437 ds_status.filesystem_time_at_status_start;
436 438
437 439 output.output(display_states, ds_status)?;
438 440
439 441 Ok((
440 442 fixup,
441 443 dirstate_write_needed,
442 444 filesystem_time_at_status_start,
443 445 ))
444 446 };
445 447 let (narrow_matcher, narrow_warnings) = narrow::matcher(repo)?;
446 448
447 449 match revpair {
448 450 Some((rev1, rev2)) => {
449 451 let mut ds_status = DirstateStatus::default();
450 452 if list_copies {
451 453 return Err(CommandError::unsupported(
452 454 "status --rev --rev with copy information is not implemented yet",
453 455 ));
454 456 }
455 457
456 458 let stat = hg::operations::status_rev_rev_no_copies(
457 459 repo,
458 460 rev1,
459 461 rev2,
460 462 narrow_matcher,
461 463 )?;
462 464 for entry in stat.iter() {
463 465 let (path, status) = entry?;
464 466 let path = StatusPath {
465 467 path: Cow::Borrowed(path),
466 468 copy_source: None,
467 469 };
468 470 match status {
469 471 hg::operations::DiffStatus::Removed => {
470 472 if display_states.removed {
471 473 ds_status.removed.push(path)
472 474 }
473 475 }
474 476 hg::operations::DiffStatus::Added => {
475 477 if display_states.added {
476 478 ds_status.added.push(path)
477 479 }
478 480 }
479 481 hg::operations::DiffStatus::Modified => {
480 482 if display_states.modified {
481 483 ds_status.modified.push(path)
482 484 }
483 485 }
484 486 hg::operations::DiffStatus::Matching => {
485 487 if display_states.clean {
486 488 ds_status.clean.push(path)
487 489 }
488 490 }
489 491 }
490 492 }
491 493 output.output(display_states, ds_status)?;
492 494 return Ok(());
493 495 }
494 496 None => (),
495 497 }
496 498
497 499 let (sparse_matcher, sparse_warnings) = sparse::matcher(repo)?;
498 500 let matcher = match (repo.has_narrow(), repo.has_sparse()) {
499 501 (true, true) => {
500 502 Box::new(IntersectionMatcher::new(narrow_matcher, sparse_matcher))
501 503 }
502 504 (true, false) => narrow_matcher,
503 505 (false, true) => sparse_matcher,
504 506 (false, false) => Box::new(AlwaysMatcher),
505 507 };
506 508 let matcher = match args.get_many::<std::ffi::OsString>("file") {
507 509 None => matcher,
508 510 Some(files) => {
509 511 let patterns: Vec<Vec<u8>> = files
510 512 .filter(|s| !s.is_empty())
511 513 .map(get_bytes_from_os_str)
512 514 .collect();
513 515 for file in &patterns {
514 516 if file.starts_with(b"set:") {
515 517 return Err(CommandError::unsupported("fileset"));
516 518 }
517 519 }
518 520 let cwd = hg::utils::current_dir()?;
519 521 let root = repo.working_directory_path();
520 522 let ignore_patterns = parse_pattern_args(patterns, &cwd, root)?;
521 523 let files_matcher =
522 524 hg::matchers::PatternMatcher::new(ignore_patterns)?;
523 525 Box::new(IntersectionMatcher::new(
524 526 Box::new(files_matcher),
525 527 matcher,
526 528 ))
527 529 }
528 530 };
529 531
530 532 print_narrow_sparse_warnings(
531 533 &narrow_warnings,
532 534 &sparse_warnings,
533 535 ui,
534 536 repo,
535 537 )?;
536 538 let (fixup, mut dirstate_write_needed, filesystem_time_at_status_start) =
537 539 dmap.with_status(
538 540 matcher.as_ref(),
539 541 repo.working_directory_path().to_owned(),
540 542 ignore_files(repo, config),
541 543 options,
542 544 after_status,
543 545 )?;
544 546
545 547 // Development config option to test write races
546 548 if let Err(e) =
547 549 debug_wait_for_file(config, "status.pre-dirstate-write-file")
548 550 {
549 551 ui.write_stderr(e.as_bytes()).ok();
550 552 }
551 553
552 554 if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
553 555 && !dirstate_write_needed
554 556 {
555 557 // Nothing to update
556 558 return Ok(());
557 559 }
558 560
559 561 // Update the dirstate on disk if we can
560 562 let with_lock_result =
561 563 repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
562 564 if let Some(mtime_boundary) = filesystem_time_at_status_start {
563 565 for hg_path in fixup {
564 566 use std::os::unix::fs::MetadataExt;
565 567 let fs_path = hg_path_to_path_buf(&hg_path)
566 568 .expect("HgPath conversion");
567 569 // Specifically do not reuse `fs_metadata` from
568 570 // `unsure_is_modified`, which was needed before reading
569 571 // contents. Here we access metadata again after reading
570 572 // content, in case it changed in the meantime.
571 573 let metadata_res = repo
572 574 .working_directory_vfs()
573 575 .symlink_metadata(&fs_path);
574 576 let fs_metadata = match metadata_res {
575 577 Ok(meta) => meta,
576 578 Err(err) => match err {
577 579 HgError::IoError { .. } => {
578 580 // The file has probably been deleted. In any
579 581 // case, it was in the dirstate before, so
580 582 // let's ignore the error.
581 583 continue;
582 584 }
583 585 _ => return Err(err.into()),
584 586 },
585 587 };
586 588 if let Some(mtime) =
587 589 TruncatedTimestamp::for_reliable_mtime_of(
588 590 &fs_metadata,
589 591 &mtime_boundary,
590 592 )
591 593 .when_reading_file(&fs_path)?
592 594 {
593 595 let mode = fs_metadata.mode();
594 596 let size = fs_metadata.len();
595 597 dmap.set_clean(&hg_path, mode, size as u32, mtime)?;
596 598 dirstate_write_needed = true
597 599 }
598 600 }
599 601 }
600 602 drop(dmap); // Avoid "already mutably borrowed" RefCell panics
601 603 if dirstate_write_needed {
602 604 repo.write_dirstate()?
603 605 }
604 606 Ok(())
605 607 });
606 608 match with_lock_result {
607 609 Ok(closure_result) => closure_result?,
608 610 Err(LockError::AlreadyHeld) => {
609 611 // Not updating the dirstate is not ideal but not critical:
610 612 // don’t keep our caller waiting until some other Mercurial
611 613 // process releases the lock.
612 614 log::info!("not writing dirstate from `status`: lock is held")
613 615 }
614 616 Err(LockError::Other(HgError::IoError { error, .. }))
615 617 if error.kind() == io::ErrorKind::PermissionDenied =>
616 618 {
617 619 // `hg status` on a read-only repository is fine
618 620 }
619 621 Err(LockError::Other(error)) => {
620 622 // Report other I/O errors
621 623 Err(error)?
622 624 }
623 625 }
624 626 Ok(())
625 627 }
626 628
627 629 fn ignore_files(repo: &Repo, config: &Config) -> Vec<PathBuf> {
628 630 let mut ignore_files = Vec::new();
629 631 let repo_ignore = repo.working_directory_vfs().join(".hgignore");
630 632 if repo_ignore.exists() {
631 633 ignore_files.push(repo_ignore)
632 634 }
633 635 for (key, value) in config.iter_section(b"ui") {
634 636 if key == b"ignore" || key.starts_with(b"ignore.") {
635 637 let path = get_path_from_bytes(value);
636 638 // TODO: expand "~/" and environment variable here, like Python
637 639 // does with `os.path.expanduser` and `os.path.expandvars`
638 640
639 641 let joined = repo.working_directory_path().join(path);
640 642 ignore_files.push(joined);
641 643 }
642 644 }
643 645 ignore_files
644 646 }
645 647
646 648 struct DisplayStatusPaths<'a> {
647 649 ui: &'a Ui,
648 650 no_status: bool,
649 651 relativize: Option<RelativizePaths>,
650 652 print0: bool,
651 653 }
652 654
653 655 impl DisplayStatusPaths<'_> {
654 656 // Probably more elegant to use a Deref or Borrow trait rather than
655 657 // hardcode HgPathBuf, but probably not really useful at this point
656 658 fn display(
657 659 &self,
658 660 status_prefix: &[u8],
659 661 label: &'static str,
660 662 mut paths: Vec<StatusPath<'_>>,
661 663 ) -> Result<(), CommandError> {
662 664 paths.sort_unstable();
663 665 // TODO: get the stdout lock once for the whole loop
664 666 // instead of in each write
665 667 for StatusPath { path, copy_source } in paths {
666 668 let relative_path;
667 669 let relative_source;
668 670 let (path, copy_source) = if let Some(relativize) =
669 671 &self.relativize
670 672 {
671 673 relative_path = relativize.relativize(&path);
672 674 relative_source =
673 675 copy_source.as_ref().map(|s| relativize.relativize(s));
674 676 (&*relative_path, relative_source.as_deref())
675 677 } else {
676 678 (path.as_bytes(), copy_source.as_ref().map(|s| s.as_bytes()))
677 679 };
678 680 // TODO: Add a way to use `write_bytes!` instead of `format_bytes!`
679 681 // in order to stream to stdout instead of allocating an
680 682 // intermediate `Vec<u8>`.
681 683 if !self.no_status {
682 684 self.ui.write_stdout_labelled(status_prefix, label)?
683 685 }
684 686 let linebreak = if self.print0 { b"\x00" } else { b"\n" };
685 687 self.ui.write_stdout_labelled(
686 688 &format_bytes!(b"{}{}", path, linebreak),
687 689 label,
688 690 )?;
689 691 if let Some(source) = copy_source.filter(|_| !self.no_status) {
690 692 let label = "status.copied";
691 693 self.ui.write_stdout_labelled(
692 694 &format_bytes!(b" {}{}", source, linebreak),
693 695 label,
694 696 )?
695 697 }
696 698 }
697 699 Ok(())
698 700 }
699 701
700 702 fn output(
701 703 &mut self,
702 704 display_states: DisplayStates,
703 705 ds_status: DirstateStatus,
704 706 ) -> Result<(), CommandError> {
705 707 if display_states.modified {
706 708 self.display(b"M ", "status.modified", ds_status.modified)?;
707 709 }
708 710 if display_states.added {
709 711 self.display(b"A ", "status.added", ds_status.added)?;
710 712 }
711 713 if display_states.removed {
712 714 self.display(b"R ", "status.removed", ds_status.removed)?;
713 715 }
714 716 if display_states.deleted {
715 717 self.display(b"! ", "status.deleted", ds_status.deleted)?;
716 718 }
717 719 if display_states.unknown {
718 720 self.display(b"? ", "status.unknown", ds_status.unknown)?;
719 721 }
720 722 if display_states.ignored {
721 723 self.display(b"I ", "status.ignored", ds_status.ignored)?;
722 724 }
723 725 if display_states.clean {
724 726 self.display(b"C ", "status.clean", ds_status.clean)?;
725 727 }
726 728 Ok(())
727 729 }
728 730 }
729 731
730 732 /// Outcome of the additional check for an ambiguous tracked file
731 733 enum UnsureOutcome {
732 734 /// The file is actually clean
733 735 Clean,
734 736 /// The file has been modified
735 737 Modified,
736 738 /// The file was deleted on disk (or became another type of fs entry)
737 739 Deleted,
738 740 }
739 741
740 742 /// Check if a file is modified by comparing actual repo store and file system.
741 743 ///
742 744 /// This is meant to be used for entries that the dirstate cannot resolve,
743 745 /// due to time resolution limits.
744 746 fn unsure_is_modified(
745 747 working_directory_vfs: hg::vfs::Vfs,
746 748 store_vfs: hg::vfs::Vfs,
747 749 check_exec: bool,
748 750 manifest: &Manifest,
749 751 hg_path: &HgPath,
752 revlog_open_options: RevlogOpenOptions,
750 753 ) -> Result<UnsureOutcome, HgError> {
751 754 let vfs = working_directory_vfs;
752 755 let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
753 756 let fs_metadata = vfs.symlink_metadata(&fs_path)?;
754 757 let is_symlink = fs_metadata.file_type().is_symlink();
755 758
756 759 let entry = manifest
757 760 .find_by_path(hg_path)?
758 761 .expect("ambgious file not in p1");
759 762
760 763 // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
761 764 // dirstate
762 765 let fs_flags = if is_symlink {
763 766 Some(b'l')
764 767 } else if check_exec && has_exec_bit(&fs_metadata) {
765 768 Some(b'x')
766 769 } else {
767 770 None
768 771 };
769 772
770 773 let entry_flags = if check_exec {
771 774 entry.flags
772 775 } else if entry.flags == Some(b'x') {
773 776 None
774 777 } else {
775 778 entry.flags
776 779 };
777 780
778 781 if entry_flags != fs_flags {
779 782 return Ok(UnsureOutcome::Modified);
780 783 }
781 let filelog = hg::filelog::Filelog::open_vfs(&store_vfs, hg_path)?;
784 let filelog = hg::filelog::Filelog::open_vfs(
785 &store_vfs,
786 hg_path,
787 revlog_open_options,
788 )?;
782 789 let fs_len = fs_metadata.len();
783 790 let file_node = entry.node_id()?;
784 791 let filelog_entry = filelog.entry_for_node(file_node).map_err(|_| {
785 792 HgError::corrupted(format!(
786 793 "filelog {:?} missing node {:?} from manifest",
787 794 hg_path, file_node
788 795 ))
789 796 })?;
790 797 if filelog_entry.file_data_len_not_equal_to(fs_len) {
791 798 // No need to read file contents:
792 799 // it cannot be equal if it has a different length.
793 800 return Ok(UnsureOutcome::Modified);
794 801 }
795 802
796 803 let p1_filelog_data = filelog_entry.data()?;
797 804 let p1_contents = p1_filelog_data.file_data()?;
798 805 if p1_contents.len() as u64 != fs_len {
799 806 // No need to read file contents:
800 807 // it cannot be equal if it has a different length.
801 808 return Ok(UnsureOutcome::Modified);
802 809 }
803 810
804 811 let fs_contents = if is_symlink {
805 812 get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
806 813 } else {
807 814 vfs.read(fs_path)?
808 815 };
809 816
810 817 Ok(if p1_contents != &*fs_contents {
811 818 UnsureOutcome::Modified
812 819 } else {
813 820 UnsureOutcome::Clean
814 821 })
815 822 }
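The body of `unsure_is_modified` above is deliberately ordered from cheap to expensive: flag bits first, then the length recorded in the filelog, then the length of the decoded parent-1 data, and a full content comparison only as a last resort. A condensed sketch of that ordering (the `is_modified` helper is illustrative, not the hg API; `stored` plays the role of the p1 filelog data and `on_disk` the working-directory bytes):

    fn is_modified(
        stored: &[u8],
        on_disk: &[u8],
        stored_flags: Option<u8>,
        fs_flags: Option<u8>,
    ) -> bool {
        if stored_flags != fs_flags {
            return true; // e.g. exec bit or symlink flag changed
        }
        if stored.len() != on_disk.len() {
            return true; // different lengths can never be equal content
        }
        stored != on_disk // full comparison only as a last resort
    }

    fn main() {
        assert!(is_modified(b"a", b"ab", None, None));
        assert!(is_modified(b"same", b"same", Some(b'x'), None));
        assert!(!is_modified(b"same", b"same", Some(b'l'), Some(b'l')));
    }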
@@ -1,60 +1,63 b''
1 import struct
1 2 import unittest
2 3
3 4 try:
4 5 from mercurial import rustext
5 6
6 7 rustext.__name__ # trigger immediate actual import
7 8 except ImportError:
8 9 rustext = None
9 10 else:
10 11 from mercurial.rustext import revlog
11 12
12 13 # this would fail already without appropriate ancestor.__package__
13 14 from mercurial.rustext.ancestor import LazyAncestors
14 15
15 16 from mercurial.testing import revlog as revlogtesting
16 17
18 header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
19
17 20
18 21 @unittest.skipIf(
19 22 rustext is None,
20 23 "rustext module revlog relies on is not available",
21 24 )
22 25 class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
23 26 def test_heads(self):
24 27 idx = self.parseindex()
25 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
28 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
26 29 self.assertEqual(rustidx.headrevs(), idx.headrevs())
27 30
28 31 def test_get_cindex(self):
29 32 # drop me once we no longer need the method for shortest node
30 33 idx = self.parseindex()
31 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
34 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
32 35 cidx = rustidx.get_cindex()
33 36 self.assertTrue(idx is cidx)
34 37
35 38 def test_len(self):
36 39 idx = self.parseindex()
37 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
40 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
38 41 self.assertEqual(len(rustidx), len(idx))
39 42
40 43 def test_ancestors(self):
41 44 idx = self.parseindex()
42 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
45 rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
43 46 lazy = LazyAncestors(rustidx, [3], 0, True)
44 47 # we have two more references to the index:
45 48 # - in its inner iterator for __contains__ and __bool__
46 49 # - in the LazyAncestors instance itself (to spawn new iterators)
47 50 self.assertTrue(2 in lazy)
48 51 self.assertTrue(bool(lazy))
49 52 self.assertEqual(list(lazy), [3, 2, 1, 0])
50 53 # a second time to validate that we spawn new iterators
51 54 self.assertEqual(list(lazy), [3, 2, 1, 0])
52 55
53 56 # let's check bool for an empty one
54 57 self.assertFalse(LazyAncestors(idx, [0], 0, False))
55 58
56 59
57 60 if __name__ == '__main__':
58 61 import silenttestrunner
59 62
60 63 silenttestrunner.main(__name__)