censor: accept censored revision during upgrade...
marmoute
r51972:15a50bfa default
@@ -1,3797 +1,3801 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
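# A minimal usage sketch (editor's illustration; assumes a store-rooted
# ``mercurial.vfs.vfs`` opener and an existing filelog stored under
# ``data/foo``):
#
#   from mercurial import vfs as vfsmod
#   from mercurial.revlogutils.constants import KIND_FILELOG
#
#   opener = vfsmod.vfs(b'/repo/.hg/store')
#   rl = revlog(opener, target=(KIND_FILELOG, b'foo'), radix=b'data/foo')
#   node = rl.node(0)         # nodeid of the first stored revision
#   text = rl.revision(node)  # text reconstructed from its delta chain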
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # bare references to all the imported names to silence pyflakes
102 102 # warnings; we need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
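# Read/write processors return a ``(text, validatehash)`` pair; the raw
# processor returns a single bool used when validating rawtext hashes (see
# revlogutils.flagutil). Extensions can register their own triple through
# the opener option consumed by _init_opts(), e.g. (sketch; ``read_proc``
# and friends are hypothetical):
#
#   opener.options[b'flagprocessors'] = {
#       REVIDX_EXTSTORED: (read_proc, write_proc, raw_proc),
#   }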
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
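# Sketch (editor's illustration) of why FeatureConfig overrides copy():
# _Config.copy() is a shallow copy, so the options dict must be duplicated
# explicitly to keep instances independent:
#
#   base = FeatureConfig(compression_engine=b'zstd')
#   clone = base.copy()
#   clone.compression_engine_options[b'level'] = 3  # does not leak into base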
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # whether deltas are encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382 See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
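
# For example (editor's sketch), a classic inline revlogv1 header is
# INDEX_HEADER.pack(REVLOGV1 | FLAG_INLINE_DATA): is_inline_index()
# returns True for those bytes, and False for a bare REVLOGV1 header.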
398 398
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exists for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identifies the content stored in
422 422 this revlog. It helps the rest of the code understand what the revlog
423 423 is about without having to resort to heuristics and index filename
424 424 analysis. Note that this must reliably be set by normal code, but
425 425 test, debug, or performance measurement code might not set it to an
426 426 accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 util.nouideprecwarn(
496 496 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
497 497 )
498 498 return self.delta_config.general_delta
499 499
500 500 @property
501 501 def _checkambig(self):
502 502 """temporary compatibility proxy"""
503 503 util.nouideprecwarn(
504 504 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
505 505 )
506 506 return self.data_config.check_ambig
507 507
508 508 @property
509 509 def _mmaplargeindex(self):
510 510 """temporary compatibility proxy"""
511 511 util.nouideprecwarn(
512 512 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
513 513 )
514 514 return self.data_config.mmap_large_index
515 515
516 516 @property
517 517 def _censorable(self):
518 518 """temporary compatibility proxy"""
519 519 util.nouideprecwarn(
520 520 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
521 521 )
522 522 return self.feature_config.censorable
523 523
524 524 @property
525 525 def _chunkcachesize(self):
526 526 """temporary compatibility proxy"""
527 527 util.nouideprecwarn(
528 528 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
529 529 )
530 530 return self.data_config.chunk_cache_size
531 531
532 532 @property
533 533 def _maxchainlen(self):
534 534 """temporary compatibility proxy"""
535 535 util.nouideprecwarn(
536 536 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
537 537 )
538 538 return self.delta_config.max_chain_len
539 539
540 540 @property
541 541 def _deltabothparents(self):
542 542 """temporary compatibility proxy"""
543 543 util.nouideprecwarn(
544 544 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
545 545 )
546 546 return self.delta_config.delta_both_parents
547 547
548 548 @property
549 549 def _candidate_group_chunk_size(self):
550 550 """temporary compatibility proxy"""
551 551 util.nouideprecwarn(
552 552 b"use revlog.delta_config.candidate_group_chunk_size",
553 553 b"6.6",
554 554 stacklevel=2,
555 555 )
556 556 return self.delta_config.candidate_group_chunk_size
557 557
558 558 @property
559 559 def _debug_delta(self):
560 560 """temporary compatibility proxy"""
561 561 util.nouideprecwarn(
562 562 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
563 563 )
564 564 return self.delta_config.debug_delta
565 565
566 566 @property
567 567 def _compengine(self):
568 568 """temporary compatibility proxy"""
569 569 util.nouideprecwarn(
570 570 b"use revlog.feature_config.compression_engine",
571 571 b"6.6",
572 572 stacklevel=2,
573 573 )
574 574 return self.feature_config.compression_engine
575 575
576 576 @property
577 577 def _compengineopts(self):
578 578 """temporary compatibility proxy"""
579 579 util.nouideprecwarn(
580 580 b"use revlog.feature_config.compression_engine_options",
581 581 b"6.6",
582 582 stacklevel=2,
583 583 )
584 584 return self.feature_config.compression_engine_options
585 585
586 586 @property
587 587 def _maxdeltachainspan(self):
588 588 """temporary compatibility proxy"""
589 589 util.nouideprecwarn(
590 590 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
591 591 )
592 592 return self.delta_config.max_deltachain_span
593 593
594 594 @property
595 595 def _withsparseread(self):
596 596 """temporary compatibility proxy"""
597 597 util.nouideprecwarn(
598 598 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
599 599 )
600 600 return self.data_config.with_sparse_read
601 601
602 602 @property
603 603 def _sparserevlog(self):
604 604 """temporary compatibility proxy"""
605 605 util.nouideprecwarn(
606 606 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
607 607 )
608 608 return self.delta_config.sparse_revlog
609 609
610 610 @property
611 611 def hassidedata(self):
612 612 """temporary compatibility proxy"""
613 613 util.nouideprecwarn(
614 614 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
615 615 )
616 616 return self.feature_config.has_side_data
617 617
618 618 @property
619 619 def _srdensitythreshold(self):
620 620 """temporary compatibility proxy"""
621 621 util.nouideprecwarn(
622 622 b"use revlog.data_config.sr_density_threshold",
623 623 b"6.6",
624 624 stacklevel=2,
625 625 )
626 626 return self.data_config.sr_density_threshold
627 627
628 628 @property
629 629 def _srmingapsize(self):
630 630 """temporary compatibility proxy"""
631 631 util.nouideprecwarn(
632 632 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
633 633 )
634 634 return self.data_config.sr_min_gap_size
635 635
636 636 @property
637 637 def _compute_rank(self):
638 638 """temporary compatibility proxy"""
639 639 util.nouideprecwarn(
640 640 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
641 641 )
642 642 return self.feature_config.compute_rank
643 643
644 644 @property
645 645 def canonical_parent_order(self):
646 646 """temporary compatibility proxy"""
647 647 util.nouideprecwarn(
648 648 b"use revlog.feature_config.canonical_parent_order",
649 649 b"6.6",
650 650 stacklevel=2,
651 651 )
652 652 return self.feature_config.canonical_parent_order
653 653
654 654 @property
655 655 def _lazydelta(self):
656 656 """temporary compatibility proxy"""
657 657 util.nouideprecwarn(
658 658 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
659 659 )
660 660 return self.delta_config.lazy_delta
661 661
662 662 @property
663 663 def _lazydeltabase(self):
664 664 """temporary compatibility proxy"""
665 665 util.nouideprecwarn(
666 666 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
667 667 )
668 668 return self.delta_config.lazy_delta_base
669 669
670 670 def _init_opts(self):
671 671 """process options (from above/config) to setup associated default revlog mode
672 672
673 673 These values might be affected when actually reading on disk information.
674 674
675 675 The relevant values are returned for use in _loadindex().
676 676
677 677 * newversionflags:
678 678 version header to use if we need to create a new revlog
679 679
680 680 * mmapindexthreshold:
681 681 minimal index size for start to use mmap
682 682
683 683 * force_nodemap:
684 684 force the usage of a "development" version of the nodemap code
685 685 """
686 686 opts = self.opener.options
687 687
688 688 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
689 689 new_header = CHANGELOGV2
690 690 compute_rank = opts.get(b'changelogv2.compute-rank', True)
691 691 self.feature_config.compute_rank = compute_rank
692 692 elif b'revlogv2' in opts:
693 693 new_header = REVLOGV2
694 694 elif b'revlogv1' in opts:
695 695 new_header = REVLOGV1 | FLAG_INLINE_DATA
696 696 if b'generaldelta' in opts:
697 697 new_header |= FLAG_GENERALDELTA
698 698 elif b'revlogv0' in self.opener.options:
699 699 new_header = REVLOGV0
700 700 else:
701 701 new_header = REVLOG_DEFAULT_VERSION
702 702
703 703 mmapindexthreshold = None
704 704 if self.data_config.mmap_large_index:
705 705 mmapindexthreshold = self.data_config.mmap_index_threshold
706 706 if self.feature_config.enable_ellipsis:
707 707 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
708 708
709 709 # revlog v0 doesn't have flag processors
710 710 for flag, processor in opts.get(b'flagprocessors', {}).items():
711 711 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
712 712
713 713 chunk_cache_size = self.data_config.chunk_cache_size
714 714 if chunk_cache_size <= 0:
715 715 raise error.RevlogError(
716 716 _(b'revlog chunk cache size %r is not greater than 0')
717 717 % chunk_cache_size
718 718 )
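# ``size & (size - 1)`` is zero exactly when ``size`` is a power of two:
# subtracting one flips the lowest set bit and every bit below it, so the
# AND is non-zero only if some higher bit remains set.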
719 719 elif chunk_cache_size & (chunk_cache_size - 1):
720 720 raise error.RevlogError(
721 721 _(b'revlog chunk cache size %r is not a power of 2')
722 722 % chunk_cache_size
723 723 )
724 724 force_nodemap = opts.get(b'devel-force-nodemap', False)
725 725 return new_header, mmapindexthreshold, force_nodemap
726 726
727 727 def _get_data(self, filepath, mmap_threshold, size=None):
728 728 """return a file content with or without mmap
729 729
730 730 If the file is missing return the empty string"""
731 731 try:
732 732 with self.opener(filepath) as fp:
733 733 if mmap_threshold is not None:
734 734 file_size = self.opener.fstat(fp).st_size
735 735 if file_size >= mmap_threshold:
736 736 if size is not None:
737 737 # avoid potential mmap crash
738 738 size = min(file_size, size)
739 739 # TODO: should .close() to release resources without
740 740 # relying on Python GC
741 741 if size is None:
742 742 return util.buffer(util.mmapread(fp))
743 743 else:
744 744 return util.buffer(util.mmapread(fp, size))
745 745 if size is None:
746 746 return fp.read()
747 747 else:
748 748 return fp.read(size)
749 749 except FileNotFoundError:
750 750 return b''
751 751
752 752 def get_streams(self, max_linkrev, force_inline=False):
753 753 """return a list of streams that represent this revlog
754 754
755 755 This is used by stream-clone to do bytes to bytes copies of a repository.
756 756
757 757 This streams data for all revisions that refer to a changelog revision up
758 758 to `max_linkrev`.
759 759
760 760 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
761 761
762 762 It returns a list of three-tuples:
763 763
764 764 [
765 765 (filename, bytes_stream, stream_size),
766 766 …
767 767 ]
768 768 """
769 769 n = len(self)
770 770 index = self.index
771 771 while n > 0:
772 772 linkrev = index[n - 1][4]
773 773 if linkrev < max_linkrev:
774 774 break
775 775 # note: this loop will rarely go through multiple iterations, since
776 776 # it only traverses commits created during the current streaming
777 777 # pull operation.
778 778 #
779 779 # If this becomes a problem, using a binary search should cap the
780 780 # runtime of this.
781 781 n = n - 1
782 782 if n == 0:
783 783 # no data to send
784 784 return []
785 785 index_size = n * index.entry_size
786 786 data_size = self.end(n - 1)
787 787
788 788 # XXX we might have been split (or stripped) since the object
789 789 # initialization. We need to close this race too, by having a way to
790 790 # pre-open the files we feed to the revlog and never closing them before
791 791 # we are done streaming.
792 792
793 793 if self._inline:
794 794
795 795 def get_stream():
796 796 with self._indexfp() as fp:
797 797 yield None
798 798 size = index_size + data_size
799 799 if size <= 65536:
800 800 yield fp.read(size)
801 801 else:
802 802 yield from util.filechunkiter(fp, limit=size)
803 803
804 804 inline_stream = get_stream()
805 805 next(inline_stream)
806 806 return [
807 807 (self._indexfile, inline_stream, index_size + data_size),
808 808 ]
809 809 elif force_inline:
810 810
811 811 def get_stream():
812 812 with self.reading():
813 813 yield None
814 814
815 815 for rev in range(n):
816 816 idx = self.index.entry_binary(rev)
817 817 if rev == 0 and self._docket is None:
818 818 # re-inject the inline flag
819 819 header = self._format_flags
820 820 header |= self._format_version
821 821 header |= FLAG_INLINE_DATA
822 822 header = self.index.pack_header(header)
823 823 idx = header + idx
824 824 yield idx
825 825 yield self._getsegmentforrevs(rev, rev)[1]
826 826
827 827 inline_stream = get_stream()
828 828 next(inline_stream)
829 829 return [
830 830 (self._indexfile, inline_stream, index_size + data_size),
831 831 ]
832 832 else:
833 833
834 834 def get_index_stream():
835 835 with self._indexfp() as fp:
836 836 yield None
837 837 if index_size <= 65536:
838 838 yield fp.read(index_size)
839 839 else:
840 840 yield from util.filechunkiter(fp, limit=index_size)
841 841
842 842 def get_data_stream():
843 843 with self._datafp() as fp:
844 844 yield None
845 845 if data_size <= 65536:
846 846 yield fp.read(data_size)
847 847 else:
848 848 yield from util.filechunkiter(fp, limit=data_size)
849 849
850 850 index_stream = get_index_stream()
851 851 next(index_stream)
852 852 data_stream = get_data_stream()
853 853 next(data_stream)
854 854 return [
855 855 (self._datafile, data_stream, data_size),
856 856 (self._indexfile, index_stream, index_size),
857 857 ]
858 858
859 859 def _loadindex(self, docket=None):
860 860
861 861 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
862 862
863 863 if self.postfix is not None:
864 864 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
865 865 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
866 866 entry_point = b'%s.i.a' % self.radix
867 867 elif self._try_split and self.opener.exists(self._split_index_file):
868 868 entry_point = self._split_index_file
869 869 else:
870 870 entry_point = b'%s.i' % self.radix
871 871
872 872 if docket is not None:
873 873 self._docket = docket
874 874 self._docket_file = entry_point
875 875 else:
876 876 self._initempty = True
877 877 entry_data = self._get_data(entry_point, mmapindexthreshold)
878 878 if len(entry_data) > 0:
879 879 header = INDEX_HEADER.unpack(entry_data[:4])[0]
880 880 self._initempty = False
881 881 else:
882 882 header = new_header
883 883
884 884 self._format_flags = header & ~0xFFFF
885 885 self._format_version = header & 0xFFFF
886 886
887 887 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
888 888 if supported_flags is None:
889 889 msg = _(b'unknown version (%d) in revlog %s')
890 890 msg %= (self._format_version, self.display_id)
891 891 raise error.RevlogError(msg)
892 892 elif self._format_flags & ~supported_flags:
893 893 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
894 894 display_flag = self._format_flags >> 16
895 895 msg %= (display_flag, self._format_version, self.display_id)
896 896 raise error.RevlogError(msg)
897 897
898 898 features = FEATURES_BY_VERSION[self._format_version]
899 899 self._inline = features[b'inline'](self._format_flags)
900 900 self.delta_config.general_delta = features[b'generaldelta'](
901 901 self._format_flags
902 902 )
903 903 self.feature_config.has_side_data = features[b'sidedata']
904 904
905 905 if not features[b'docket']:
906 906 self._indexfile = entry_point
907 907 index_data = entry_data
908 908 else:
909 909 self._docket_file = entry_point
910 910 if self._initempty:
911 911 self._docket = docketutil.default_docket(self, header)
912 912 else:
913 913 self._docket = docketutil.parse_docket(
914 914 self, entry_data, use_pending=self._trypending
915 915 )
916 916
917 917 if self._docket is not None:
918 918 self._indexfile = self._docket.index_filepath()
919 919 index_data = b''
920 920 index_size = self._docket.index_end
921 921 if index_size > 0:
922 922 index_data = self._get_data(
923 923 self._indexfile, mmapindexthreshold, size=index_size
924 924 )
925 925 if len(index_data) < index_size:
926 926 msg = _(b'not enough index data for %s: got %d, expected %d')
927 927 msg %= (self.display_id, len(index_data), index_size)
928 928 raise error.RevlogError(msg)
929 929
930 930 self._inline = False
931 931 # generaldelta implied by version 2 revlogs.
932 932 self.delta_config.general_delta = True
933 933 # the logic for persistent nodemap will be dealt with within the
934 934 # main docket, so disable it for now.
935 935 self._nodemap_file = None
936 936
937 937 if self._docket is not None:
938 938 self._datafile = self._docket.data_filepath()
939 939 self._sidedatafile = self._docket.sidedata_filepath()
940 940 elif self.postfix is None:
941 941 self._datafile = b'%s.d' % self.radix
942 942 else:
943 943 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
944 944
945 945 self.nodeconstants = sha1nodeconstants
946 946 self.nullid = self.nodeconstants.nullid
947 947
948 948 # sparse-revlog can't be on without general-delta (issue6056)
949 949 if not self.delta_config.general_delta:
950 950 self.delta_config.sparse_revlog = False
951 951
952 952 self._storedeltachains = True
953 953
954 954 devel_nodemap = (
955 955 self._nodemap_file
956 956 and force_nodemap
957 957 and parse_index_v1_nodemap is not None
958 958 )
959 959
960 960 use_rust_index = False
961 961 if rustrevlog is not None:
962 962 if self._nodemap_file is not None:
963 963 use_rust_index = True
964 964 else:
965 965 use_rust_index = self.opener.options.get(b'rust.index')
966 966
967 967 self._parse_index = parse_index_v1
968 968 if self._format_version == REVLOGV0:
969 969 self._parse_index = revlogv0.parse_index_v0
970 970 elif self._format_version == REVLOGV2:
971 971 self._parse_index = parse_index_v2
972 972 elif self._format_version == CHANGELOGV2:
973 973 self._parse_index = parse_index_cl_v2
974 974 elif devel_nodemap:
975 975 self._parse_index = parse_index_v1_nodemap
976 976 elif use_rust_index:
977 977 self._parse_index = parse_index_v1_mixed
978 978 try:
979 979 d = self._parse_index(index_data, self._inline)
980 980 index, chunkcache = d
981 981 use_nodemap = (
982 982 not self._inline
983 983 and self._nodemap_file is not None
984 984 and hasattr(index, 'update_nodemap_data')
985 985 )
986 986 if use_nodemap:
987 987 nodemap_data = nodemaputil.persisted_data(self)
988 988 if nodemap_data is not None:
989 989 docket = nodemap_data[0]
990 990 if (
991 991 len(d[0]) > docket.tip_rev
992 992 and d[0][docket.tip_rev][7] == docket.tip_node
993 993 ):
994 994 # no changelog tampering
995 995 self._nodemap_docket = docket
996 996 index.update_nodemap_data(*nodemap_data)
997 997 except (ValueError, IndexError):
998 998 raise error.RevlogError(
999 999 _(b"index %s is corrupted") % self.display_id
1000 1000 )
1001 1001 self.index = index
1002 1002 self._segmentfile = randomaccessfile.randomaccessfile(
1003 1003 self.opener,
1004 1004 (self._indexfile if self._inline else self._datafile),
1005 1005 self.data_config.chunk_cache_size,
1006 1006 chunkcache,
1007 1007 )
1008 1008 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1009 1009 self.opener,
1010 1010 self._sidedatafile,
1011 1011 self.data_config.chunk_cache_size,
1012 1012 )
1013 1013 # revnum -> (chain-length, sum-delta-length)
1014 1014 self._chaininfocache = util.lrucachedict(500)
1015 1015 # revlog header -> revlog compressor
1016 1016 self._decompressors = {}
1017 1017
1018 1018 def get_revlog(self):
1019 1019 """simple function to mirror API of other not-really-revlog API"""
1020 1020 return self
1021 1021
1022 1022 @util.propertycache
1023 1023 def revlog_kind(self):
1024 1024 return self.target[0]
1025 1025
1026 1026 @util.propertycache
1027 1027 def display_id(self):
1028 1028 """The public facing "ID" of the revlog that we use in message"""
1029 1029 if self.revlog_kind == KIND_FILELOG:
1030 1030 # Reference the file without the "data/" prefix, so it is familiar
1031 1031 # to the user.
1032 1032 return self.target[1]
1033 1033 else:
1034 1034 return self.radix
1035 1035
1036 1036 def _get_decompressor(self, t):
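# ``t`` is the header prefix stored with each compressed chunk
# (historically b'u' marks data stored as-is and b'x' marks zlib, whose
# streams start with 0x78); other engines register the headers they
# understand with util.compengines.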
1037 1037 try:
1038 1038 compressor = self._decompressors[t]
1039 1039 except KeyError:
1040 1040 try:
1041 1041 engine = util.compengines.forrevlogheader(t)
1042 1042 compressor = engine.revlogcompressor(
1043 1043 self.feature_config.compression_engine_options
1044 1044 )
1045 1045 self._decompressors[t] = compressor
1046 1046 except KeyError:
1047 1047 raise error.RevlogError(
1048 1048 _(b'unknown compression type %s') % binascii.hexlify(t)
1049 1049 )
1050 1050 return compressor
1051 1051
1052 1052 @util.propertycache
1053 1053 def _compressor(self):
1054 1054 engine = util.compengines[self.feature_config.compression_engine]
1055 1055 return engine.revlogcompressor(
1056 1056 self.feature_config.compression_engine_options
1057 1057 )
1058 1058
1059 1059 @util.propertycache
1060 1060 def _decompressor(self):
1061 1061 """the default decompressor"""
1062 1062 if self._docket is None:
1063 1063 return None
1064 1064 t = self._docket.default_compression_header
1065 1065 c = self._get_decompressor(t)
1066 1066 return c.decompress
1067 1067
1068 1068 def _indexfp(self):
1069 1069 """file object for the revlog's index file"""
1070 1070 return self.opener(self._indexfile, mode=b"r")
1071 1071
1072 1072 def __index_write_fp(self):
1073 1073 # You should not use this directly; use `_writing` instead
1074 1074 try:
1075 1075 f = self.opener(
1076 1076 self._indexfile,
1077 1077 mode=b"r+",
1078 1078 checkambig=self.data_config.check_ambig,
1079 1079 )
1080 1080 if self._docket is None:
1081 1081 f.seek(0, os.SEEK_END)
1082 1082 else:
1083 1083 f.seek(self._docket.index_end, os.SEEK_SET)
1084 1084 return f
1085 1085 except FileNotFoundError:
1086 1086 return self.opener(
1087 1087 self._indexfile,
1088 1088 mode=b"w+",
1089 1089 checkambig=self.data_config.check_ambig,
1090 1090 )
1091 1091
1092 1092 def __index_new_fp(self):
1093 1093 # You should not use this unless you are upgrading from an inline revlog
1094 1094 return self.opener(
1095 1095 self._indexfile,
1096 1096 mode=b"w",
1097 1097 checkambig=self.data_config.check_ambig,
1098 1098 atomictemp=True,
1099 1099 )
1100 1100
1101 1101 def _datafp(self, mode=b'r'):
1102 1102 """file object for the revlog's data file"""
1103 1103 return self.opener(self._datafile, mode=mode)
1104 1104
1105 1105 @contextlib.contextmanager
1106 1106 def _sidedatareadfp(self):
1107 1107 """file object suitable to read sidedata"""
1108 1108 if self._writinghandles:
1109 1109 yield self._writinghandles[2]
1110 1110 else:
1111 1111 with self.opener(self._sidedatafile) as fp:
1112 1112 yield fp
1113 1113
1114 1114 def tiprev(self):
1115 1115 return len(self.index) - 1
1116 1116
1117 1117 def tip(self):
1118 1118 return self.node(self.tiprev())
1119 1119
1120 1120 def __contains__(self, rev):
1121 1121 return 0 <= rev < len(self)
1122 1122
1123 1123 def __len__(self):
1124 1124 return len(self.index)
1125 1125
1126 1126 def __iter__(self):
1127 1127 return iter(range(len(self)))
1128 1128
1129 1129 def revs(self, start=0, stop=None):
1130 1130 """iterate over all rev in this revlog (from start to stop)"""
1131 1131 return storageutil.iterrevs(len(self), start=start, stop=stop)
1132 1132
1133 1133 def hasnode(self, node):
1134 1134 try:
1135 1135 self.rev(node)
1136 1136 return True
1137 1137 except KeyError:
1138 1138 return False
1139 1139
1140 1140 def _candelta(self, baserev, rev):
1141 1141 """whether two revisions (baserev, rev) can be delta-ed or not"""
1142 1142 # Disable delta if either rev requires a content-changing flag
1143 1143 # processor (ex. LFS). This is because such flag processor can alter
1144 1144 # the rawtext content that the delta will be based on, and two clients
1145 1145 # could have the same revlog node with different flags (i.e. different
1146 1146 # rawtext contents) and the delta could be incompatible.
1147 1147 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1148 1148 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1149 1149 ):
1150 1150 return False
1151 1151 return True
1152 1152
1153 1153 def update_caches(self, transaction):
1154 1154 """update on disk cache
1155 1155
1156 1156 If a transaction is passed, the update may be delayed to transaction
1157 1157 commit."""
1158 1158 if self._nodemap_file is not None:
1159 1159 if transaction is None:
1160 1160 nodemaputil.update_persistent_nodemap(self)
1161 1161 else:
1162 1162 nodemaputil.setup_persistent_nodemap(transaction, self)
1163 1163
1164 1164 def clearcaches(self):
1165 1165 """Clear in-memory caches"""
1166 1166 self._revisioncache = None
1167 1167 self._chainbasecache.clear()
1168 1168 self._segmentfile.clear_cache()
1169 1169 self._segmentfile_sidedata.clear_cache()
1170 1170 self._pcache = {}
1171 1171 self._nodemap_docket = None
1172 1172 self.index.clearcaches()
1173 1173 # The python code is the one responsible for validating the docket, so we
1174 1174 # end up having to refresh it here.
1175 1175 use_nodemap = (
1176 1176 not self._inline
1177 1177 and self._nodemap_file is not None
1178 1178 and hasattr(self.index, 'update_nodemap_data')
1179 1179 )
1180 1180 if use_nodemap:
1181 1181 nodemap_data = nodemaputil.persisted_data(self)
1182 1182 if nodemap_data is not None:
1183 1183 self._nodemap_docket = nodemap_data[0]
1184 1184 self.index.update_nodemap_data(*nodemap_data)
1185 1185
1186 1186 def rev(self, node):
1187 1187 """return the revision number associated with a <nodeid>"""
1188 1188 try:
1189 1189 return self.index.rev(node)
1190 1190 except TypeError:
1191 1191 raise
1192 1192 except error.RevlogError:
1193 1193 # parsers.c radix tree lookup failed
1194 1194 if (
1195 1195 node == self.nodeconstants.wdirid
1196 1196 or node in self.nodeconstants.wdirfilenodeids
1197 1197 ):
1198 1198 raise error.WdirUnsupported
1199 1199 raise error.LookupError(node, self.display_id, _(b'no node'))
1200 1200
1201 1201 # Accessors for index entries.
1202 1202
1203 1203 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1204 1204 # are flags.
1205 1205 def start(self, rev):
1206 1206 return int(self.index[rev][0] >> 16)
1207 1207
1208 1208 def sidedata_cut_off(self, rev):
1209 1209 sd_cut_off = self.index[rev][8]
1210 1210 if sd_cut_off != 0:
1211 1211 return sd_cut_off
1212 1212 # This is some annoying dance, because entries without sidedata
1213 1213 # currently use 0 as their offset (instead of previous-offset +
1214 1214 # previous-size).
1215 1215 #
1216 1216 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
1217 1217 # In the meantime, we need this.
1218 1218 while 0 <= rev:
1219 1219 e = self.index[rev]
1220 1220 if e[9] != 0:
1221 1221 return e[8] + e[9]
1222 1222 rev -= 1
1223 1223 return 0
1224 1224
1225 1225 def flags(self, rev):
1226 1226 return self.index[rev][0] & 0xFFFF
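
# i.e. index[rev][0] == (offset << 16) | flags: start() shifts the flag
# bits out while flags() masks the offset away.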
1227 1227
1228 1228 def length(self, rev):
1229 1229 return self.index[rev][1]
1230 1230
1231 1231 def sidedata_length(self, rev):
1232 1232 if not self.feature_config.has_side_data:
1233 1233 return 0
1234 1234 return self.index[rev][9]
1235 1235
1236 1236 def rawsize(self, rev):
1237 1237 """return the length of the uncompressed text for a given revision"""
1238 1238 l = self.index[rev][2]
1239 1239 if l >= 0:
1240 1240 return l
1241 1241
1242 1242 t = self.rawdata(rev)
1243 1243 return len(t)
1244 1244
1245 1245 def size(self, rev):
1246 1246 """length of non-raw text (processed by a "read" flag processor)"""
1247 1247 # fast path: if no "read" flag processor could change the content,
1248 1248 # size is rawsize. note: ELLIPSIS is known to not change the content.
1249 1249 flags = self.flags(rev)
1250 1250 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1251 1251 return self.rawsize(rev)
1252 1252
1253 1253 return len(self.revision(rev))
1254 1254
1255 1255 def fast_rank(self, rev):
1256 1256 """Return the rank of a revision if already known, or None otherwise.
1257 1257
1258 1258 The rank of a revision is the size of the sub-graph it defines as a
1259 1259 head. Equivalently, the rank of a revision `r` is the size of the set
1260 1260 `ancestors(r)`, `r` included.
1261 1261
1262 1262 This method returns the rank retrieved from the revlog in constant
1263 1263 time. It makes no attempt at computing unknown values for versions of
1264 1264 the revlog which do not persist the rank.
1265 1265 """
1266 1266 rank = self.index[rev][ENTRY_RANK]
1267 1267 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1268 1268 return None
1269 1269 if rev == nullrev:
1270 1270 return 0 # convention
1271 1271 return rank
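
# For instance, in a purely linear history the rank of revision ``r`` is
# ``r + 1``: itself plus its ``r`` ancestors (nullrev has rank 0 by the
# convention above).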
1272 1272
1273 1273 def chainbase(self, rev):
1274 1274 base = self._chainbasecache.get(rev)
1275 1275 if base is not None:
1276 1276 return base
1277 1277
1278 1278 index = self.index
1279 1279 iterrev = rev
1280 1280 base = index[iterrev][3]
1281 1281 while base != iterrev:
1282 1282 iterrev = base
1283 1283 base = index[iterrev][3]
1284 1284
1285 1285 self._chainbasecache[rev] = base
1286 1286 return base
1287 1287
1288 1288 def linkrev(self, rev):
1289 1289 return self.index[rev][4]
1290 1290
1291 1291 def parentrevs(self, rev):
1292 1292 try:
1293 1293 entry = self.index[rev]
1294 1294 except IndexError:
1295 1295 if rev == wdirrev:
1296 1296 raise error.WdirUnsupported
1297 1297 raise
1298 1298
1299 1299 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1300 1300 return entry[6], entry[5]
1301 1301 else:
1302 1302 return entry[5], entry[6]
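
# e.g. with canonical_parent_order, an entry whose parents are stored as
# (nullrev, 7) is reported as (7, nullrev), so the first parent is
# non-null whenever any parent is.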
1303 1303
1304 1304 # fast parentrevs(rev) where rev isn't filtered
1305 1305 _uncheckedparentrevs = parentrevs
1306 1306
1307 1307 def node(self, rev):
1308 1308 try:
1309 1309 return self.index[rev][7]
1310 1310 except IndexError:
1311 1311 if rev == wdirrev:
1312 1312 raise error.WdirUnsupported
1313 1313 raise
1314 1314
1315 1315 # Derived from index values.
1316 1316
1317 1317 def end(self, rev):
1318 1318 return self.start(rev) + self.length(rev)
1319 1319
1320 1320 def parents(self, node):
1321 1321 i = self.index
1322 1322 d = i[self.rev(node)]
1323 1323 # inline node() to avoid function call overhead
1324 1324 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1325 1325 return i[d[6]][7], i[d[5]][7]
1326 1326 else:
1327 1327 return i[d[5]][7], i[d[6]][7]
1328 1328
1329 1329 def chainlen(self, rev):
1330 1330 return self._chaininfo(rev)[0]
1331 1331
1332 1332 def _chaininfo(self, rev):
1333 1333 chaininfocache = self._chaininfocache
1334 1334 if rev in chaininfocache:
1335 1335 return chaininfocache[rev]
1336 1336 index = self.index
1337 1337 generaldelta = self.delta_config.general_delta
1338 1338 iterrev = rev
1339 1339 e = index[iterrev]
1340 1340 clen = 0
1341 1341 compresseddeltalen = 0
1342 1342 while iterrev != e[3]:
1343 1343 clen += 1
1344 1344 compresseddeltalen += e[1]
1345 1345 if generaldelta:
1346 1346 iterrev = e[3]
1347 1347 else:
1348 1348 iterrev -= 1
1349 1349 if iterrev in chaininfocache:
1350 1350 t = chaininfocache[iterrev]
1351 1351 clen += t[0]
1352 1352 compresseddeltalen += t[1]
1353 1353 break
1354 1354 e = index[iterrev]
1355 1355 else:
1356 1356 # Add text length of base since decompressing that also takes
1357 1357 # work. For cache hits the length is already included.
1358 1358 compresseddeltalen += e[1]
1359 1359 r = (clen, compresseddeltalen)
1360 1360 chaininfocache[rev] = r
1361 1361 return r
1362 1362
1363 1363 def _deltachain(self, rev, stoprev=None):
1364 1364 """Obtain the delta chain for a revision.
1365 1365
1366 1366 ``stoprev`` specifies a revision to stop at. If not specified, we
1367 1367 stop at the base of the chain.
1368 1368
1369 1369 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1370 1370 revs in ascending order and ``stopped`` is a bool indicating whether
1371 1371 ``stoprev`` was hit.
1372 1372 """
1373 1373 generaldelta = self.delta_config.general_delta
1374 1374 # Try C implementation.
1375 1375 try:
1376 1376 return self.index.deltachain(rev, stoprev, generaldelta)
1377 1377 except AttributeError:
1378 1378 pass
1379 1379
1380 1380 chain = []
1381 1381
1382 1382 # Alias to prevent attribute lookup in tight loop.
1383 1383 index = self.index
1384 1384
1385 1385 iterrev = rev
1386 1386 e = index[iterrev]
1387 1387 while iterrev != e[3] and iterrev != stoprev:
1388 1388 chain.append(iterrev)
1389 1389 if generaldelta:
1390 1390 iterrev = e[3]
1391 1391 else:
1392 1392 iterrev -= 1
1393 1393 e = index[iterrev]
1394 1394
1395 1395 if iterrev == stoprev:
1396 1396 stopped = True
1397 1397 else:
1398 1398 chain.append(iterrev)
1399 1399 stopped = False
1400 1400
1401 1401 chain.reverse()
1402 1402 return chain, stopped
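
# Worked example (editor's illustration), assuming general delta: if rev 5
# is stored as a delta against rev 2, itself a delta against rev 0 (a full
# snapshot, its own base), then _deltachain(5) returns ([0, 2, 5], False)
# and _deltachain(5, stoprev=2) returns ([5], True).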
1403 1403
1404 1404 def ancestors(self, revs, stoprev=0, inclusive=False):
1405 1405 """Generate the ancestors of 'revs' in reverse revision order.
1406 1406 Does not generate revs lower than stoprev.
1407 1407
1408 1408 See the documentation for ancestor.lazyancestors for more details."""
1409 1409
1410 1410 # first, make sure start revisions aren't filtered
1411 1411 revs = list(revs)
1412 1412 checkrev = self.node
1413 1413 for r in revs:
1414 1414 checkrev(r)
1415 1415 # and we're sure ancestors aren't filtered as well
1416 1416
1417 1417 if rustancestor is not None and self.index.rust_ext_compat:
1418 1418 lazyancestors = rustancestor.LazyAncestors
1419 1419 arg = self.index
1420 1420 else:
1421 1421 lazyancestors = ancestor.lazyancestors
1422 1422 arg = self._uncheckedparentrevs
1423 1423 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1424 1424
1425 1425 def descendants(self, revs):
1426 1426 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1427 1427
1428 1428 def findcommonmissing(self, common=None, heads=None):
1429 1429 """Return a tuple of the ancestors of common and the ancestors of heads
1430 1430 that are not ancestors of common. In revset terminology, we return the
1431 1431 tuple:
1432 1432
1433 1433 ::common, (::heads) - (::common)
1434 1434
1435 1435 The list is sorted by revision number, meaning it is
1436 1436 topologically sorted.
1437 1437
1438 1438 'heads' and 'common' are both lists of node IDs. If heads is
1439 1439 not supplied, uses all of the revlog's heads. If common is not
1440 1440 supplied, uses nullid."""
1441 1441 if common is None:
1442 1442 common = [self.nullid]
1443 1443 if heads is None:
1444 1444 heads = self.heads()
1445 1445
1446 1446 common = [self.rev(n) for n in common]
1447 1447 heads = [self.rev(n) for n in heads]
1448 1448
1449 1449 # we want the ancestors, but inclusive
1450 1450 class lazyset:
1451 1451 def __init__(self, lazyvalues):
1452 1452 self.addedvalues = set()
1453 1453 self.lazyvalues = lazyvalues
1454 1454
1455 1455 def __contains__(self, value):
1456 1456 return value in self.addedvalues or value in self.lazyvalues
1457 1457
1458 1458 def __iter__(self):
1459 1459 added = self.addedvalues
1460 1460 for r in added:
1461 1461 yield r
1462 1462 for r in self.lazyvalues:
1463 1463 if not r in added:
1464 1464 yield r
1465 1465
1466 1466 def add(self, value):
1467 1467 self.addedvalues.add(value)
1468 1468
1469 1469 def update(self, values):
1470 1470 self.addedvalues.update(values)
1471 1471
1472 1472 has = lazyset(self.ancestors(common))
1473 1473 has.add(nullrev)
1474 1474 has.update(common)
1475 1475
1476 1476 # take all ancestors from heads that aren't in has
1477 1477 missing = set()
1478 1478 visit = collections.deque(r for r in heads if r not in has)
1479 1479 while visit:
1480 1480 r = visit.popleft()
1481 1481 if r in missing:
1482 1482 continue
1483 1483 else:
1484 1484 missing.add(r)
1485 1485 for p in self.parentrevs(r):
1486 1486 if p not in has:
1487 1487 visit.append(p)
1488 1488 missing = list(missing)
1489 1489 missing.sort()
1490 1490 return has, [self.node(miss) for miss in missing]
1491 1491
1492 1492 def incrementalmissingrevs(self, common=None):
1493 1493 """Return an object that can be used to incrementally compute the
1494 1494 revision numbers of the ancestors of arbitrary sets that are not
1495 1495 ancestors of common. This is an ancestor.incrementalmissingancestors
1496 1496 object.
1497 1497
1498 1498 'common' is a list of revision numbers. If common is not supplied, uses
1499 1499 nullrev.
1500 1500 """
1501 1501 if common is None:
1502 1502 common = [nullrev]
1503 1503
1504 1504 if rustancestor is not None and self.index.rust_ext_compat:
1505 1505 return rustancestor.MissingAncestors(self.index, common)
1506 1506 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1507 1507
1508 1508 def findmissingrevs(self, common=None, heads=None):
1509 1509 """Return the revision numbers of the ancestors of heads that
1510 1510 are not ancestors of common.
1511 1511
1512 1512 More specifically, return a list of revision numbers corresponding to
1513 1513 nodes N such that every N satisfies the following constraints:
1514 1514
1515 1515 1. N is an ancestor of some node in 'heads'
1516 1516 2. N is not an ancestor of any node in 'common'
1517 1517
1518 1518 The list is sorted by revision number, meaning it is
1519 1519 topologically sorted.
1520 1520
1521 1521 'heads' and 'common' are both lists of revision numbers. If heads is
1522 1522 not supplied, uses all of the revlog's heads. If common is not
1523 1523 supplied, uses nullid."""
1524 1524 if common is None:
1525 1525 common = [nullrev]
1526 1526 if heads is None:
1527 1527 heads = self.headrevs()
1528 1528
1529 1529 inc = self.incrementalmissingrevs(common=common)
1530 1530 return inc.missingancestors(heads)
1531 1531
1532 1532 def findmissing(self, common=None, heads=None):
1533 1533 """Return the ancestors of heads that are not ancestors of common.
1534 1534
1535 1535 More specifically, return a list of nodes N such that every N
1536 1536 satisfies the following constraints:
1537 1537
1538 1538 1. N is an ancestor of some node in 'heads'
1539 1539 2. N is not an ancestor of any node in 'common'
1540 1540
1541 1541 The list is sorted by revision number, meaning it is
1542 1542 topologically sorted.
1543 1543
1544 1544 'heads' and 'common' are both lists of node IDs. If heads is
1545 1545 not supplied, uses all of the revlog's heads. If common is not
1546 1546 supplied, uses nullid."""
1547 1547 if common is None:
1548 1548 common = [self.nullid]
1549 1549 if heads is None:
1550 1550 heads = self.heads()
1551 1551
1552 1552 common = [self.rev(n) for n in common]
1553 1553 heads = [self.rev(n) for n in heads]
1554 1554
1555 1555 inc = self.incrementalmissingrevs(common=common)
1556 1556 return [self.node(r) for r in inc.missingancestors(heads)]
1557 1557
1558 1558 def nodesbetween(self, roots=None, heads=None):
1559 1559 """Return a topological path from 'roots' to 'heads'.
1560 1560
1561 1561 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1562 1562 topologically sorted list of all nodes N that satisfy both of
1563 1563 these constraints:
1564 1564
1565 1565 1. N is a descendant of some node in 'roots'
1566 1566 2. N is an ancestor of some node in 'heads'
1567 1567
1568 1568 Every node is considered to be both a descendant and an ancestor
1569 1569 of itself, so every reachable node in 'roots' and 'heads' will be
1570 1570 included in 'nodes'.
1571 1571
1572 1572 'outroots' is the list of reachable nodes in 'roots', i.e., the
1573 1573 subset of 'roots' that is returned in 'nodes'. Likewise,
1574 1574 'outheads' is the subset of 'heads' that is also in 'nodes'.
1575 1575
1576 1576 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1577 1577 unspecified, uses nullid as the only root. If 'heads' is
1578 1578 unspecified, uses list of all of the revlog's heads."""
1579 1579 nonodes = ([], [], [])
1580 1580 if roots is not None:
1581 1581 roots = list(roots)
1582 1582 if not roots:
1583 1583 return nonodes
1584 1584 lowestrev = min([self.rev(n) for n in roots])
1585 1585 else:
1586 1586 roots = [self.nullid] # Everybody's a descendant of nullid
1587 1587 lowestrev = nullrev
1588 1588 if (lowestrev == nullrev) and (heads is None):
1589 1589 # We want _all_ the nodes!
1590 1590 return (
1591 1591 [self.node(r) for r in self],
1592 1592 [self.nullid],
1593 1593 list(self.heads()),
1594 1594 )
1595 1595 if heads is None:
1596 1596 # All nodes are ancestors, so the latest ancestor is the last
1597 1597 # node.
1598 1598 highestrev = len(self) - 1
1599 1599 # Set ancestors to None to signal that every node is an ancestor.
1600 1600 ancestors = None
1601 1601 # Set heads to an empty dictionary for later discovery of heads
1602 1602 heads = {}
1603 1603 else:
1604 1604 heads = list(heads)
1605 1605 if not heads:
1606 1606 return nonodes
1607 1607 ancestors = set()
1608 1608 # Turn heads into a dictionary so we can remove 'fake' heads.
1609 1609 # Also, later we will be using it to filter out the heads we can't
1610 1610 # find from roots.
1611 1611 heads = dict.fromkeys(heads, False)
1612 1612 # Start at the top and keep marking parents until we're done.
1613 1613 nodestotag = set(heads)
1614 1614 # Remember where the top was so we can use it as a limit later.
1615 1615 highestrev = max([self.rev(n) for n in nodestotag])
1616 1616 while nodestotag:
1617 1617 # grab a node to tag
1618 1618 n = nodestotag.pop()
1619 1619 # Never tag nullid
1620 1620 if n == self.nullid:
1621 1621 continue
1622 1622 # A node's revision number represents its place in a
1623 1623 # topologically sorted list of nodes.
1624 1624 r = self.rev(n)
1625 1625 if r >= lowestrev:
1626 1626 if n not in ancestors:
1627 1627 # If we are possibly a descendant of one of the roots
1628 1628 # and we haven't already been marked as an ancestor
1629 1629 ancestors.add(n) # Mark as ancestor
1630 1630 # Add non-nullid parents to list of nodes to tag.
1631 1631 nodestotag.update(
1632 1632 [p for p in self.parents(n) if p != self.nullid]
1633 1633 )
1634 1634 elif n in heads: # We've seen it before, is it a fake head?
1635 1635 # So it is, real heads should not be the ancestors of
1636 1636 # any other heads.
1637 1637 heads.pop(n)
1638 1638 if not ancestors:
1639 1639 return nonodes
1640 1640 # Now that we have our set of ancestors, we want to remove any
1641 1641 # roots that are not ancestors.
1642 1642
1643 1643 # If one of the roots was nullid, everything is included anyway.
1644 1644 if lowestrev > nullrev:
1645 1645             # But, since it wasn't, let's recompute the lowest rev to not
1646 1646 # include roots that aren't ancestors.
1647 1647
1648 1648 # Filter out roots that aren't ancestors of heads
1649 1649 roots = [root for root in roots if root in ancestors]
1650 1650 # Recompute the lowest revision
1651 1651 if roots:
1652 1652 lowestrev = min([self.rev(root) for root in roots])
1653 1653 else:
1654 1654 # No more roots? Return empty list
1655 1655 return nonodes
1656 1656 else:
1657 1657 # We are descending from nullid, and don't need to care about
1658 1658 # any other roots.
1659 1659 lowestrev = nullrev
1660 1660 roots = [self.nullid]
1661 1661 # Transform our roots list into a set.
1662 1662 descendants = set(roots)
1663 1663 # Also, keep the original roots so we can filter out roots that aren't
1664 1664 # 'real' roots (i.e. are descended from other roots).
1665 1665 roots = descendants.copy()
1666 1666 # Our topologically sorted list of output nodes.
1667 1667 orderedout = []
1668 1668 # Don't start at nullid since we don't want nullid in our output list,
1669 1669 # and if nullid shows up in descendants, empty parents will look like
1670 1670 # they're descendants.
1671 1671 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1672 1672 n = self.node(r)
1673 1673 isdescendant = False
1674 1674 if lowestrev == nullrev: # Everybody is a descendant of nullid
1675 1675 isdescendant = True
1676 1676 elif n in descendants:
1677 1677 # n is already a descendant
1678 1678 isdescendant = True
1679 1679 # This check only needs to be done here because all the roots
1680 1680                 # will start being marked as descendants before the loop.
1681 1681 if n in roots:
1682 1682 # If n was a root, check if it's a 'real' root.
1683 1683 p = tuple(self.parents(n))
1684 1684 # If any of its parents are descendants, it's not a root.
1685 1685 if (p[0] in descendants) or (p[1] in descendants):
1686 1686 roots.remove(n)
1687 1687 else:
1688 1688 p = tuple(self.parents(n))
1689 1689                 # A node is a descendant if either of its parents is a
1690 1690                 # descendant. (We seeded the descendants set with the roots
1691 1691 # up there, remember?)
1692 1692 if (p[0] in descendants) or (p[1] in descendants):
1693 1693 descendants.add(n)
1694 1694 isdescendant = True
1695 1695 if isdescendant and ((ancestors is None) or (n in ancestors)):
1696 1696 # Only include nodes that are both descendants and ancestors.
1697 1697 orderedout.append(n)
1698 1698 if (ancestors is not None) and (n in heads):
1699 1699 # We're trying to figure out which heads are reachable
1700 1700 # from roots.
1701 1701 # Mark this head as having been reached
1702 1702 heads[n] = True
1703 1703 elif ancestors is None:
1704 1704 # Otherwise, we're trying to discover the heads.
1705 1705 # Assume this is a head because if it isn't, the next step
1706 1706 # will eventually remove it.
1707 1707 heads[n] = True
1708 1708 # But, obviously its parents aren't.
1709 1709 for p in self.parents(n):
1710 1710 heads.pop(p, None)
1711 1711 heads = [head for head, flag in heads.items() if flag]
1712 1712 roots = list(roots)
1713 1713 assert orderedout
1714 1714 assert roots
1715 1715 assert heads
1716 1716 return (orderedout, roots, heads)
1717 1717
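    # A minimal sketch of consuming ``nodesbetween`` (names are
    # hypothetical): the returned triple separates the path itself from
    # the subsets of roots and heads that were actually reachable:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([root], [head])
    #   assert set(outroots) <= set(nodes)
    #   assert set(outheads) <= set(nodes)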
1718 1718 def headrevs(self, revs=None):
1719 1719 if revs is None:
1720 1720 try:
1721 1721 return self.index.headrevs()
1722 1722 except AttributeError:
1723 1723 return self._headrevs()
1724 1724 if rustdagop is not None and self.index.rust_ext_compat:
1725 1725 return rustdagop.headrevs(self.index, revs)
1726 1726 return dagop.headrevs(revs, self._uncheckedparentrevs)
1727 1727
1728 1728 def computephases(self, roots):
1729 1729 return self.index.computephasesmapsets(roots)
1730 1730
1731 1731 def _headrevs(self):
1732 1732 count = len(self)
1733 1733 if not count:
1734 1734 return [nullrev]
1735 1735         # we won't iterate over filtered revs, so nobody is a head at the start
1736 1736 ishead = [0] * (count + 1)
1737 1737 index = self.index
1738 1738 for r in self:
1739 1739             ishead[r] = 1  # I may be a head
1740 1740 e = index[r]
1741 1741             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
1742 1742 return [r for r, val in enumerate(ishead) if val]
1743 1743
1744 1744 def heads(self, start=None, stop=None):
1745 1745 """return the list of all nodes that have no children
1746 1746
1747 1747 if start is specified, only heads that are descendants of
1748 1748 start will be returned
1749 1749 if stop is specified, it will consider all the revs from stop
1750 1750 as if they had no children
1751 1751 """
1752 1752 if start is None and stop is None:
1753 1753 if not len(self):
1754 1754 return [self.nullid]
1755 1755 return [self.node(r) for r in self.headrevs()]
1756 1756
1757 1757 if start is None:
1758 1758 start = nullrev
1759 1759 else:
1760 1760 start = self.rev(start)
1761 1761
1762 1762 stoprevs = {self.rev(n) for n in stop or []}
1763 1763
1764 1764 revs = dagop.headrevssubset(
1765 1765 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1766 1766 )
1767 1767
1768 1768 return [self.node(rev) for rev in revs]
1769 1769
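    # Sketch of the two calling modes of ``heads`` (``rl`` and
    # ``some_node`` are hypothetical):
    #
    #   all_heads = rl.heads()                     # every childless node
    #   branch_heads = rl.heads(start=some_node)   # heads above a node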
1770 1770 def children(self, node):
1771 1771 """find the children of a given node"""
1772 1772 c = []
1773 1773 p = self.rev(node)
1774 1774 for r in self.revs(start=p + 1):
1775 1775 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1776 1776 if prevs:
1777 1777 for pr in prevs:
1778 1778 if pr == p:
1779 1779 c.append(self.node(r))
1780 1780 elif p == nullrev:
1781 1781 c.append(self.node(r))
1782 1782 return c
1783 1783
1784 1784 def commonancestorsheads(self, a, b):
1785 1785 """calculate all the heads of the common ancestors of nodes a and b"""
1786 1786 a, b = self.rev(a), self.rev(b)
1787 1787 ancs = self._commonancestorsheads(a, b)
1788 1788 return pycompat.maplist(self.node, ancs)
1789 1789
1790 1790 def _commonancestorsheads(self, *revs):
1791 1791 """calculate all the heads of the common ancestors of revs"""
1792 1792 try:
1793 1793 ancs = self.index.commonancestorsheads(*revs)
1794 1794 except (AttributeError, OverflowError): # C implementation failed
1795 1795 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1796 1796 return ancs
1797 1797
1798 1798 def isancestor(self, a, b):
1799 1799 """return True if node a is an ancestor of node b
1800 1800
1801 1801 A revision is considered an ancestor of itself."""
1802 1802 a, b = self.rev(a), self.rev(b)
1803 1803 return self.isancestorrev(a, b)
1804 1804
1805 1805 def isancestorrev(self, a, b):
1806 1806 """return True if revision a is an ancestor of revision b
1807 1807
1808 1808 A revision is considered an ancestor of itself.
1809 1809
1810 1810 The implementation of this is trivial but the use of
1811 1811 reachableroots is not."""
1812 1812 if a == nullrev:
1813 1813 return True
1814 1814 elif a == b:
1815 1815 return True
1816 1816 elif a > b:
1817 1817 return False
1818 1818 return bool(self.reachableroots(a, [b], [a], includepath=False))
1819 1819
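    # Sketch of the delegation above: for the non-trivial case,
    # ``isancestorrev(a, b)`` asks ``reachableroots`` whether ``a`` is a
    # root reachable from ``b``:
    #
    #   rl.isancestorrev(2, 7) == bool(
    #       rl.reachableroots(2, [7], [2], includepath=False)
    #   )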
1820 1820 def reachableroots(self, minroot, heads, roots, includepath=False):
1821 1821 """return (heads(::(<roots> and <roots>::<heads>)))
1822 1822
1823 1823 If includepath is True, return (<roots>::<heads>)."""
1824 1824 try:
1825 1825 return self.index.reachableroots2(
1826 1826 minroot, heads, roots, includepath
1827 1827 )
1828 1828 except AttributeError:
1829 1829 return dagop._reachablerootspure(
1830 1830 self.parentrevs, minroot, roots, heads, includepath
1831 1831 )
1832 1832
1833 1833 def ancestor(self, a, b):
1834 1834 """calculate the "best" common ancestor of nodes a and b"""
1835 1835
1836 1836 a, b = self.rev(a), self.rev(b)
1837 1837 try:
1838 1838 ancs = self.index.ancestors(a, b)
1839 1839 except (AttributeError, OverflowError):
1840 1840 ancs = ancestor.ancestors(self.parentrevs, a, b)
1841 1841 if ancs:
1842 1842 # choose a consistent winner when there's a tie
1843 1843 return min(map(self.node, ancs))
1844 1844 return self.nullid
1845 1845
1846 1846 def _match(self, id):
1847 1847 if isinstance(id, int):
1848 1848 # rev
1849 1849 return self.node(id)
1850 1850 if len(id) == self.nodeconstants.nodelen:
1851 1851 # possibly a binary node
1852 1852 # odds of a binary node being all hex in ASCII are 1 in 10**25
1853 1853 try:
1854 1854 node = id
1855 1855 self.rev(node) # quick search the index
1856 1856 return node
1857 1857 except error.LookupError:
1858 1858 pass # may be partial hex id
1859 1859 try:
1860 1860 # str(rev)
1861 1861 rev = int(id)
1862 1862 if b"%d" % rev != id:
1863 1863 raise ValueError
1864 1864 if rev < 0:
1865 1865 rev = len(self) + rev
1866 1866 if rev < 0 or rev >= len(self):
1867 1867 raise ValueError
1868 1868 return self.node(rev)
1869 1869 except (ValueError, OverflowError):
1870 1870 pass
1871 1871 if len(id) == 2 * self.nodeconstants.nodelen:
1872 1872 try:
1873 1873 # a full hex nodeid?
1874 1874 node = bin(id)
1875 1875 self.rev(node)
1876 1876 return node
1877 1877 except (binascii.Error, error.LookupError):
1878 1878 pass
1879 1879
1880 1880 def _partialmatch(self, id):
1881 1881         # we don't care about wdirfilenodeids as they should always be full hashes
1882 1882 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1883 1883 ambiguous = False
1884 1884 try:
1885 1885 partial = self.index.partialmatch(id)
1886 1886 if partial and self.hasnode(partial):
1887 1887 if maybewdir:
1888 1888 # single 'ff...' match in radix tree, ambiguous with wdir
1889 1889 ambiguous = True
1890 1890 else:
1891 1891 return partial
1892 1892 elif maybewdir:
1893 1893 # no 'ff...' match in radix tree, wdir identified
1894 1894 raise error.WdirUnsupported
1895 1895 else:
1896 1896 return None
1897 1897 except error.RevlogError:
1898 1898 # parsers.c radix tree lookup gave multiple matches
1899 1899 # fast path: for unfiltered changelog, radix tree is accurate
1900 1900 if not getattr(self, 'filteredrevs', None):
1901 1901 ambiguous = True
1902 1902 # fall through to slow path that filters hidden revisions
1903 1903 except (AttributeError, ValueError):
1904 1904 # we are pure python, or key is not hex
1905 1905 pass
1906 1906 if ambiguous:
1907 1907 raise error.AmbiguousPrefixLookupError(
1908 1908 id, self.display_id, _(b'ambiguous identifier')
1909 1909 )
1910 1910
1911 1911 if id in self._pcache:
1912 1912 return self._pcache[id]
1913 1913
1914 1914 if len(id) <= 40:
1915 1915 # hex(node)[:...]
1916 1916 l = len(id) // 2 * 2 # grab an even number of digits
1917 1917 try:
1918 1918 # we're dropping the last digit, so let's check that it's hex,
1919 1919 # to avoid the expensive computation below if it's not
1920 1920 if len(id) % 2 > 0:
1921 1921 if not (id[-1] in hexdigits):
1922 1922 return None
1923 1923 prefix = bin(id[:l])
1924 1924 except binascii.Error:
1925 1925 pass
1926 1926 else:
1927 1927 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1928 1928 nl = [
1929 1929 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1930 1930 ]
1931 1931 if self.nodeconstants.nullhex.startswith(id):
1932 1932 nl.append(self.nullid)
1933 1933 if len(nl) > 0:
1934 1934 if len(nl) == 1 and not maybewdir:
1935 1935 self._pcache[id] = nl[0]
1936 1936 return nl[0]
1937 1937 raise error.AmbiguousPrefixLookupError(
1938 1938 id, self.display_id, _(b'ambiguous identifier')
1939 1939 )
1940 1940 if maybewdir:
1941 1941 raise error.WdirUnsupported
1942 1942 return None
1943 1943
1944 1944 def lookup(self, id):
1945 1945 """locate a node based on:
1946 1946 - revision number or str(revision number)
1947 1947 - nodeid or subset of hex nodeid
1948 1948 """
1949 1949 n = self._match(id)
1950 1950 if n is not None:
1951 1951 return n
1952 1952 n = self._partialmatch(id)
1953 1953 if n:
1954 1954 return n
1955 1955
1956 1956 raise error.LookupError(id, self.display_id, _(b'no match found'))
1957 1957
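    # ``lookup`` accepts several identifier forms; an illustrative
    # sketch (``rl`` and ``node`` are hypothetical):
    #
    #   rl.lookup(0)         # revision number
    #   rl.lookup(b'0')      # str(revision number)
    #   rl.lookup(node)      # full binary node id
    #   rl.lookup(b'1de3a')  # unambiguous hex prefix
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; unknown
    # identifiers raise LookupError.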
1958 1958 def shortest(self, node, minlength=1):
1959 1959 """Find the shortest unambiguous prefix that matches node."""
1960 1960
1961 1961 def isvalid(prefix):
1962 1962 try:
1963 1963 matchednode = self._partialmatch(prefix)
1964 1964 except error.AmbiguousPrefixLookupError:
1965 1965 return False
1966 1966 except error.WdirUnsupported:
1967 1967 # single 'ff...' match
1968 1968 return True
1969 1969 if matchednode is None:
1970 1970 raise error.LookupError(node, self.display_id, _(b'no node'))
1971 1971 return True
1972 1972
1973 1973 def maybewdir(prefix):
1974 1974 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1975 1975
1976 1976 hexnode = hex(node)
1977 1977
1978 1978 def disambiguate(hexnode, minlength):
1979 1979 """Disambiguate against wdirid."""
1980 1980 for length in range(minlength, len(hexnode) + 1):
1981 1981 prefix = hexnode[:length]
1982 1982 if not maybewdir(prefix):
1983 1983 return prefix
1984 1984
1985 1985 if not getattr(self, 'filteredrevs', None):
1986 1986 try:
1987 1987 length = max(self.index.shortest(node), minlength)
1988 1988 return disambiguate(hexnode, length)
1989 1989 except error.RevlogError:
1990 1990 if node != self.nodeconstants.wdirid:
1991 1991 raise error.LookupError(
1992 1992 node, self.display_id, _(b'no node')
1993 1993 )
1994 1994 except AttributeError:
1995 1995 # Fall through to pure code
1996 1996 pass
1997 1997
1998 1998 if node == self.nodeconstants.wdirid:
1999 1999 for length in range(minlength, len(hexnode) + 1):
2000 2000 prefix = hexnode[:length]
2001 2001 if isvalid(prefix):
2002 2002 return prefix
2003 2003
2004 2004 for length in range(minlength, len(hexnode) + 1):
2005 2005 prefix = hexnode[:length]
2006 2006 if isvalid(prefix):
2007 2007 return disambiguate(hexnode, length)
2008 2008
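    # Sketch: ``shortest`` and ``lookup`` are meant to round-trip, so
    # the returned prefix is itself a valid identifier (hypothetical
    # ``rl``/``node``, assuming the node is not hidden):
    #
    #   prefix = rl.shortest(node, minlength=1)
    #   assert rl.lookup(prefix) == node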
2009 2009 def cmp(self, node, text):
2010 2010 """compare text with a given file revision
2011 2011
2012 2012 returns True if text is different than what is stored.
2013 2013         returns True if text is different from what is stored.
2014 2014 p1, p2 = self.parents(node)
2015 2015 return storageutil.hashrevisionsha1(text, p1, p2) != node
2016 2016
2017 2017 def _getsegmentforrevs(self, startrev, endrev):
2018 2018 """Obtain a segment of raw data corresponding to a range of revisions.
2019 2019
2020 2020         Accepts the start and end revisions.
2023 2023
2024 2024 Requests for data may be satisfied by a cache.
2025 2025
2026 2026 Returns a 2-tuple of (offset, data) for the requested range of
2027 2027 revisions. Offset is the integer offset from the beginning of the
2028 2028 revlog and data is a str or buffer of the raw byte data.
2029 2029
2030 2030 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2031 2031 to determine where each revision's data begins and ends.
2032 2032 """
2033 2033 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2034 2034 # (functions are expensive).
2035 2035 index = self.index
2036 2036 istart = index[startrev]
2037 2037 start = int(istart[0] >> 16)
2038 2038 if startrev == endrev:
2039 2039 end = start + istart[1]
2040 2040 else:
2041 2041 iend = index[endrev]
2042 2042 end = int(iend[0] >> 16) + iend[1]
2043 2043
2044 2044 if self._inline:
2045 2045 start += (startrev + 1) * self.index.entry_size
2046 2046 end += (endrev + 1) * self.index.entry_size
2047 2047 length = end - start
2048 2048
2049 2049 return start, self._segmentfile.read_chunk(start, length)
2050 2050
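    # Sketch (non-inline case) of how a caller slices the returned
    # segment back into per-revision chunks, mirroring ``_chunks``:
    #
    #   offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    #   for rev in range(firstrev, lastrev + 1):
    #       s = rl.start(rev) - offset
    #       chunk = data[s:s + rl.length(rev)]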
2051 2051 def _chunk(self, rev):
2052 2052 """Obtain a single decompressed chunk for a revision.
2053 2053
2054 2054         Accepts an integer revision.
2057 2057
2058 2058 Returns a str holding uncompressed data for the requested revision.
2059 2059 """
2060 2060 compression_mode = self.index[rev][10]
2061 2061 data = self._getsegmentforrevs(rev, rev)[1]
2062 2062 if compression_mode == COMP_MODE_PLAIN:
2063 2063 return data
2064 2064 elif compression_mode == COMP_MODE_DEFAULT:
2065 2065 return self._decompressor(data)
2066 2066 elif compression_mode == COMP_MODE_INLINE:
2067 2067 return self.decompress(data)
2068 2068 else:
2069 2069 msg = b'unknown compression mode %d'
2070 2070 msg %= compression_mode
2071 2071 raise error.RevlogError(msg)
2072 2072
2073 2073 def _chunks(self, revs, targetsize=None):
2074 2074 """Obtain decompressed chunks for the specified revisions.
2075 2075
2076 2076 Accepts an iterable of numeric revisions that are assumed to be in
2077 2077         ascending order.
2080 2080
2081 2081 This function is similar to calling ``self._chunk()`` multiple times,
2082 2082 but is faster.
2083 2083
2084 2084 Returns a list with decompressed data for each requested revision.
2085 2085 """
2086 2086 if not revs:
2087 2087 return []
2088 2088 start = self.start
2089 2089 length = self.length
2090 2090 inline = self._inline
2091 2091 iosize = self.index.entry_size
2092 2092 buffer = util.buffer
2093 2093
2094 2094 l = []
2095 2095 ladd = l.append
2096 2096
2097 2097 if not self.data_config.with_sparse_read:
2098 2098 slicedchunks = (revs,)
2099 2099 else:
2100 2100 slicedchunks = deltautil.slicechunk(
2101 2101 self, revs, targetsize=targetsize
2102 2102 )
2103 2103
2104 2104 for revschunk in slicedchunks:
2105 2105 firstrev = revschunk[0]
2106 2106 # Skip trailing revisions with empty diff
2107 2107 for lastrev in revschunk[::-1]:
2108 2108 if length(lastrev) != 0:
2109 2109 break
2110 2110
2111 2111 try:
2112 2112 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2113 2113 except OverflowError:
2114 2114 # issue4215 - we can't cache a run of chunks greater than
2115 2115 # 2G on Windows
2116 2116 return [self._chunk(rev) for rev in revschunk]
2117 2117
2118 2118 decomp = self.decompress
2119 2119 # self._decompressor might be None, but will not be used in that case
2120 2120 def_decomp = self._decompressor
2121 2121 for rev in revschunk:
2122 2122 chunkstart = start(rev)
2123 2123 if inline:
2124 2124 chunkstart += (rev + 1) * iosize
2125 2125 chunklength = length(rev)
2126 2126 comp_mode = self.index[rev][10]
2127 2127 c = buffer(data, chunkstart - offset, chunklength)
2128 2128 if comp_mode == COMP_MODE_PLAIN:
2129 2129 ladd(c)
2130 2130 elif comp_mode == COMP_MODE_INLINE:
2131 2131 ladd(decomp(c))
2132 2132 elif comp_mode == COMP_MODE_DEFAULT:
2133 2133 ladd(def_decomp(c))
2134 2134 else:
2135 2135 msg = b'unknown compression mode %d'
2136 2136 msg %= comp_mode
2137 2137 raise error.RevlogError(msg)
2138 2138
2139 2139 return l
2140 2140
2141 2141 def deltaparent(self, rev):
2142 2142 """return deltaparent of the given revision"""
2143 2143 base = self.index[rev][3]
2144 2144 if base == rev:
2145 2145 return nullrev
2146 2146 elif self.delta_config.general_delta:
2147 2147 return base
2148 2148 else:
2149 2149 return rev - 1
2150 2150
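    # Sketch: walking a delta chain by hand with ``deltaparent`` until
    # the full-text base is reached (hypothetical ``rl``/``some_rev``):
    #
    #   chain = []
    #   r = some_rev
    #   while r != nullrev:
    #       chain.append(r)
    #       r = rl.deltaparent(r)
    #   chain.reverse()  # base first, as ``_deltachain`` returns it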
2151 2151 def issnapshot(self, rev):
2152 2152 """tells whether rev is a snapshot"""
2153 2153 if not self.delta_config.sparse_revlog:
2154 2154 return self.deltaparent(rev) == nullrev
2155 2155 elif hasattr(self.index, 'issnapshot'):
2156 2156 # directly assign the method to cache the testing and access
2157 2157 self.issnapshot = self.index.issnapshot
2158 2158 return self.issnapshot(rev)
2159 2159 if rev == nullrev:
2160 2160 return True
2161 2161 entry = self.index[rev]
2162 2162 base = entry[3]
2163 2163 if base == rev:
2164 2164 return True
2165 2165 if base == nullrev:
2166 2166 return True
2167 2167 p1 = entry[5]
2168 2168 while self.length(p1) == 0:
2169 2169 b = self.deltaparent(p1)
2170 2170 if b == p1:
2171 2171 break
2172 2172 p1 = b
2173 2173 p2 = entry[6]
2174 2174 while self.length(p2) == 0:
2175 2175 b = self.deltaparent(p2)
2176 2176 if b == p2:
2177 2177 break
2178 2178 p2 = b
2179 2179 if base == p1 or base == p2:
2180 2180 return False
2181 2181 return self.issnapshot(base)
2182 2182
2183 2183 def snapshotdepth(self, rev):
2184 2184         """number of snapshots in the chain before this one"""
2185 2185         if not self.issnapshot(rev):
2186 2186             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2187 2187 return len(self._deltachain(rev)[0]) - 1
2188 2188
2189 2189 def revdiff(self, rev1, rev2):
2190 2190 """return or calculate a delta between two revisions
2191 2191
2192 2192 The delta calculated is in binary form and is intended to be written to
2193 2193 revlog data directly. So this function needs raw revision data.
2194 2194 """
2195 2195 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2196 2196 return bytes(self._chunk(rev2))
2197 2197
2198 2198 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2199 2199
2200 2200 def revision(self, nodeorrev):
2201 2201 """return an uncompressed revision of a given node or revision
2202 2202 number.
2203 2203 """
2204 2204 return self._revisiondata(nodeorrev)
2205 2205
2206 2206 def sidedata(self, nodeorrev):
2207 2207 """a map of extra data related to the changeset but not part of the hash
2208 2208
2209 2209         This function currently returns a dictionary. However, a more
2210 2210         advanced mapping object will likely be used in the future for more
2211 2211         efficient/lazy code.
2212 2212 """
2213 2213 # deal with <nodeorrev> argument type
2214 2214 if isinstance(nodeorrev, int):
2215 2215 rev = nodeorrev
2216 2216 else:
2217 2217 rev = self.rev(nodeorrev)
2218 2218 return self._sidedata(rev)
2219 2219
2220 2220 def _revisiondata(self, nodeorrev, raw=False):
2221 2221 # deal with <nodeorrev> argument type
2222 2222 if isinstance(nodeorrev, int):
2223 2223 rev = nodeorrev
2224 2224 node = self.node(rev)
2225 2225 else:
2226 2226 node = nodeorrev
2227 2227 rev = None
2228 2228
2229 2229 # fast path the special `nullid` rev
2230 2230 if node == self.nullid:
2231 2231 return b""
2232 2232
2233 2233 # ``rawtext`` is the text as stored inside the revlog. Might be the
2234 2234 # revision or might need to be processed to retrieve the revision.
2235 2235 rev, rawtext, validated = self._rawtext(node, rev)
2236 2236
2237 2237 if raw and validated:
2238 2238 # if we don't want to process the raw text and that raw
2239 2239 # text is cached, we can exit early.
2240 2240 return rawtext
2241 2241 if rev is None:
2242 2242 rev = self.rev(node)
2243 2243         # the revlog's flags for this revision
2244 2244         # (they usually alter its state or content)
2245 2245 flags = self.flags(rev)
2246 2246
2247 2247 if validated and flags == REVIDX_DEFAULT_FLAGS:
2248 2248 # no extra flags set, no flag processor runs, text = rawtext
2249 2249 return rawtext
2250 2250
2251 2251 if raw:
2252 2252 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2253 2253 text = rawtext
2254 2254 else:
2255 2255 r = flagutil.processflagsread(self, rawtext, flags)
2256 2256 text, validatehash = r
2257 2257 if validatehash:
2258 2258 self.checkhash(text, node, rev=rev)
2259 2259 if not validated:
2260 2260 self._revisioncache = (node, rev, rawtext)
2261 2261
2262 2262 return text
2263 2263
2264 2264 def _rawtext(self, node, rev):
2265 2265 """return the possibly unvalidated rawtext for a revision
2266 2266
2267 2267 returns (rev, rawtext, validated)
2268 2268 """
2269 2269
2270 2270 # revision in the cache (could be useful to apply delta)
2271 2271 cachedrev = None
2272 2272 # An intermediate text to apply deltas to
2273 2273 basetext = None
2274 2274
2275 2275 # Check if we have the entry in cache
2276 2276 # The cache entry looks like (node, rev, rawtext)
2277 2277 if self._revisioncache:
2278 2278 if self._revisioncache[0] == node:
2279 2279 return (rev, self._revisioncache[2], True)
2280 2280 cachedrev = self._revisioncache[1]
2281 2281
2282 2282 if rev is None:
2283 2283 rev = self.rev(node)
2284 2284
2285 2285 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2286 2286 if stopped:
2287 2287 basetext = self._revisioncache[2]
2288 2288
2289 2289         # drop cache to save memory; the caller is expected to
2290 2290 # update self._revisioncache after validating the text
2291 2291 self._revisioncache = None
2292 2292
2293 2293 targetsize = None
2294 2294 rawsize = self.index[rev][2]
2295 2295 if 0 <= rawsize:
2296 2296 targetsize = 4 * rawsize
2297 2297
2298 2298 bins = self._chunks(chain, targetsize=targetsize)
2299 2299 if basetext is None:
2300 2300 basetext = bytes(bins[0])
2301 2301 bins = bins[1:]
2302 2302
2303 2303 rawtext = mdiff.patches(basetext, bins)
2304 2304 del basetext # let us have a chance to free memory early
2305 2305 return (rev, rawtext, False)
2306 2306
2307 2307 def _sidedata(self, rev):
2308 2308 """Return the sidedata for a given revision number."""
2309 2309 index_entry = self.index[rev]
2310 2310 sidedata_offset = index_entry[8]
2311 2311 sidedata_size = index_entry[9]
2312 2312
2313 2313 if self._inline:
2314 2314 sidedata_offset += self.index.entry_size * (1 + rev)
2315 2315 if sidedata_size == 0:
2316 2316 return {}
2317 2317
2318 2318 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2319 2319 filename = self._sidedatafile
2320 2320 end = self._docket.sidedata_end
2321 2321 offset = sidedata_offset
2322 2322 length = sidedata_size
2323 2323 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2324 2324 raise error.RevlogError(m)
2325 2325
2326 2326 comp_segment = self._segmentfile_sidedata.read_chunk(
2327 2327 sidedata_offset, sidedata_size
2328 2328 )
2329 2329
2330 2330 comp = self.index[rev][11]
2331 2331 if comp == COMP_MODE_PLAIN:
2332 2332 segment = comp_segment
2333 2333 elif comp == COMP_MODE_DEFAULT:
2334 2334 segment = self._decompressor(comp_segment)
2335 2335 elif comp == COMP_MODE_INLINE:
2336 2336 segment = self.decompress(comp_segment)
2337 2337 else:
2338 2338 msg = b'unknown compression mode %d'
2339 2339 msg %= comp
2340 2340 raise error.RevlogError(msg)
2341 2341
2342 2342 sidedata = sidedatautil.deserialize_sidedata(segment)
2343 2343 return sidedata
2344 2344
2345 2345 def rawdata(self, nodeorrev):
2346 2346 """return an uncompressed raw data of a given node or revision number."""
2347 2347 return self._revisiondata(nodeorrev, raw=True)
2348 2348
2349 2349 def hash(self, text, p1, p2):
2350 2350 """Compute a node hash.
2351 2351
2352 2352 Available as a function so that subclasses can replace the hash
2353 2353 as needed.
2354 2354 """
2355 2355 return storageutil.hashrevisionsha1(text, p1, p2)
2356 2356
2357 2357 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2358 2358 """Check node hash integrity.
2359 2359
2360 2360 Available as a function so that subclasses can extend hash mismatch
2361 2361 behaviors as needed.
2362 2362 """
2363 2363 try:
2364 2364 if p1 is None and p2 is None:
2365 2365 p1, p2 = self.parents(node)
2366 2366 if node != self.hash(text, p1, p2):
2367 2367 # Clear the revision cache on hash failure. The revision cache
2368 2368 # only stores the raw revision and clearing the cache does have
2369 2369 # the side-effect that we won't have a cache hit when the raw
2370 2370 # revision data is accessed. But this case should be rare and
2371 2371 # it is extra work to teach the cache about the hash
2372 2372 # verification state.
2373 2373 if self._revisioncache and self._revisioncache[0] == node:
2374 2374 self._revisioncache = None
2375 2375
2376 2376 revornode = rev
2377 2377 if revornode is None:
2378 2378 revornode = templatefilters.short(hex(node))
2379 2379 raise error.RevlogError(
2380 2380 _(b"integrity check failed on %s:%s")
2381 2381 % (self.display_id, pycompat.bytestr(revornode))
2382 2382 )
2383 2383 except error.RevlogError:
2384 2384 if self.feature_config.censorable and storageutil.iscensoredtext(
2385 2385 text
2386 2386 ):
2387 2387 raise error.CensoredNodeError(self.display_id, node, text)
2388 2388 raise
2389 2389
2390 2390 @property
2391 2391 def _split_index_file(self):
2392 2392 """the path where to expect the index of an ongoing splitting operation
2393 2393
2394 2394 The file will only exist if a splitting operation is in progress, but
2395 2395 it is always expected at the same location."""
2396 2396 parts = self.radix.split(b'/')
2397 2397 if len(parts) > 1:
2398 2398             # add a '-s' suffix to the ``data/`` or ``meta/`` base directory
2399 2399 head = parts[0] + b'-s'
2400 2400 mids = parts[1:-1]
2401 2401 tail = parts[-1] + b'.i'
2402 2402 pieces = [head] + mids + [tail]
2403 2403 return b'/'.join(pieces)
2404 2404 else:
2405 2405 # the revlog is stored at the root of the store (changelog or
2406 2406 # manifest), no risk of collision.
2407 2407 return self.radix + b'.i.s'
2408 2408
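    # For instance (sketch): a filelog stored at ``data/foo/bar.i``
    # splits via ``data-s/foo/bar.i``, while a changelog stored at the
    # store root as ``00changelog.i`` splits via ``00changelog.i.s``.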
2409 2409 def _enforceinlinesize(self, tr, side_write=True):
2410 2410 """Check if the revlog is too big for inline and convert if so.
2411 2411
2412 2412 This should be called after revisions are added to the revlog. If the
2413 2413 revlog has grown too large to be an inline revlog, it will convert it
2414 2414 to use multiple index and data files.
2415 2415 """
2416 2416 tiprev = len(self) - 1
2417 2417 total_size = self.start(tiprev) + self.length(tiprev)
2418 2418 if not self._inline or total_size < _maxinline:
2419 2419 return
2420 2420
2421 2421 troffset = tr.findoffset(self._indexfile)
2422 2422 if troffset is None:
2423 2423 raise error.RevlogError(
2424 2424 _(b"%s not found in the transaction") % self._indexfile
2425 2425 )
2426 2426 if troffset:
2427 2427 tr.addbackup(self._indexfile, for_offset=True)
2428 2428 tr.add(self._datafile, 0)
2429 2429
2430 2430 existing_handles = False
2431 2431 if self._writinghandles is not None:
2432 2432 existing_handles = True
2433 2433 fp = self._writinghandles[0]
2434 2434 fp.flush()
2435 2435 fp.close()
2436 2436 # We can't use the cached file handle after close(). So prevent
2437 2437 # its usage.
2438 2438 self._writinghandles = None
2439 2439 self._segmentfile.writing_handle = None
2440 2440 # No need to deal with sidedata writing handle as it is only
2441 2441 # relevant with revlog-v2 which is never inline, not reaching
2442 2442 # this code
2443 2443 if side_write:
2444 2444 old_index_file_path = self._indexfile
2445 2445 new_index_file_path = self._split_index_file
2446 2446 opener = self.opener
2447 2447 weak_self = weakref.ref(self)
2448 2448
2449 2449             # the "split" index replaces the real index when the transaction is finalized
2450 2450 def finalize_callback(tr):
2451 2451 opener.rename(
2452 2452 new_index_file_path,
2453 2453 old_index_file_path,
2454 2454 checkambig=True,
2455 2455 )
2456 2456 maybe_self = weak_self()
2457 2457 if maybe_self is not None:
2458 2458 maybe_self._indexfile = old_index_file_path
2459 2459
2460 2460 def abort_callback(tr):
2461 2461 maybe_self = weak_self()
2462 2462 if maybe_self is not None:
2463 2463 maybe_self._indexfile = old_index_file_path
2464 2464
2465 2465 tr.registertmp(new_index_file_path)
2466 2466 if self.target[1] is not None:
2467 2467 callback_id = b'000-revlog-split-%d-%s' % self.target
2468 2468 else:
2469 2469 callback_id = b'000-revlog-split-%d' % self.target[0]
2470 2470 tr.addfinalize(callback_id, finalize_callback)
2471 2471 tr.addabort(callback_id, abort_callback)
2472 2472
2473 2473 new_dfh = self._datafp(b'w+')
2474 2474 new_dfh.truncate(0) # drop any potentially existing data
2475 2475 try:
2476 2476 with self.reading():
2477 2477 for r in self:
2478 2478 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2479 2479 new_dfh.flush()
2480 2480
2481 2481 if side_write:
2482 2482 self._indexfile = new_index_file_path
2483 2483 with self.__index_new_fp() as fp:
2484 2484 self._format_flags &= ~FLAG_INLINE_DATA
2485 2485 self._inline = False
2486 2486 for i in self:
2487 2487 e = self.index.entry_binary(i)
2488 2488 if i == 0 and self._docket is None:
2489 2489 header = self._format_flags | self._format_version
2490 2490 header = self.index.pack_header(header)
2491 2491 e = header + e
2492 2492 fp.write(e)
2493 2493 if self._docket is not None:
2494 2494 self._docket.index_end = fp.tell()
2495 2495
2496 2496             # If we don't use side-write, the temp file replaces the real
2497 2497             # index when we exit the context manager
2498 2498
2499 2499 nodemaputil.setup_persistent_nodemap(tr, self)
2500 2500 self._segmentfile = randomaccessfile.randomaccessfile(
2501 2501 self.opener,
2502 2502 self._datafile,
2503 2503 self.data_config.chunk_cache_size,
2504 2504 )
2505 2505
2506 2506 if existing_handles:
2507 2507 # switched from inline to conventional reopen the index
2508 2508 ifh = self.__index_write_fp()
2509 2509 self._writinghandles = (ifh, new_dfh, None)
2510 2510 self._segmentfile.writing_handle = new_dfh
2511 2511 new_dfh = None
2512 2512 # No need to deal with sidedata writing handle as it is only
2513 2513 # relevant with revlog-v2 which is never inline, not reaching
2514 2514 # this code
2515 2515 finally:
2516 2516 if new_dfh is not None:
2517 2517 new_dfh.close()
2518 2518
2519 2519 def _nodeduplicatecallback(self, transaction, node):
2520 2520 """called when trying to add a node already stored."""
2521 2521
2522 2522 @contextlib.contextmanager
2523 2523 def reading(self):
2524 2524 """Context manager that keeps data and sidedata files open for reading"""
2525 2525 if len(self.index) == 0:
2526 2526 yield # nothing to be read
2527 2527 else:
2528 2528 with self._segmentfile.reading():
2529 2529 with self._segmentfile_sidedata.reading():
2530 2530 yield
2531 2531
2532 2532 @contextlib.contextmanager
2533 2533 def _writing(self, transaction):
2534 2534 if self._trypending:
2535 2535 msg = b'try to write in a `trypending` revlog: %s'
2536 2536 msg %= self.display_id
2537 2537 raise error.ProgrammingError(msg)
2538 2538 if self._writinghandles is not None:
2539 2539 yield
2540 2540 else:
2541 2541 ifh = dfh = sdfh = None
2542 2542 try:
2543 2543 r = len(self)
2544 2544 # opening the data file.
2545 2545 dsize = 0
2546 2546 if r:
2547 2547 dsize = self.end(r - 1)
2548 2548 dfh = None
2549 2549 if not self._inline:
2550 2550 try:
2551 2551 dfh = self._datafp(b"r+")
2552 2552 if self._docket is None:
2553 2553 dfh.seek(0, os.SEEK_END)
2554 2554 else:
2555 2555 dfh.seek(self._docket.data_end, os.SEEK_SET)
2556 2556 except FileNotFoundError:
2557 2557 dfh = self._datafp(b"w+")
2558 2558 transaction.add(self._datafile, dsize)
2559 2559 if self._sidedatafile is not None:
2560 2560 # revlog-v2 does not inline, help Pytype
2561 2561 assert dfh is not None
2562 2562 try:
2563 2563 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2564 2564 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2565 2565 except FileNotFoundError:
2566 2566 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2567 2567 transaction.add(
2568 2568 self._sidedatafile, self._docket.sidedata_end
2569 2569 )
2570 2570
2571 2571 # opening the index file.
2572 2572 isize = r * self.index.entry_size
2573 2573 ifh = self.__index_write_fp()
2574 2574 if self._inline:
2575 2575 transaction.add(self._indexfile, dsize + isize)
2576 2576 else:
2577 2577 transaction.add(self._indexfile, isize)
2578 2578 # exposing all file handle for writing.
2579 2579 self._writinghandles = (ifh, dfh, sdfh)
2580 2580 self._segmentfile.writing_handle = ifh if self._inline else dfh
2581 2581 self._segmentfile_sidedata.writing_handle = sdfh
2582 2582 yield
2583 2583 if self._docket is not None:
2584 2584 self._write_docket(transaction)
2585 2585 finally:
2586 2586 self._writinghandles = None
2587 2587 self._segmentfile.writing_handle = None
2588 2588 self._segmentfile_sidedata.writing_handle = None
2589 2589 if dfh is not None:
2590 2590 dfh.close()
2591 2591 if sdfh is not None:
2592 2592 sdfh.close()
2593 2593                 # closing the index file last to avoid exposing references to
2594 2594                 # potentially unflushed data content.
2595 2595 if ifh is not None:
2596 2596 ifh.close()
2597 2597
2598 2598 def _write_docket(self, transaction):
2599 2599 """write the current docket on disk
2600 2600
2601 2601         Exists as a method to help the changelog implement transaction logic.
2602 2602 
2603 2603         We could also imagine using the same transaction logic for all revlogs
2604 2604         since dockets are cheap.
2605 2605 self._docket.write(transaction)
2606 2606
2607 2607 def addrevision(
2608 2608 self,
2609 2609 text,
2610 2610 transaction,
2611 2611 link,
2612 2612 p1,
2613 2613 p2,
2614 2614 cachedelta=None,
2615 2615 node=None,
2616 2616 flags=REVIDX_DEFAULT_FLAGS,
2617 2617 deltacomputer=None,
2618 2618 sidedata=None,
2619 2619 ):
2620 2620 """add a revision to the log
2621 2621
2622 2622 text - the revision data to add
2623 2623 transaction - the transaction object used for rollback
2624 2624 link - the linkrev data to add
2625 2625 p1, p2 - the parent nodeids of the revision
2626 2626 cachedelta - an optional precomputed delta
2627 2627 node - nodeid of revision; typically node is not specified, and it is
2628 2628 computed by default as hash(text, p1, p2), however subclasses might
2629 2629 use different hashing method (and override checkhash() in such case)
2630 2630 flags - the known flags to set on the revision
2631 2631 deltacomputer - an optional deltacomputer instance shared between
2632 2632 multiple calls
2633 2633 """
2634 2634 if link == nullrev:
2635 2635 raise error.RevlogError(
2636 2636 _(b"attempted to add linkrev -1 to %s") % self.display_id
2637 2637 )
2638 2638
2639 2639 if sidedata is None:
2640 2640 sidedata = {}
2641 2641 elif sidedata and not self.feature_config.has_side_data:
2642 2642 raise error.ProgrammingError(
2643 2643                 _(b"trying to add sidedata to a revlog that does not support them")
2644 2644 )
2645 2645
2646 2646 if flags:
2647 2647 node = node or self.hash(text, p1, p2)
2648 2648
2649 2649 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2650 2650
2651 2651 # If the flag processor modifies the revision data, ignore any provided
2652 2652 # cachedelta.
2653 2653 if rawtext != text:
2654 2654 cachedelta = None
2655 2655
2656 2656 if len(rawtext) > _maxentrysize:
2657 2657 raise error.RevlogError(
2658 2658 _(
2659 2659 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2660 2660 )
2661 2661 % (self.display_id, len(rawtext))
2662 2662 )
2663 2663
2664 2664 node = node or self.hash(rawtext, p1, p2)
2665 2665 rev = self.index.get_rev(node)
2666 2666 if rev is not None:
2667 2667 return rev
2668 2668
2669 2669 if validatehash:
2670 2670 self.checkhash(rawtext, node, p1=p1, p2=p2)
2671 2671
2672 2672 return self.addrawrevision(
2673 2673 rawtext,
2674 2674 transaction,
2675 2675 link,
2676 2676 p1,
2677 2677 p2,
2678 2678 node,
2679 2679 flags,
2680 2680 cachedelta=cachedelta,
2681 2681 deltacomputer=deltacomputer,
2682 2682 sidedata=sidedata,
2683 2683 )
2684 2684
2685 2685 def addrawrevision(
2686 2686 self,
2687 2687 rawtext,
2688 2688 transaction,
2689 2689 link,
2690 2690 p1,
2691 2691 p2,
2692 2692 node,
2693 2693 flags,
2694 2694 cachedelta=None,
2695 2695 deltacomputer=None,
2696 2696 sidedata=None,
2697 2697 ):
2698 2698 """add a raw revision with known flags, node and parents
2699 2699         useful when reusing a revision not stored in this revlog (e.g. received
2700 2700         over the wire, or read from an external bundle).
2701 2701 """
2702 2702 with self._writing(transaction):
2703 2703 return self._addrevision(
2704 2704 node,
2705 2705 rawtext,
2706 2706 transaction,
2707 2707 link,
2708 2708 p1,
2709 2709 p2,
2710 2710 flags,
2711 2711 cachedelta,
2712 2712 deltacomputer=deltacomputer,
2713 2713 sidedata=sidedata,
2714 2714 )
2715 2715
2716 2716 def compress(self, data):
2717 2717 """Generate a possibly-compressed representation of data."""
2718 2718 if not data:
2719 2719 return b'', data
2720 2720
2721 2721 compressed = self._compressor.compress(data)
2722 2722
2723 2723 if compressed:
2724 2724 # The revlog compressor added the header in the returned data.
2725 2725 return b'', compressed
2726 2726
2727 2727 if data[0:1] == b'\0':
2728 2728 return b'', data
2729 2729 return b'u', data
2730 2730
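    # Sketch of the (header, data) contract shared with ``decompress``
    # below: the header is empty when the compressor's own header is
    # embedded in the payload, or b'u' for data stored as-is:
    #
    #   h, packed = rl.compress(rawtext)
    #   assert bytes(rl.decompress(h + packed)) == rawtext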
2731 2731 def decompress(self, data):
2732 2732 """Decompress a revlog chunk.
2733 2733
2734 2734 The chunk is expected to begin with a header identifying the
2735 2735 format type so it can be routed to an appropriate decompressor.
2736 2736 """
2737 2737 if not data:
2738 2738 return data
2739 2739
2740 2740 # Revlogs are read much more frequently than they are written and many
2741 2741 # chunks only take microseconds to decompress, so performance is
2742 2742 # important here.
2743 2743 #
2744 2744 # We can make a few assumptions about revlogs:
2745 2745 #
2746 2746 # 1) the majority of chunks will be compressed (as opposed to inline
2747 2747 # raw data).
2748 2748         # 2) decompressing *any* data will likely be at least 10x slower than
2749 2749 # returning raw inline data.
2750 2750 # 3) we want to prioritize common and officially supported compression
2751 2751 # engines
2752 2752 #
2753 2753 # It follows that we want to optimize for "decompress compressed data
2754 2754 # when encoded with common and officially supported compression engines"
2755 2755 # case over "raw data" and "data encoded by less common or non-official
2756 2756 # compression engines." That is why we have the inline lookup first
2757 2757 # followed by the compengines lookup.
2758 2758 #
2759 2759 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2760 2760 # compressed chunks. And this matters for changelog and manifest reads.
2761 2761 t = data[0:1]
2762 2762
2763 2763 if t == b'x':
2764 2764 try:
2765 2765 return _zlibdecompress(data)
2766 2766 except zlib.error as e:
2767 2767 raise error.RevlogError(
2768 2768 _(b'revlog decompress error: %s')
2769 2769 % stringutil.forcebytestr(e)
2770 2770 )
2771 2771 # '\0' is more common than 'u' so it goes first.
2772 2772 elif t == b'\0':
2773 2773 return data
2774 2774 elif t == b'u':
2775 2775 return util.buffer(data, 1)
2776 2776
2777 2777 compressor = self._get_decompressor(t)
2778 2778
2779 2779 return compressor.decompress(data)
2780 2780
2781 2781 def _addrevision(
2782 2782 self,
2783 2783 node,
2784 2784 rawtext,
2785 2785 transaction,
2786 2786 link,
2787 2787 p1,
2788 2788 p2,
2789 2789 flags,
2790 2790 cachedelta,
2791 2791 alwayscache=False,
2792 2792 deltacomputer=None,
2793 2793 sidedata=None,
2794 2794 ):
2795 2795 """internal function to add revisions to the log
2796 2796
2797 2797 see addrevision for argument descriptions.
2798 2798
2799 2799 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2800 2800
2801 2801 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2802 2802 be used.
2803 2803
2804 2804 invariants:
2805 2805 - rawtext is optional (can be None); if not set, cachedelta must be set.
2806 2806 if both are set, they must correspond to each other.
2807 2807 """
2808 2808 if node == self.nullid:
2809 2809 raise error.RevlogError(
2810 2810 _(b"%s: attempt to add null revision") % self.display_id
2811 2811 )
2812 2812 if (
2813 2813 node == self.nodeconstants.wdirid
2814 2814 or node in self.nodeconstants.wdirfilenodeids
2815 2815 ):
2816 2816 raise error.RevlogError(
2817 2817 _(b"%s: attempt to add wdir revision") % self.display_id
2818 2818 )
2819 2819 if self._writinghandles is None:
2820 2820 msg = b'adding revision outside `revlog._writing` context'
2821 2821 raise error.ProgrammingError(msg)
2822 2822
2823 2823 btext = [rawtext]
2824 2824
2825 2825 curr = len(self)
2826 2826 prev = curr - 1
2827 2827
2828 2828 offset = self._get_data_offset(prev)
2829 2829
2830 2830 if self._concurrencychecker:
2831 2831 ifh, dfh, sdfh = self._writinghandles
2832 2832 # XXX no checking for the sidedata file
2833 2833 if self._inline:
2834 2834 # offset is "as if" it were in the .d file, so we need to add on
2835 2835 # the size of the entry metadata.
2836 2836 self._concurrencychecker(
2837 2837 ifh, self._indexfile, offset + curr * self.index.entry_size
2838 2838 )
2839 2839 else:
2840 2840 # Entries in the .i are a consistent size.
2841 2841 self._concurrencychecker(
2842 2842 ifh, self._indexfile, curr * self.index.entry_size
2843 2843 )
2844 2844 self._concurrencychecker(dfh, self._datafile, offset)
2845 2845
2846 2846 p1r, p2r = self.rev(p1), self.rev(p2)
2847 2847
2848 2848 # full versions are inserted when the needed deltas
2849 2849 # become comparable to the uncompressed text
2850 2850 if rawtext is None:
2851 2851 # need rawtext size, before changed by flag processors, which is
2852 2852 # the non-raw size. use revlog explicitly to avoid filelog's extra
2853 2853 # logic that might remove metadata size.
2854 2854 textlen = mdiff.patchedsize(
2855 2855 revlog.size(self, cachedelta[0]), cachedelta[1]
2856 2856 )
2857 2857 else:
2858 2858 textlen = len(rawtext)
2859 2859
2860 2860 if deltacomputer is None:
2861 2861 write_debug = None
2862 2862 if self.delta_config.debug_delta:
2863 2863 write_debug = transaction._report
2864 2864 deltacomputer = deltautil.deltacomputer(
2865 2865 self, write_debug=write_debug
2866 2866 )
2867 2867
2868 2868 if cachedelta is not None and len(cachedelta) == 2:
2869 2869 # If the cached delta has no information about how it should be
2870 2870 # reused, add the default reuse instruction according to the
2871 2871 # revlog's configuration.
2872 2872 if (
2873 2873 self.delta_config.general_delta
2874 2874 and self.delta_config.lazy_delta_base
2875 2875 ):
2876 2876 delta_base_reuse = DELTA_BASE_REUSE_TRY
2877 2877 else:
2878 2878 delta_base_reuse = DELTA_BASE_REUSE_NO
2879 2879 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2880 2880
2881 2881 revinfo = revlogutils.revisioninfo(
2882 2882 node,
2883 2883 p1,
2884 2884 p2,
2885 2885 btext,
2886 2886 textlen,
2887 2887 cachedelta,
2888 2888 flags,
2889 2889 )
2890 2890
2891 2891 deltainfo = deltacomputer.finddeltainfo(revinfo)
2892 2892
2893 2893 compression_mode = COMP_MODE_INLINE
2894 2894 if self._docket is not None:
2895 2895 default_comp = self._docket.default_compression_header
2896 2896 r = deltautil.delta_compression(default_comp, deltainfo)
2897 2897 compression_mode, deltainfo = r
2898 2898
2899 2899 sidedata_compression_mode = COMP_MODE_INLINE
2900 2900 if sidedata and self.feature_config.has_side_data:
2901 2901 sidedata_compression_mode = COMP_MODE_PLAIN
2902 2902 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2903 2903 sidedata_offset = self._docket.sidedata_end
2904 2904 h, comp_sidedata = self.compress(serialized_sidedata)
2905 2905 if (
2906 2906 h != b'u'
2907 2907 and comp_sidedata[0:1] != b'\0'
2908 2908 and len(comp_sidedata) < len(serialized_sidedata)
2909 2909 ):
2910 2910 assert not h
2911 2911 if (
2912 2912 comp_sidedata[0:1]
2913 2913 == self._docket.default_compression_header
2914 2914 ):
2915 2915 sidedata_compression_mode = COMP_MODE_DEFAULT
2916 2916 serialized_sidedata = comp_sidedata
2917 2917 else:
2918 2918 sidedata_compression_mode = COMP_MODE_INLINE
2919 2919 serialized_sidedata = comp_sidedata
2920 2920 else:
2921 2921 serialized_sidedata = b""
2922 2922 # Don't store the offset if the sidedata is empty, that way
2923 2923 # we can easily detect empty sidedata and they will be no different
2924 2924             # from ones we manually add.
2925 2925 sidedata_offset = 0
2926 2926
2927 2927 rank = RANK_UNKNOWN
2928 2928 if self.feature_config.compute_rank:
2929 2929 if (p1r, p2r) == (nullrev, nullrev):
2930 2930 rank = 1
2931 2931 elif p1r != nullrev and p2r == nullrev:
2932 2932 rank = 1 + self.fast_rank(p1r)
2933 2933 elif p1r == nullrev and p2r != nullrev:
2934 2934 rank = 1 + self.fast_rank(p2r)
2935 2935 else: # merge node
2936 2936 if rustdagop is not None and self.index.rust_ext_compat:
2937 2937 rank = rustdagop.rank(self.index, p1r, p2r)
2938 2938 else:
2939 2939 pmin, pmax = sorted((p1r, p2r))
2940 2940 rank = 1 + self.fast_rank(pmax)
2941 2941 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2942 2942
2943 2943 e = revlogutils.entry(
2944 2944 flags=flags,
2945 2945 data_offset=offset,
2946 2946 data_compressed_length=deltainfo.deltalen,
2947 2947 data_uncompressed_length=textlen,
2948 2948 data_compression_mode=compression_mode,
2949 2949 data_delta_base=deltainfo.base,
2950 2950 link_rev=link,
2951 2951 parent_rev_1=p1r,
2952 2952 parent_rev_2=p2r,
2953 2953 node_id=node,
2954 2954 sidedata_offset=sidedata_offset,
2955 2955 sidedata_compressed_length=len(serialized_sidedata),
2956 2956 sidedata_compression_mode=sidedata_compression_mode,
2957 2957 rank=rank,
2958 2958 )
2959 2959
2960 2960 self.index.append(e)
2961 2961 entry = self.index.entry_binary(curr)
2962 2962 if curr == 0 and self._docket is None:
2963 2963 header = self._format_flags | self._format_version
2964 2964 header = self.index.pack_header(header)
2965 2965 entry = header + entry
2966 2966 self._writeentry(
2967 2967 transaction,
2968 2968 entry,
2969 2969 deltainfo.data,
2970 2970 link,
2971 2971 offset,
2972 2972 serialized_sidedata,
2973 2973 sidedata_offset,
2974 2974 )
2975 2975
2976 2976 rawtext = btext[0]
2977 2977
2978 2978 if alwayscache and rawtext is None:
2979 2979 rawtext = deltacomputer.buildtext(revinfo)
2980 2980
2981 2981 if type(rawtext) == bytes: # only accept immutable objects
2982 2982 self._revisioncache = (node, curr, rawtext)
2983 2983 self._chainbasecache[curr] = deltainfo.chainbase
2984 2984 return curr
2985 2985
2986 2986 def _get_data_offset(self, prev):
2987 2987 """Returns the current offset in the (in-transaction) data file.
2988 2988         Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2989 2989 file to store that information: since sidedata can be rewritten to the
2990 2990 end of the data file within a transaction, you can have cases where, for
2991 2991 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2992 2992 to `n - 1`'s sidedata being written after `n`'s data.
2993 2993
2994 2994 TODO cache this in a docket file before getting out of experimental."""
2995 2995 if self._docket is None:
2996 2996 return self.end(prev)
2997 2997 else:
2998 2998 return self._docket.data_end
2999 2999
3000 3000 def _writeentry(
3001 3001 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3002 3002 ):
3003 3003 # Files opened in a+ mode have inconsistent behavior on various
3004 3004 # platforms. Windows requires that a file positioning call be made
3005 3005 # when the file handle transitions between reads and writes. See
3006 3006 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3007 3007 # platforms, Python or the platform itself can be buggy. Some versions
3008 3008 # of Solaris have been observed to not append at the end of the file
3009 3009 # if the file was seeked to before the end. See issue4943 for more.
3010 3010 #
3011 3011 # We work around this issue by inserting a seek() before writing.
3012 3012 # Note: This is likely not necessary on Python 3. However, because
3013 3013 # the file handle is reused for reads and may be seeked there, we need
3014 3014 # to be careful before changing this.
3015 3015 if self._writinghandles is None:
3016 3016 msg = b'adding revision outside `revlog._writing` context'
3017 3017 raise error.ProgrammingError(msg)
3018 3018 ifh, dfh, sdfh = self._writinghandles
3019 3019 if self._docket is None:
3020 3020 ifh.seek(0, os.SEEK_END)
3021 3021 else:
3022 3022 ifh.seek(self._docket.index_end, os.SEEK_SET)
3023 3023 if dfh:
3024 3024 if self._docket is None:
3025 3025 dfh.seek(0, os.SEEK_END)
3026 3026 else:
3027 3027 dfh.seek(self._docket.data_end, os.SEEK_SET)
3028 3028 if sdfh:
3029 3029 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3030 3030
3031 3031 curr = len(self) - 1
3032 3032 if not self._inline:
3033 3033 transaction.add(self._datafile, offset)
3034 3034 if self._sidedatafile:
3035 3035 transaction.add(self._sidedatafile, sidedata_offset)
3036 3036 transaction.add(self._indexfile, curr * len(entry))
3037 3037 if data[0]:
3038 3038 dfh.write(data[0])
3039 3039 dfh.write(data[1])
3040 3040 if sidedata:
3041 3041 sdfh.write(sidedata)
3042 3042 ifh.write(entry)
3043 3043 else:
3044 3044 offset += curr * self.index.entry_size
3045 3045 transaction.add(self._indexfile, offset)
3046 3046 ifh.write(entry)
3047 3047 ifh.write(data[0])
3048 3048 ifh.write(data[1])
3049 3049 assert not sidedata
3050 3050 self._enforceinlinesize(transaction)
3051 3051 if self._docket is not None:
3052 3052 # revlog-v2 always has 3 writing handles, help Pytype
3053 3053 wh1 = self._writinghandles[0]
3054 3054 wh2 = self._writinghandles[1]
3055 3055 wh3 = self._writinghandles[2]
3056 3056 assert wh1 is not None
3057 3057 assert wh2 is not None
3058 3058 assert wh3 is not None
3059 3059 self._docket.index_end = wh1.tell()
3060 3060 self._docket.data_end = wh2.tell()
3061 3061 self._docket.sidedata_end = wh3.tell()
3062 3062
3063 3063 nodemaputil.setup_persistent_nodemap(transaction, self)
3064 3064
3065 3065 def addgroup(
3066 3066 self,
3067 3067 deltas,
3068 3068 linkmapper,
3069 3069 transaction,
3070 3070 alwayscache=False,
3071 3071 addrevisioncb=None,
3072 3072 duplicaterevisioncb=None,
3073 3073 debug_info=None,
3074 3074 delta_base_reuse_policy=None,
3075 3075 ):
3076 3076 """
3077 3077 add a delta group
3078 3078
3079 3079         Given a set of deltas, add them to the revision log. The
3080 3080         first delta is against its parent, which should be in our
3081 3081         log; the rest are against the previous delta.
3082 3082
3083 3083 If ``addrevisioncb`` is defined, it will be called with arguments of
3084 3084 this revlog and the node that was added.
3085 3085 """
3086 3086
3087 3087 if self._adding_group:
3088 3088 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3089 3089
3090 3090 # read the default delta-base reuse policy from revlog config if the
3091 3091 # group did not specify one.
3092 3092 if delta_base_reuse_policy is None:
3093 3093 if (
3094 3094 self.delta_config.general_delta
3095 3095 and self.delta_config.lazy_delta_base
3096 3096 ):
3097 3097 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3098 3098 else:
3099 3099 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3100 3100
3101 3101 self._adding_group = True
3102 3102 empty = True
3103 3103 try:
3104 3104 with self._writing(transaction):
3105 3105 write_debug = None
3106 3106 if self.delta_config.debug_delta:
3107 3107 write_debug = transaction._report
3108 3108 deltacomputer = deltautil.deltacomputer(
3109 3109 self,
3110 3110 write_debug=write_debug,
3111 3111 debug_info=debug_info,
3112 3112 )
3113 3113 # loop through our set of deltas
3114 3114 for data in deltas:
3115 3115 (
3116 3116 node,
3117 3117 p1,
3118 3118 p2,
3119 3119 linknode,
3120 3120 deltabase,
3121 3121 delta,
3122 3122 flags,
3123 3123 sidedata,
3124 3124 ) = data
3125 3125 link = linkmapper(linknode)
3126 3126 flags = flags or REVIDX_DEFAULT_FLAGS
3127 3127
3128 3128 rev = self.index.get_rev(node)
3129 3129 if rev is not None:
3130 3130 # this can happen if two branches make the same change
3131 3131 self._nodeduplicatecallback(transaction, rev)
3132 3132 if duplicaterevisioncb:
3133 3133 duplicaterevisioncb(self, rev)
3134 3134 empty = False
3135 3135 continue
3136 3136
3137 3137 for p in (p1, p2):
3138 3138 if not self.index.has_node(p):
3139 3139 raise error.LookupError(
3140 3140 p, self.radix, _(b'unknown parent')
3141 3141 )
3142 3142
3143 3143 if not self.index.has_node(deltabase):
3144 3144 raise error.LookupError(
3145 3145 deltabase, self.display_id, _(b'unknown delta base')
3146 3146 )
3147 3147
3148 3148 baserev = self.rev(deltabase)
3149 3149
3150 3150 if baserev != nullrev and self.iscensored(baserev):
3151 3151                         # if base is censored, delta must be a full replacement in a
3152 3152 # single patch operation
3153 3153 hlen = struct.calcsize(b">lll")
3154 3154 oldlen = self.rawsize(baserev)
3155 3155 newlen = len(delta) - hlen
3156 3156 if delta[:hlen] != mdiff.replacediffheader(
3157 3157 oldlen, newlen
3158 3158 ):
3159 3159 raise error.CensoredBaseError(
3160 3160 self.display_id, self.node(baserev)
3161 3161 )
3162 3162
3163 3163 if not flags and self._peek_iscensored(baserev, delta):
3164 3164 flags |= REVIDX_ISCENSORED
3165 3165
3166 3166 # We assume consumers of addrevisioncb will want to retrieve
3167 3167 # the added revision, which will require a call to
3168 3168 # revision(). revision() will fast path if there is a cache
3169 3169 # hit. So, we tell _addrevision() to always cache in this case.
3170 3170 # We're only using addgroup() in the context of changegroup
3171 3171 # generation so the revision data can always be handled as raw
3172 3172 # by the flagprocessor.
3173 3173 rev = self._addrevision(
3174 3174 node,
3175 3175 None,
3176 3176 transaction,
3177 3177 link,
3178 3178 p1,
3179 3179 p2,
3180 3180 flags,
3181 3181 (baserev, delta, delta_base_reuse_policy),
3182 3182 alwayscache=alwayscache,
3183 3183 deltacomputer=deltacomputer,
3184 3184 sidedata=sidedata,
3185 3185 )
3186 3186
3187 3187 if addrevisioncb:
3188 3188 addrevisioncb(self, rev)
3189 3189 empty = False
3190 3190 finally:
3191 3191 self._adding_group = False
3192 3192 return not empty
3193 3193
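# A minimal sketch of driving addgroup() by hand (hypothetical caller;
# `rl` is an open revlog and `tr` a running transaction). Each entry in
# `deltas` is the 8-tuple unpacked in the loop above; here the delta is a
# full text stored against the null revision via mdiff.trivialdiffheader:
#
#     fulltext = b'new file content\n'
#     deltas = [
#         (node, p1, p2, linknode,
#          rl.node(nullrev),                  # delta base
#          mdiff.trivialdiffheader(len(fulltext)) + fulltext,
#          0,                                 # flags
#          {}),                               # sidedata
#     ]
#     rl.addgroup(deltas, linkmapper, tr)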
3194 3194 def iscensored(self, rev):
3195 3195 """Check if a file revision is censored."""
3196 3196 if not self.feature_config.censorable:
3197 3197 return False
3198 3198
3199 3199 return self.flags(rev) & REVIDX_ISCENSORED
3200 3200
3201 3201 def _peek_iscensored(self, baserev, delta):
3202 3202 """Quickly check if a delta produces a censored revision."""
3203 3203 if not self.feature_config.censorable:
3204 3204 return False
3205 3205
3206 3206 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3207 3207
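# A hedged sketch of the censored-base rule enforced in addgroup() above:
# a delta over a censored base must replace the whole base text in one
# patch operation, i.e. (assuming a caller-built `newtext`):
#
#     header = mdiff.replacediffheader(rl.rawsize(baserev), len(newtext))
#     delta = header + newtext    # ">lll" header followed by the new text
#
# Any other delta shape over a censored base raises CensoredBaseError.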
3208 3208 def getstrippoint(self, minlink):
3209 3209 """find the minimum rev that must be stripped to strip the linkrev
3210 3210
3211 3211 Returns a tuple containing the minimum rev and a set of all revs that
3212 3212 have linkrevs that will be broken by this strip.
3213 3213 """
3214 3214 return storageutil.resolvestripinfo(
3215 3215 minlink,
3216 3216 len(self) - 1,
3217 3217 self.headrevs(),
3218 3218 self.linkrev,
3219 3219 self.parentrevs,
3220 3220 )
3221 3221
3222 3222 def strip(self, minlink, transaction):
3223 3223 """truncate the revlog on the first revision with a linkrev >= minlink
3224 3224
3225 3225 This function is called when we're stripping revision minlink and
3226 3226 its descendants from the repository.
3227 3227
3228 3228 We have to remove all revisions with linkrev >= minlink, because
3229 3229 the equivalent changelog revisions will be renumbered after the
3230 3230 strip.
3231 3231
3232 3232 So we truncate the revlog on the first of these revisions, and
3233 3233 trust that the caller has saved the revisions that shouldn't be
3234 3234 removed and that it'll re-add them after this truncation.
3235 3235 """
3236 3236 if len(self) == 0:
3237 3237 return
3238 3238
3239 3239 rev, _ = self.getstrippoint(minlink)
3240 3240 if rev == len(self):
3241 3241 return
3242 3242
3243 3243 # first truncate the files on disk
3244 3244 data_end = self.start(rev)
3245 3245 if not self._inline:
3246 3246 transaction.add(self._datafile, data_end)
3247 3247 end = rev * self.index.entry_size
3248 3248 else:
3249 3249 end = data_end + (rev * self.index.entry_size)
3250 3250
3251 3251 if self._sidedatafile:
3252 3252 sidedata_end = self.sidedata_cut_off(rev)
3253 3253 transaction.add(self._sidedatafile, sidedata_end)
3254 3254
3255 3255 transaction.add(self._indexfile, end)
3256 3256 if self._docket is not None:
3257 3257 # XXX we could leverage the docket while stripping. However, it is
3258 3258 # not powerful enough at the time of this comment
3259 3259 self._docket.index_end = end
3260 3260 self._docket.data_end = data_end
3261 3261 self._docket.sidedata_end = sidedata_end
3262 3262 self._docket.write(transaction, stripping=True)
3263 3263
3264 3264 # then reset internal state in memory to forget those revisions
3265 3265 self._revisioncache = None
3266 3266 self._chaininfocache = util.lrucachedict(500)
3267 3267 self._segmentfile.clear_cache()
3268 3268 self._segmentfile_sidedata.clear_cache()
3269 3269
3270 3270 del self.index[rev:-1]
3271 3271
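# Hypothetical strip workflow combining the two methods above: locate the
# first revision to drop, let the caller save the collateral damage, then
# truncate (a sketch, not the actual repair code):
#
#     striprev, brokenrevs = rl.getstrippoint(minlink)
#     # ... back up revisions in `brokenrevs` for later re-adding ...
#     rl.strip(minlink, tr)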
3272 3272 def checksize(self):
3273 3273 """Check size of index and data files
3274 3274
3275 3275 return a (dd, di) tuple.
3276 3276 - dd: extra bytes for the "data" file
3277 3277 - di: extra bytes for the "index" file
3278 3278
3279 3279 A healthy revlog will return (0, 0).
3280 3280 """
3281 3281 expected = 0
3282 3282 if len(self):
3283 3283 expected = max(0, self.end(len(self) - 1))
3284 3284
3285 3285 try:
3286 3286 with self._datafp() as f:
3287 3287 f.seek(0, io.SEEK_END)
3288 3288 actual = f.tell()
3289 3289 dd = actual - expected
3290 3290 except FileNotFoundError:
3291 3291 dd = 0
3292 3292
3293 3293 try:
3294 3294 f = self.opener(self._indexfile)
3295 3295 f.seek(0, io.SEEK_END)
3296 3296 actual = f.tell()
3297 3297 f.close()
3298 3298 s = self.index.entry_size
3299 3299 i = max(0, actual // s)
3300 3300 di = actual - (i * s)
3301 3301 if self._inline:
3302 3302 databytes = 0
3303 3303 for r in self:
3304 3304 databytes += max(0, self.length(r))
3305 3305 dd = 0
3306 3306 di = actual - len(self) * s - databytes
3307 3307 except FileNotFoundError:
3308 3308 di = 0
3309 3309
3310 3310 return (dd, di)
3311 3311
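# Interpreting the (dd, di) tuple in a hypothetical integrity probe: a
# healthy revlog reports (0, 0); anything else is trailing data, usually
# left behind by an interrupted write.
#
#     dd, di = rl.checksize()
#     if dd or di:
#         raise error.RevlogError(
#             b'%d extra data bytes, %d extra index bytes' % (dd, di)
#         )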
3312 3312 def files(self):
3313 3313 res = [self._indexfile]
3314 3314 if self._docket_file is None:
3315 3315 if not self._inline:
3316 3316 res.append(self._datafile)
3317 3317 else:
3318 3318 res.append(self._docket_file)
3319 3319 res.extend(self._docket.old_index_filepaths(include_empty=False))
3320 3320 if self._docket.data_end:
3321 3321 res.append(self._datafile)
3322 3322 res.extend(self._docket.old_data_filepaths(include_empty=False))
3323 3323 if self._docket.sidedata_end:
3324 3324 res.append(self._sidedatafile)
3325 3325 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3326 3326 return res
3327 3327
3328 3328 def emitrevisions(
3329 3329 self,
3330 3330 nodes,
3331 3331 nodesorder=None,
3332 3332 revisiondata=False,
3333 3333 assumehaveparentrevisions=False,
3334 3334 deltamode=repository.CG_DELTAMODE_STD,
3335 3335 sidedata_helpers=None,
3336 3336 debug_info=None,
3337 3337 ):
3338 3338 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3339 3339 raise error.ProgrammingError(
3340 3340 b'unhandled value for nodesorder: %s' % nodesorder
3341 3341 )
3342 3342
3343 3343 if nodesorder is None and not self.delta_config.general_delta:
3344 3344 nodesorder = b'storage'
3345 3345
3346 3346 if (
3347 3347 not self._storedeltachains
3348 3348 and deltamode != repository.CG_DELTAMODE_PREV
3349 3349 ):
3350 3350 deltamode = repository.CG_DELTAMODE_FULL
3351 3351
3352 3352 return storageutil.emitrevisions(
3353 3353 self,
3354 3354 nodes,
3355 3355 nodesorder,
3356 3356 revlogrevisiondelta,
3357 3357 deltaparentfn=self.deltaparent,
3358 3358 candeltafn=self._candelta,
3359 3359 rawsizefn=self.rawsize,
3360 3360 revdifffn=self.revdiff,
3361 3361 flagsfn=self.flags,
3362 3362 deltamode=deltamode,
3363 3363 revisiondata=revisiondata,
3364 3364 assumehaveparentrevisions=assumehaveparentrevisions,
3365 3365 sidedata_helpers=sidedata_helpers,
3366 3366 debug_info=debug_info,
3367 3367 )
3368 3368
3369 3369 DELTAREUSEALWAYS = b'always'
3370 3370 DELTAREUSESAMEREVS = b'samerevs'
3371 3371 DELTAREUSENEVER = b'never'
3372 3372
3373 3373 DELTAREUSEFULLADD = b'fulladd'
3374 3374
3375 3375 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3376 3376
3377 3377 def clone(
3378 3378 self,
3379 3379 tr,
3380 3380 destrevlog,
3381 3381 addrevisioncb=None,
3382 3382 deltareuse=DELTAREUSESAMEREVS,
3383 3383 forcedeltabothparents=None,
3384 3384 sidedata_helpers=None,
3385 3385 ):
3386 3386 """Copy this revlog to another, possibly with format changes.
3387 3387
3388 3388 The destination revlog will contain the same revisions and nodes.
3389 3389 However, it may not be bit-for-bit identical due to e.g. delta encoding
3390 3390 differences.
3391 3391
3392 3392 The ``deltareuse`` argument controls how deltas from the existing revlog
3393 3393 are preserved in the destination revlog. The argument can have the
3394 3394 following values:
3395 3395
3396 3396 DELTAREUSEALWAYS
3397 3397 Deltas will always be reused (if possible), even if the destination
3398 3398 revlog would not select the same revisions for the delta. This is the
3399 3399 fastest mode of operation.
3400 3400 DELTAREUSESAMEREVS
3401 3401 Deltas will be reused if the destination revlog would pick the same
3402 3402 revisions for the delta. This mode strikes a balance between speed
3403 3403 and optimization.
3404 3404 DELTAREUSENEVER
3405 3405 Deltas will never be reused. This is the slowest mode of execution.
3406 3406 This mode can be used to recompute deltas (e.g. if the diff/delta
3407 3407 algorithm changes).
3408 3408 DELTAREUSEFULLADD
3409 3409 Revisions will be re-added as if they were new content. This is
3410 3410 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3411 3411 e.g. large file detection and handling.
3412 3412
3413 3413 Delta computation can be slow, so the choice of delta reuse policy can
3414 3414 significantly affect run time.
3415 3415
3416 3416 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3417 3417 two extremes. Deltas will be reused if they are appropriate. But if the
3418 3418 delta could choose a better revision, it will do so. This means if you
3419 3419 are converting a non-generaldelta revlog to a generaldelta revlog,
3420 3420 deltas will be recomputed if the delta's parent isn't a parent of the
3421 3421 revision.
3422 3422
3423 3423 In addition to the delta policy, the ``forcedeltabothparents``
3424 3424 argument controls whether deltas are forcibly computed against both
3425 3425 parents for merges. When None, the destination's existing setting is used.
3426 3426
3427 3427 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3428 3428 `sidedata_helpers`.
3429 3429 """
3430 3430 if deltareuse not in self.DELTAREUSEALL:
3431 3431 raise ValueError(
3432 3432 _(b'value for deltareuse invalid: %s') % deltareuse
3433 3433 )
3434 3434
3435 3435 if len(destrevlog):
3436 3436 raise ValueError(_(b'destination revlog is not empty'))
3437 3437
3438 3438 if getattr(self, 'filteredrevs', None):
3439 3439 raise ValueError(_(b'source revlog has filtered revisions'))
3440 3440 if getattr(destrevlog, 'filteredrevs', None):
3441 3441 raise ValueError(_(b'destination revlog has filtered revisions'))
3442 3442
3443 3443 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3444 3444 # if possible.
3445 3445 old_delta_config = destrevlog.delta_config
3446 3446 destrevlog.delta_config = destrevlog.delta_config.copy()
3447 3447
3448 3448 try:
3449 3449 if deltareuse == self.DELTAREUSEALWAYS:
3450 3450 destrevlog.delta_config.lazy_delta_base = True
3451 3451 destrevlog.delta_config.lazy_delta = True
3452 3452 elif deltareuse == self.DELTAREUSESAMEREVS:
3453 3453 destrevlog.delta_config.lazy_delta_base = False
3454 3454 destrevlog.delta_config.lazy_delta = True
3455 3455 elif deltareuse == self.DELTAREUSENEVER:
3456 3456 destrevlog.delta_config.lazy_delta_base = False
3457 3457 destrevlog.delta_config.lazy_delta = False
3458 3458
3459 3459 delta_both_parents = (
3460 3460 forcedeltabothparents or old_delta_config.delta_both_parents
3461 3461 )
3462 3462 destrevlog.delta_config.delta_both_parents = delta_both_parents
3463 3463
3464 3464 with self.reading():
3465 3465 self._clone(
3466 3466 tr,
3467 3467 destrevlog,
3468 3468 addrevisioncb,
3469 3469 deltareuse,
3470 3470 forcedeltabothparents,
3471 3471 sidedata_helpers,
3472 3472 )
3473 3473
3474 3474 finally:
3475 3475 destrevlog.delta_config = old_delta_config
3476 3476
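# A sketch of an upgrade-style clone (hypothetical `src`/`dst` revlogs):
# force every delta to be recomputed while copying into an empty
# destination, e.g. after changing the delta algorithm:
#
#     with repo.transaction(b'rewrite-deltas') as tr:
#         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)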
3477 3477 def _clone(
3478 3478 self,
3479 3479 tr,
3480 3480 destrevlog,
3481 3481 addrevisioncb,
3482 3482 deltareuse,
3483 3483 forcedeltabothparents,
3484 3484 sidedata_helpers,
3485 3485 ):
3486 3486 """perform the core duty of `revlog.clone` after parameter processing"""
3487 3487 write_debug = None
3488 3488 if self.delta_config.debug_delta:
3489 3489 write_debug = tr._report
3490 3490 deltacomputer = deltautil.deltacomputer(
3491 3491 destrevlog,
3492 3492 write_debug=write_debug,
3493 3493 )
3494 3494 index = self.index
3495 3495 for rev in self:
3496 3496 entry = index[rev]
3497 3497
3498 3498 # Some classes override linkrev to take filtered revs into
3499 3499 # account. Use raw entry from index.
3500 3500 flags = entry[0] & 0xFFFF
3501 3501 linkrev = entry[4]
3502 3502 p1 = index[entry[5]][7]
3503 3503 p2 = index[entry[6]][7]
3504 3504 node = entry[7]
3505 3505
3506 3506 # (Possibly) reuse the delta from the revlog if allowed and
3507 3507 # the revlog chunk is a delta.
3508 3508 cachedelta = None
3509 3509 rawtext = None
3510 3510 if deltareuse == self.DELTAREUSEFULLADD:
3511 3511 text = self._revisiondata(rev)
3512 3512 sidedata = self.sidedata(rev)
3513 3513
3514 3514 if sidedata_helpers is not None:
3515 3515 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3516 3516 self, sidedata_helpers, sidedata, rev
3517 3517 )
3518 3518 flags = flags | new_flags[0] & ~new_flags[1]
3519 3519
3520 3520 destrevlog.addrevision(
3521 3521 text,
3522 3522 tr,
3523 3523 linkrev,
3524 3524 p1,
3525 3525 p2,
3526 3526 cachedelta=cachedelta,
3527 3527 node=node,
3528 3528 flags=flags,
3529 3529 deltacomputer=deltacomputer,
3530 3530 sidedata=sidedata,
3531 3531 )
3532 3532 else:
3533 3533 if destrevlog.delta_config.lazy_delta:
3534 3534 dp = self.deltaparent(rev)
3535 3535 if dp != nullrev:
3536 3536 cachedelta = (dp, bytes(self._chunk(rev)))
3537 3537
3538 3538 sidedata = None
3539 3539 if not cachedelta:
3540 rawtext = self._revisiondata(rev)
3540 try:
3541 rawtext = self._revisiondata(rev)
3542 except error.CensoredNodeError as censored:
3543 assert flags & REVIDX_ISCENSORED
3544 rawtext = censored.tombstone
3541 3545 sidedata = self.sidedata(rev)
3542 3546 if sidedata is None:
3543 3547 sidedata = self.sidedata(rev)
3544 3548
3545 3549 if sidedata_helpers is not None:
3546 3550 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3547 3551 self, sidedata_helpers, sidedata, rev
3548 3552 )
3549 3553 flags = flags | new_flags[0] & ~new_flags[1]
3550 3554
3551 3555 with destrevlog._writing(tr):
3552 3556 destrevlog._addrevision(
3553 3557 node,
3554 3558 rawtext,
3555 3559 tr,
3556 3560 linkrev,
3557 3561 p1,
3558 3562 p2,
3559 3563 flags,
3560 3564 cachedelta,
3561 3565 deltacomputer=deltacomputer,
3562 3566 sidedata=sidedata,
3563 3567 )
3564 3568
3565 3569 if addrevisioncb:
3566 3570 addrevisioncb(self, rev, node)
3567 3571
3568 3572 def censorrevision(self, tr, censornode, tombstone=b''):
3569 3573 if self._format_version == REVLOGV0:
3570 3574 raise error.RevlogError(
3571 3575 _(b'cannot censor with version %d revlogs')
3572 3576 % self._format_version
3573 3577 )
3574 3578 elif self._format_version == REVLOGV1:
3575 3579 rewrite.v1_censor(self, tr, censornode, tombstone)
3576 3580 else:
3577 3581 rewrite.v2_censor(self, tr, censornode, tombstone)
3578 3582
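# A hedged example of the version dispatch above (hypothetical names):
# censoring one file node inside a transaction, replacing its raw text
# with a tombstone:
#
#     with repo.transaction(b'censor') as tr:
#         rl.censorrevision(tr, badnode, tombstone=b'censored')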
3579 3583 def verifyintegrity(self, state):
3580 3584 """Verifies the integrity of the revlog.
3581 3585
3582 3586 Yields ``revlogproblem`` instances describing problems that are
3583 3587 found.
3584 3588 """
3585 3589 dd, di = self.checksize()
3586 3590 if dd:
3587 3591 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3588 3592 if di:
3589 3593 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3590 3594
3591 3595 version = self._format_version
3592 3596
3593 3597 # The verifier tells us what version revlog we should be.
3594 3598 if version != state[b'expectedversion']:
3595 3599 yield revlogproblem(
3596 3600 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3597 3601 % (self.display_id, version, state[b'expectedversion'])
3598 3602 )
3599 3603
3600 3604 state[b'skipread'] = set()
3601 3605 state[b'safe_renamed'] = set()
3602 3606
3603 3607 for rev in self:
3604 3608 node = self.node(rev)
3605 3609
3606 3610 # Verify contents. 4 cases to care about:
3607 3611 #
3608 3612 # common: the most common case
3609 3613 # rename: with a rename
3610 3614 # meta: file content starts with b'\1\n', the metadata
3611 3615 # header defined in filelog.py, but without a rename
3612 3616 # ext: content stored externally
3613 3617 #
3614 3618 # More formally, their differences are shown below:
3615 3619 #
3616 3620 # | common | rename | meta | ext
3617 3621 # -------------------------------------------------------
3618 3622 # flags() | 0 | 0 | 0 | not 0
3619 3623 # renamed() | False | True | False | ?
3620 3624 # rawtext[0:2]=='\1\n'| False | True | True | ?
3621 3625 #
3622 3626 # "rawtext" means the raw text stored in revlog data, which
3623 3627 # could be retrieved by "rawdata(rev)". "text"
3624 3628 # mentioned below is "revision(rev)".
3625 3629 #
3626 3630 # There are 3 different lengths stored physically:
3627 3631 # 1. L1: rawsize, stored in revlog index
3628 3632 # 2. L2: len(rawtext), stored in revlog data
3629 3633 # 3. L3: len(text), stored in revlog data if flags==0, or
3630 3634 # possibly somewhere else if flags!=0
3631 3635 #
3632 3636 # L1 should be equal to L2. L3 could be different from them.
3633 3637 # "text" may or may not affect commit hash depending on flag
3634 3638 # processors (see flagutil.addflagprocessor).
3635 3639 #
3636 3640 # | common | rename | meta | ext
3637 3641 # -------------------------------------------------
3638 3642 # rawsize() | L1 | L1 | L1 | L1
3639 3643 # size() | L1 | L2-LM | L1(*) | L1 (?)
3640 3644 # len(rawtext) | L2 | L2 | L2 | L2
3641 3645 # len(text) | L2 | L2 | L2 | L3
3642 3646 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3643 3647 #
3644 3648 # LM: length of metadata, depending on rawtext
3645 3649 # (*): not ideal, see comment in filelog.size
3646 3650 # (?): could be "- len(meta)" if the resolved content has
3647 3651 # rename metadata
3648 3652 #
3649 3653 # Checks needed to be done:
3650 3654 # 1. length check: L1 == L2, in all cases.
3651 3655 # 2. hash check: depending on flag processor, we may need to
3652 3656 # use either "text" (external), or "rawtext" (in revlog).
3653 3657
3654 3658 try:
3655 3659 skipflags = state.get(b'skipflags', 0)
3656 3660 if skipflags:
3657 3661 skipflags &= self.flags(rev)
3658 3662
3659 3663 _verify_revision(self, skipflags, state, node)
3660 3664
3661 3665 l1 = self.rawsize(rev)
3662 3666 l2 = len(self.rawdata(node))
3663 3667
3664 3668 if l1 != l2:
3665 3669 yield revlogproblem(
3666 3670 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3667 3671 node=node,
3668 3672 )
3669 3673
3670 3674 except error.CensoredNodeError:
3671 3675 if state[b'erroroncensored']:
3672 3676 yield revlogproblem(
3673 3677 error=_(b'censored file data'), node=node
3674 3678 )
3675 3679 state[b'skipread'].add(node)
3676 3680 except Exception as e:
3677 3681 yield revlogproblem(
3678 3682 error=_(b'unpacking %s: %s')
3679 3683 % (short(node), stringutil.forcebytestr(e)),
3680 3684 node=node,
3681 3685 )
3682 3686 state[b'skipread'].add(node)
3683 3687
3684 3688 def storageinfo(
3685 3689 self,
3686 3690 exclusivefiles=False,
3687 3691 sharedfiles=False,
3688 3692 revisionscount=False,
3689 3693 trackedsize=False,
3690 3694 storedsize=False,
3691 3695 ):
3692 3696 d = {}
3693 3697
3694 3698 if exclusivefiles:
3695 3699 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3696 3700 if not self._inline:
3697 3701 d[b'exclusivefiles'].append((self.opener, self._datafile))
3698 3702
3699 3703 if sharedfiles:
3700 3704 d[b'sharedfiles'] = []
3701 3705
3702 3706 if revisionscount:
3703 3707 d[b'revisionscount'] = len(self)
3704 3708
3705 3709 if trackedsize:
3706 3710 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3707 3711
3708 3712 if storedsize:
3709 3713 d[b'storedsize'] = sum(
3710 3714 self.opener.stat(path).st_size for path in self.files()
3711 3715 )
3712 3716
3713 3717 return d
3714 3718
3715 3719 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3716 3720 if not self.feature_config.has_side_data:
3717 3721 return
3718 3722 # revlog formats with sidedata support do not support inline data
3719 3723 assert not self._inline
3720 3724 if not helpers[1] and not helpers[2]:
3721 3725 # Nothing to generate or remove
3722 3726 return
3723 3727
3724 3728 new_entries = []
3725 3729 # append the new sidedata
3726 3730 with self._writing(transaction):
3727 3731 ifh, dfh, sdfh = self._writinghandles
3728 3732 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3729 3733
3730 3734 current_offset = sdfh.tell()
3731 3735 for rev in range(startrev, endrev + 1):
3732 3736 entry = self.index[rev]
3733 3737 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3734 3738 store=self,
3735 3739 sidedata_helpers=helpers,
3736 3740 sidedata={},
3737 3741 rev=rev,
3738 3742 )
3739 3743
3740 3744 serialized_sidedata = sidedatautil.serialize_sidedata(
3741 3745 new_sidedata
3742 3746 )
3743 3747
3744 3748 sidedata_compression_mode = COMP_MODE_INLINE
3745 3749 if serialized_sidedata and self.feature_config.has_side_data:
3746 3750 sidedata_compression_mode = COMP_MODE_PLAIN
3747 3751 h, comp_sidedata = self.compress(serialized_sidedata)
3748 3752 if (
3749 3753 h != b'u'
3750 3754 and comp_sidedata[0] != b'\0'
3751 3755 and len(comp_sidedata) < len(serialized_sidedata)
3752 3756 ):
3753 3757 assert not h
3754 3758 if (
3755 3759 comp_sidedata[0]
3756 3760 == self._docket.default_compression_header
3757 3761 ):
3758 3762 sidedata_compression_mode = COMP_MODE_DEFAULT
3759 3763 serialized_sidedata = comp_sidedata
3760 3764 else:
3761 3765 sidedata_compression_mode = COMP_MODE_INLINE
3762 3766 serialized_sidedata = comp_sidedata
3763 3767 if entry[8] != 0 or entry[9] != 0:
3764 3768 # rewriting entries that already have sidedata is not
3765 3769 # supported yet, because it introduces garbage data in the
3766 3770 # revlog.
3767 3771 msg = b"rewriting existing sidedata is not supported yet"
3768 3772 raise error.Abort(msg)
3769 3773
3770 3774 # Apply (potential) flags to add and to remove after running
3771 3775 # the sidedata helpers
3772 3776 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3773 3777 entry_update = (
3774 3778 current_offset,
3775 3779 len(serialized_sidedata),
3776 3780 new_offset_flags,
3777 3781 sidedata_compression_mode,
3778 3782 )
3779 3783
3780 3784 # the sidedata computation might have moved the file cursors around
3781 3785 sdfh.seek(current_offset, os.SEEK_SET)
3782 3786 sdfh.write(serialized_sidedata)
3783 3787 new_entries.append(entry_update)
3784 3788 current_offset += len(serialized_sidedata)
3785 3789 self._docket.sidedata_end = sdfh.tell()
3786 3790
3787 3791 # rewrite the new index entries
3788 3792 ifh.seek(startrev * self.index.entry_size)
3789 3793 for i, e in enumerate(new_entries):
3790 3794 rev = startrev + i
3791 3795 self.index.replace_sidedata_info(rev, *e)
3792 3796 packed = self.index.entry_binary(rev)
3793 3797 if rev == 0 and self._docket is None:
3794 3798 header = self._format_flags | self._format_version
3795 3799 header = self.index.pack_header(header)
3796 3800 packed = header + packed
3797 3801 ifh.write(packed)
@@ -1,611 +1,603 b''
1 1 #require no-reposimplestore
2 2 #testcases revlogv1 revlogv2
3 3
4 4 #if revlogv2
5 5
6 6 $ cat >> $HGRCPATH <<EOF
7 7 > [experimental]
8 8 > revlogv2=enable-unstable-format-and-corrupt-my-data
9 9 > EOF
10 10
11 11 #endif
12 12
13 13 $ cp $HGRCPATH $HGRCPATH.orig
14 14
15 15 Create repo with unimpeachable content
16 16
17 17 $ hg init r
18 18 $ cd r
19 19 $ echo 'Initially untainted file' > target
20 20 $ echo 'Normal file here' > bystander
21 21 $ hg add target bystander
22 22 $ hg ci -m init
23 23
24 24 Clone repo so we can test pull later
25 25
26 26 $ cd ..
27 27 $ hg clone r rpull
28 28 updating to branch default
29 29 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
30 30 $ cd r
31 31
32 32 Introduce content which will ultimately require censorship. Name the first
33 33 censored node C1, second C2, and so on
34 34
35 35 $ echo 'Tainted file' > target
36 36 $ echo 'Passwords: hunter2' >> target
37 37 $ hg ci -m taint target
38 38 $ C1=`hg id --debug -i`
39 39
40 40 $ echo 'hunter3' >> target
41 41 $ echo 'Normal file v2' > bystander
42 42 $ hg ci -m moretaint target bystander
43 43 $ C2=`hg id --debug -i`
44 44
45 45 Add new sanitized versions to correct our mistake. Name the first head H1,
46 46 the second head H2, and so on
47 47
48 48 $ echo 'Tainted file is now sanitized' > target
49 49 $ hg ci -m sanitized target
50 50 $ H1=`hg id --debug -i`
51 51
52 52 $ hg update -r $C2
53 53 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
54 54 $ echo 'Tainted file now super sanitized' > target
55 55 $ hg ci -m 'super sanitized' target
56 56 created new head
57 57 $ H2=`hg id --debug -i`
58 58
59 59 Verify target contents before censorship at each revision
60 60
61 61 $ hg cat -r $H1 target | head -n 10
62 62 Tainted file is now sanitized
63 63 $ hg cat -r $H2 target | head -n 10
64 64 Tainted file now super sanitized
65 65 $ hg cat -r $C2 target | head -n 10
66 66 Tainted file
67 67 Passwords: hunter2
68 68 hunter3
69 69 $ hg cat -r $C1 target | head -n 10
70 70 Tainted file
71 71 Passwords: hunter2
72 72 $ hg cat -r 0 target | head -n 10
73 73 Initially untainted file
74 74
75 75 Censor revision with 2 offenses
76 76
77 77 (this also tests file pattern matching: path relative to cwd case)
78 78
79 79 $ mkdir -p foo/bar/baz
80 80 $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target
81 81 $ hg cat -r $H1 target | head -n 10
82 82 Tainted file is now sanitized
83 83 $ hg cat -r $H2 target | head -n 10
84 84 Tainted file now super sanitized
85 85 $ hg cat -r $C2 target | head -n 10
86 86 abort: censored node: 1e0247a9a4b7
87 87 (set censor.policy to ignore errors)
88 88 $ hg cat -r $C1 target | head -n 10
89 89 Tainted file
90 90 Passwords: hunter2
91 91 $ hg cat -r 0 target | head -n 10
92 92 Initially untainted file
93 93
94 94 Censor revision with 1 offense
95 95
96 96 (this also tests file pattern matching: with 'path:' scheme)
97 97
98 98 $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C1 path:target
99 99 $ hg cat -r $H1 target | head -n 10
100 100 Tainted file is now sanitized
101 101 $ hg cat -r $H2 target | head -n 10
102 102 Tainted file now super sanitized
103 103 $ hg cat -r $C2 target | head -n 10
104 104 abort: censored node: 1e0247a9a4b7
105 105 (set censor.policy to ignore errors)
106 106 $ hg cat -r $C1 target | head -n 10
107 107 abort: censored node: 613bc869fceb
108 108 (set censor.policy to ignore errors)
109 109 $ hg cat -r 0 target | head -n 10
110 110 Initially untainted file
111 111
112 112 Can only check out target at uncensored revisions; -X is a workaround for --all
113 113
114 114 $ hg revert -r $C2 target | head -n 10
115 115 abort: censored node: 1e0247a9a4b7
116 116 (set censor.policy to ignore errors)
117 117 $ hg revert -r $C1 target | head -n 10
118 118 abort: censored node: 613bc869fceb
119 119 (set censor.policy to ignore errors)
120 120 $ hg revert -r $C1 --all
121 121 reverting bystander
122 122 reverting target
123 123 abort: censored node: 613bc869fceb
124 124 (set censor.policy to ignore errors)
125 125 [255]
126 126 $ hg revert -r $C1 --all -X target
127 127 $ cat target | head -n 10
128 128 Tainted file now super sanitized
129 129 $ hg revert -r 0 --all
130 130 reverting target
131 131 $ cat target | head -n 10
132 132 Initially untainted file
133 133 $ hg revert -r $H2 --all
134 134 reverting bystander
135 135 reverting target
136 136 $ cat target | head -n 10
137 137 Tainted file now super sanitized
138 138
139 139 Uncensored file can be viewed at any revision
140 140
141 141 $ hg cat -r $H1 bystander | head -n 10
142 142 Normal file v2
143 143 $ hg cat -r $C2 bystander | head -n 10
144 144 Normal file v2
145 145 $ hg cat -r $C1 bystander | head -n 10
146 146 Normal file here
147 147 $ hg cat -r 0 bystander | head -n 10
148 148 Normal file here
149 149
150 150 Can update to children of censored revision
151 151
152 152 $ hg update -r $H1
153 153 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
154 154 $ cat target | head -n 10
155 155 Tainted file is now sanitized
156 156 $ hg update -r $H2
157 157 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
158 158 $ cat target | head -n 10
159 159 Tainted file now super sanitized
160 160
161 161 Set censor policy to abort in trusted $HGRC so hg verify fails
162 162
163 163 $ cp $HGRCPATH.orig $HGRCPATH
164 164 $ cat >> $HGRCPATH <<EOF
165 165 > [censor]
166 166 > policy = abort
167 167 > EOF
168 168
169 169 Repo fails verification due to censorship
170 170
171 171 $ hg verify
172 172 checking changesets
173 173 checking manifests
174 174 crosschecking files in changesets and manifests
175 175 checking files
176 176 target@1: censored file data
177 177 target@2: censored file data
178 178 not checking dirstate because of previous errors
179 179 checked 5 changesets with 7 changes to 2 files
180 180 2 integrity errors encountered!
181 181 (first damaged changeset appears to be 1)
182 182 [1]
183 183
184 184 Cannot update to revision with censored data
185 185
186 186 $ hg update -r $C2
187 187 abort: censored node: 1e0247a9a4b7
188 188 (set censor.policy to ignore errors)
189 189 [255]
190 190 $ hg update -r $C1
191 191 abort: censored node: 613bc869fceb
192 192 (set censor.policy to ignore errors)
193 193 [255]
194 194 $ hg update -r 0
195 195 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
196 196 $ hg update -r $H2
197 197 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
198 198
199 199 Set censor policy to ignore in trusted $HGRC so hg verify passes
200 200
201 201 $ cp $HGRCPATH.orig $HGRCPATH
202 202 $ cat >> $HGRCPATH <<EOF
203 203 > [censor]
204 204 > policy = ignore
205 205 > EOF
206 206
207 207 Repo passes verification with warnings with explicit config
208 208
209 209 $ hg verify -q
210 210
211 211 May update to revision with censored data with explicit config
212 212
213 213 $ hg update -r $C2
214 214 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
215 215 $ cat target | head -n 10
216 216 $ hg update -r $C1
217 217 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
218 218 $ cat target | head -n 10
219 219 $ hg update -r 0
220 220 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
221 221 $ cat target | head -n 10
222 222 Initially untainted file
223 223 $ hg update -r $H2
224 224 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
225 225 $ cat target | head -n 10
226 226 Tainted file now super sanitized
227 227
228 228 Can merge in revision with censored data. Test requires one branch of history
229 229 with the file censored, but we can't censor at a head, so advance H1.
230 230
231 231 $ hg update -r $H1
232 232 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
233 233 $ C3=$H1
234 234 $ echo 'advanced head H1' > target
235 235 $ hg ci -m 'advance head H1' target
236 236 $ H1=`hg id --debug -i`
237 237 $ hg --config extensions.censor= censor -r $C3 target
238 238 $ hg update -r $H2
239 239 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
240 240 $ hg merge -r $C3
241 241 merging target
242 242 0 files updated, 1 files merged, 0 files removed, 0 files unresolved
243 243 (branch merge, don't forget to commit)
244 244
245 245 Revisions present in repository heads may not be censored
246 246
247 247 $ hg update -C -r $H2
248 248 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
249 249 $ hg --config extensions.censor= censor -r $H2 target
250 250 abort: cannot censor file in heads (78a8fc215e79)
251 251 (clean/delete and commit first)
252 252 [255]
253 253 $ echo 'twiddling thumbs' > bystander
254 254 $ hg ci -m 'bystander commit'
255 255 $ H2=`hg id --debug -i`
256 256 $ hg --config extensions.censor= censor -r "$H2^" target
257 257 abort: cannot censor file in heads (efbe78065929)
258 258 (clean/delete and commit first)
259 259 [255]
260 260
261 261 Cannot censor working directory
262 262
263 263 $ echo 'seriously no passwords' > target
264 264 $ hg ci -m 'extend second head arbitrarily' target
265 265 $ H2=`hg id --debug -i`
266 266 $ hg update -r "$H2^"
267 267 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
268 268 $ hg --config extensions.censor= censor -r . target
269 269 abort: cannot censor working directory
270 270 (clean/delete/update first)
271 271 [255]
272 272 $ hg update -r $H2
273 273 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
274 274
275 275 Can re-add file after being deleted + censored
276 276
277 277 $ C4=$H2
278 278 $ hg rm target
279 279 $ hg ci -m 'delete target so it may be censored'
280 280 $ H2=`hg id --debug -i`
281 281 $ hg --config extensions.censor= censor -r $C4 target
282 282 $ hg cat -r $C4 target | head -n 10
283 283 $ hg cat -r "$H2^^" target | head -n 10
284 284 Tainted file now super sanitized
285 285 $ echo 'fresh start' > target
286 286 $ hg add target
287 287 $ hg ci -m reincarnated target
288 288 $ H2=`hg id --debug -i`
289 289 $ hg cat -r $H2 target | head -n 10
290 290 fresh start
291 291 $ hg cat -r "$H2^" target | head -n 10
292 292 target: no such file in rev 452ec1762369
293 293 $ hg cat -r $C4 target | head -n 10
294 294 $ hg cat -r "$H2^^^" target | head -n 10
295 295 Tainted file now super sanitized
296 296
297 297 Can censor after revlog has expanded to no longer permit inline storage
298 298
299 299 $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
300 300 > do
301 301 > echo "Password: hunter$x" >> target
302 302 > done
303 303 $ hg ci -m 'add 100k passwords'
304 304 $ H2=`hg id --debug -i`
305 305 $ C5=$H2
306 306 $ hg revert -r "$H2^" target
307 307 $ hg ci -m 'cleaned 100k passwords'
308 308 $ H2=`hg id --debug -i`
309 309 $ hg --config extensions.censor= censor -r $C5 target
310 310 $ hg cat -r $C5 target | head -n 10
311 311 $ hg cat -r $H2 target | head -n 10
312 312 fresh start
313 313
314 314 Repo with censored nodes can be cloned and cloned nodes are censored
315 315
316 316 $ cd ..
317 317 $ hg clone r rclone
318 318 updating to branch default
319 319 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
320 320 $ cd rclone
321 321 $ hg cat -r $H1 target | head -n 10
322 322 advanced head H1
323 323 $ hg cat -r $H2~5 target | head -n 10
324 324 Tainted file now super sanitized
325 325 $ hg cat -r $C2 target | head -n 10
326 326 $ hg cat -r $C1 target | head -n 10
327 327 $ hg cat -r 0 target | head -n 10
328 328 Initially untainted file
329 329 $ hg verify -q
330 330
331 331 Repo cloned before tainted content introduced can pull censored nodes
332 332
333 333 $ cd ../rpull
334 334 $ hg cat -r tip target | head -n 10
335 335 Initially untainted file
336 336 $ hg verify -q
337 337 $ hg pull -r $H1 -r $H2
338 338 pulling from $TESTTMP/r
339 339 searching for changes
340 340 adding changesets
341 341 adding manifests
342 342 adding file changes
343 343 added 11 changesets with 11 changes to 2 files (+1 heads)
344 344 new changesets 186fb27560c3:683e4645fded
345 345 (run 'hg heads' to see heads, 'hg merge' to merge)
346 346 $ hg update 4
347 347 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
348 348 $ cat target | head -n 10
349 349 Tainted file now super sanitized
350 350 $ hg cat -r $H1 target | head -n 10
351 351 advanced head H1
352 352 $ hg cat -r $H2~5 target | head -n 10
353 353 Tainted file now super sanitized
354 354 $ hg cat -r $C2 target | head -n 10
355 355 $ hg cat -r $C1 target | head -n 10
356 356 $ hg cat -r 0 target | head -n 10
357 357 Initially untainted file
358 358 $ hg verify -q
359 359
360 360 Censored nodes can be pushed if they censor previously unexchanged nodes
361 361
362 362 $ echo 'Passwords: hunter2hunter2' > target
363 363 $ hg ci -m 're-add password from clone' target
364 364 created new head
365 365 $ H3=`hg id --debug -i`
366 366 $ REV=$H3
367 367 $ echo 'Re-sanitized; nothing to see here' > target
368 368 $ hg ci -m 're-sanitized' target
369 369 $ H2=`hg id --debug -i`
370 370 $ CLEANREV=$H2
371 371 $ hg cat -r $REV target | head -n 10
372 372 Passwords: hunter2hunter2
373 373 $ hg --config extensions.censor= censor -r $REV target
374 374 $ hg cat -r $REV target | head -n 10
375 375 $ hg cat -r $CLEANREV target | head -n 10
376 376 Re-sanitized; nothing to see here
377 377 $ hg push -f -r $H2
378 378 pushing to $TESTTMP/r
379 379 searching for changes
380 380 adding changesets
381 381 adding manifests
382 382 adding file changes
383 383 added 2 changesets with 2 changes to 1 files (+1 heads)
384 384
385 385 $ cd ../r
386 386 $ hg cat -r $REV target | head -n 10
387 387 $ hg cat -r $CLEANREV target | head -n 10
388 388 Re-sanitized; nothing to see here
389 389 $ hg update $CLEANREV
390 390 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
391 391 $ cat target | head -n 10
392 392 Re-sanitized; nothing to see here
393 393
394 394 Censored nodes can be bundled up and unbundled in another repo
395 395
396 396 $ hg bundle --base 0 ../pwbundle
397 397 13 changesets found
398 398 $ cd ../rclone
399 399 $ hg unbundle ../pwbundle
400 400 adding changesets
401 401 adding manifests
402 402 adding file changes
403 403 added 2 changesets with 2 changes to 2 files (+1 heads)
404 404 new changesets 075be80ac777:dcbaf17bf3a1 (2 drafts)
405 405 (run 'hg heads .' to see heads, 'hg merge' to merge)
406 406 $ hg cat -r $REV target | head -n 10
407 407 $ hg cat -r $CLEANREV target | head -n 10
408 408 Re-sanitized; nothing to see here
409 409 $ hg update $CLEANREV
410 410 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
411 411 $ cat target | head -n 10
412 412 Re-sanitized; nothing to see here
413 413 $ hg verify -q
414 414
415 415 Grepping only warns, doesn't error out
416 416
417 417 $ cd ../rpull
418 418 $ hg grep 'Normal file'
419 419 bystander:Normal file v2
420 420 $ hg grep nothing
421 421 target:Re-sanitized; nothing to see here
422 422 $ hg grep --diff 'Normal file'
423 423 cannot search in censored file: target:7
424 424 cannot search in censored file: target:10
425 425 cannot search in censored file: target:12
426 426 bystander:6:-:Normal file v2
427 427 cannot search in censored file: target:1
428 428 cannot search in censored file: target:2
429 429 cannot search in censored file: target:3
430 430 bystander:2:-:Normal file here
431 431 bystander:2:+:Normal file v2
432 432 bystander:0:+:Normal file here
433 433 $ hg grep --diff nothing
434 434 cannot search in censored file: target:7
435 435 cannot search in censored file: target:10
436 436 cannot search in censored file: target:12
437 437 target:13:+:Re-sanitized; nothing to see here
438 438 cannot search in censored file: target:1
439 439 cannot search in censored file: target:2
440 440 cannot search in censored file: target:3
441 441
442 442 Censored nodes can be imported on top of censored nodes, consecutively
443 443
444 444 $ hg init ../rimport
445 445 $ hg bundle --base 1 ../rimport/splitbundle
446 446 12 changesets found
447 447 $ cd ../rimport
448 448 $ hg pull -r $H1 -r $H2 ../r
449 449 pulling from ../r
450 450 adding changesets
451 451 adding manifests
452 452 adding file changes
453 453 added 8 changesets with 10 changes to 2 files (+1 heads)
454 454 new changesets e97f55b2665a:dcbaf17bf3a1
455 455 (run 'hg heads' to see heads, 'hg merge' to merge)
456 456 $ hg unbundle splitbundle
457 457 adding changesets
458 458 adding manifests
459 459 adding file changes
460 460 added 6 changesets with 5 changes to 2 files (+1 heads)
461 461 new changesets efbe78065929:683e4645fded (6 drafts)
462 462 (run 'hg heads .' to see heads, 'hg merge' to merge)
463 463 $ hg update $H2
464 464 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
465 465 $ cat target | head -n 10
466 466 Re-sanitized; nothing to see here
467 467 $ hg verify -q
468 468 $ cd ../r
469 469
470 470 Can import bundle where first revision of a file is censored
471 471
472 472 $ hg init ../rinit
473 473 $ hg --config extensions.censor= censor -r 0 target
474 474 $ hg bundle -r 0 --base null ../rinit/initbundle
475 475 1 changesets found
476 476 $ cd ../rinit
477 477 $ hg unbundle initbundle
478 478 adding changesets
479 479 adding manifests
480 480 adding file changes
481 481 added 1 changesets with 2 changes to 2 files
482 482 new changesets e97f55b2665a (1 drafts)
483 483 (run 'hg update' to get a working copy)
484 484 $ hg cat -r 0 target | head -n 10
485 485
486 486 #if revlogv2
487 487
488 488 Testing feature that does not work in revlog v1
489 489 ===============================================
490 490
491 491 Censoring a revision that is used as delta base
492 492 -----------------------------------------------
493 493
494 494 $ cd ..
495 495 $ hg init censor-with-delta
496 496 $ cd censor-with-delta
497 497 $ echo root > target
498 498 $ hg add target
499 499 $ hg commit -m root
500 500 $ B0=`hg id --debug -i`
501 501 $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
502 502 > do
503 503 > echo "Password: hunter$x" >> target
504 504 > done
505 505 $ hg ci -m 'write a long file'
506 506 $ B1=`hg id --debug -i`
507 507 $ echo 'small change (should create a delta)' >> target
508 508 $ hg ci -m 'create a delta over the password'
509 509 (should show that the last revision is a delta, not a snapshot)
510 510 $ B2=`hg id --debug -i`
511 511
512 512 Make sure the last revision is a delta against the revision we will censor
513 513
514 514 $ hg debugdeltachain target -T '{rev} {chainid} {chainlen} {prevrev}\n'
515 515 0 1 1 -1
516 516 1 2 1 -1
517 517 2 2 2 1
518 518
519 519 Censor the file
520 520
521 521 $ hg cat -r $B1 target | wc -l
522 522 *50002 (re)
523 523 $ hg --config extensions.censor= censor -r $B1 target
524 524 $ hg cat -r $B1 target | wc -l
525 525 *0 (re)
526 526
527 527 Check the child revision is fine
528 528
529 529 $ hg cat -r $B2 target | wc -l
530 530 *50003 (re)
531 531
532 532 #endif
533 533
534 534 Testing repository upgrade with censored revisions
535 535 ==================================================
536 536
537 537 $ cd ../rclone
538 538
539 539 With the "abort" policy
540 540 =======================
541 541
542 542 $ hg verify --config censor.policy=ignore
543 543 checking changesets
544 544 checking manifests
545 545 crosschecking files in changesets and manifests
546 546 checking files
547 547 checking dirstate
548 548 checked 14 changesets with 15 changes to 2 files
549 549 $ hg debugupgraderepo --run --quiet \
550 550 > --optimize re-delta-parent \
551 551 > --config censor.policy=abort
552 552 upgrade will perform the following actions:
553 553
554 554 requirements
555 555 preserved: * (glob)
556 556
557 557 optimisations: re-delta-parent
558 558
559 559 processed revlogs:
560 560 - all-filelogs
561 561 - changelog
562 562 - manifest
563 563
564 transaction abort!
565 rollback completed
566 abort: file censored target:613bc869fceb
567 [255]
568 564 $ hg verify --config censor.policy=ignore
569 565 checking changesets
570 566 checking manifests
571 567 crosschecking files in changesets and manifests
572 568 checking files
573 569 checking dirstate
574 570 checked 14 changesets with 15 changes to 2 files
575 571
576 572 With the "ignore" policy
577 573 ========================
578 574
579 575 $ hg verify --config censor.policy=ignore
580 576 checking changesets
581 577 checking manifests
582 578 crosschecking files in changesets and manifests
583 579 checking files
584 580 checking dirstate
585 581 checked 14 changesets with 15 changes to 2 files
586 582 $ hg debugupgraderepo --run --quiet \
587 583 > --optimize re-delta-parent \
588 584 > --config censor.policy=ignore
589 585 upgrade will perform the following actions:
590 586
591 587 requirements
592 588 preserved: * (glob)
593 589
594 590 optimisations: re-delta-parent
595 591
596 592 processed revlogs:
597 593 - all-filelogs
598 594 - changelog
599 595 - manifest
600 596
601 transaction abort!
602 rollback completed
603 abort: file censored target:613bc869fceb
604 [255]
605 597 $ hg verify --config censor.policy=ignore
606 598 checking changesets
607 599 checking manifests
608 600 crosschecking files in changesets and manifests
609 601 checking files
610 602 checking dirstate
611 603 checked 14 changesets with 15 changes to 2 files