revlog: synchronise the various attributes holding the index filename...
marmoute
r51982:c2c24b6b default
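The diff below replaces `_InnerRevlog`'s plain `index_file` attribute with a private field behind a property, so that assigning a new index filename also repoints the inline segment reader. A minimal sketch of the stale-name problem being closed, using stand-in objects rather than the real classes (all names here are illustrative):

    from types import SimpleNamespace

    segment = SimpleNamespace(filename=b'foo.i')
    inner = SimpleNamespace(index_file=b'foo.i', inline=True, _segmentfile=segment)

    # before this change: a plain attribute write leaves the reader stale
    inner.index_file = b'foo.i.new'
    assert inner._segmentfile.filename == b'foo.i'  # out of sync

    # after: the new setter (see the hunk below) also updates
    # _segmentfile.filename whenever the revlog is inline.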
@@ -1,3907 +1,3917 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # bare usage of all the names to prevent pyflakes unused-import complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider that we have a "fast" implementation in "pure" python
167 167 # because people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
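# A small illustration of why copy() is overridden above (a hedged sketch,
# not guaranteed by the module itself): the options dict is mutable, so a
# plain attribute copy would share it between the two configs.
#
#     base = FeatureConfig(compression_engine_options={b'zlib.level': 9})
#     clone = base.copy()
#     clone.compression_engine_options[b'zlib.level'] = 1
#     assert base.compression_engine_options[b'zlib.level'] == 9  # unshared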
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how large the index must be before mmap is used
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 chunk_cache,
357 357 ):
358 358 self.opener = opener
359 359 self.index = index
360 360
361 self.index_file = index_file
361 self.__index_file = index_file
362 362 self.data_file = data_file
363 363 self.sidedata_file = sidedata_file
364 364 self.inline = inline
365 365 self.data_config = data_config
366 366
367 367 # index
368 368
369 369 # 3-tuple of file handles being used for active writing.
370 370 self._writinghandles = None
371 371
372 372 self._segmentfile = randomaccessfile.randomaccessfile(
373 373 self.opener,
374 374 (self.index_file if self.inline else self.data_file),
375 375 self.data_config.chunk_cache_size,
376 376 chunk_cache,
377 377 )
378 378 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
379 379 self.opener,
380 380 self.sidedata_file,
381 381 self.data_config.chunk_cache_size,
382 382 )
383 383
384 @property
385 def index_file(self):
386 return self.__index_file
387
388 @index_file.setter
389 def index_file(self, new_index_file):
390 self.__index_file = new_index_file
391 if self.inline:
392 self._segmentfile.filename = new_index_file
393
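# A hedged usage sketch of the property above: for an inline revlog,
# assigning a new name keeps the segment reader in sync automatically.
#
#     inner.index_file = b'xxx.i.new'  # hypothetical rename target
#     assert inner._segmentfile.filename == inner.index_file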
384 394 # Derived from index values.
385 395
386 396 def start(self, rev):
387 397 """the offset of the data chunk for this revision"""
388 398 return int(self.index[rev][0] >> 16)
389 399
390 400 def length(self, rev):
391 401 """the length of the data chunk for this revision"""
392 402 return self.index[rev][1]
393 403
394 404 def end(self, rev):
395 405 """the end of the data chunk for this revision"""
396 406 return self.start(rev) + self.length(rev)
397 407
398 408 @contextlib.contextmanager
399 409 def reading(self):
400 410 """Context manager that keeps data and sidedata files open for reading"""
401 411 if len(self.index) == 0:
402 412 yield # nothing to be read
403 413 else:
404 414 with self._segmentfile.reading():
405 415 with self._segmentfile_sidedata.reading():
406 416 yield
407 417
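# A minimal usage sketch for reading() (assuming `inner` is an _InnerRevlog
# with at least one revision): the data and sidedata handles stay open for
# the whole block, so repeated chunk reads avoid re-opening files.
#
#     with inner.reading():
#         offset, segment = inner.get_segment_for_revs(0, 0)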
408 418 @property
409 419 def is_writing(self):
410 420 """True is a writing context is open"""
411 421 return self._writinghandles is not None
412 422
413 423 @contextlib.contextmanager
414 424 def writing(self, transaction, data_end=None, sidedata_end=None):
415 425 """Open the revlog files for writing
416 426
417 427 Adding content to a revlog should be done within such a context.
418 428 """
419 429 if self.is_writing:
420 430 yield
421 431 else:
422 432 ifh = dfh = sdfh = None
423 433 try:
424 434 r = len(self.index)
425 435 # opening the data file.
426 436 dsize = 0
427 437 if r:
428 438 dsize = self.end(r - 1)
429 439 dfh = None
430 440 if not self.inline:
431 441 try:
432 442 dfh = self.opener(self.data_file, mode=b"r+")
433 443 if data_end is None:
434 444 dfh.seek(0, os.SEEK_END)
435 445 else:
436 446 dfh.seek(data_end, os.SEEK_SET)
437 447 except FileNotFoundError:
438 448 dfh = self.opener(self.data_file, mode=b"w+")
439 449 transaction.add(self.data_file, dsize)
440 450 if self.sidedata_file is not None:
441 451 assert sidedata_end is not None
442 452 # revlog-v2 does not inline; this assert helps Pytype
443 453 assert dfh is not None
444 454 try:
445 455 sdfh = self.opener(self.sidedata_file, mode=b"r+")
446 456 dfh.seek(sidedata_end, os.SEEK_SET)
447 457 except FileNotFoundError:
448 458 sdfh = self.opener(self.sidedata_file, mode=b"w+")
449 459 transaction.add(self.sidedata_file, sidedata_end)
450 460
451 461 # opening the index file.
452 462 isize = r * self.index.entry_size
453 463 ifh = self.__index_write_fp()
454 464 if self.inline:
455 465 transaction.add(self.index_file, dsize + isize)
456 466 else:
457 467 transaction.add(self.index_file, isize)
458 468 # exposing all file handles for writing.
459 469 self._writinghandles = (ifh, dfh, sdfh)
460 470 self._segmentfile.writing_handle = ifh if self.inline else dfh
461 471 self._segmentfile_sidedata.writing_handle = sdfh
462 472 yield
463 473 finally:
464 474 self._writinghandles = None
465 475 self._segmentfile.writing_handle = None
466 476 self._segmentfile_sidedata.writing_handle = None
467 477 if dfh is not None:
468 478 dfh.close()
469 479 if sdfh is not None:
470 480 sdfh.close()
471 481 # closing the index file last to avoid exposing references to
472 482 # potentially unflushed data content.
473 483 if ifh is not None:
474 484 ifh.close()
475 485
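# A hedged usage sketch for writing(), assuming `tr` is an open Mercurial
# transaction and the revlog has no separate sidedata file (otherwise
# sidedata_end must be passed); all handles are opened on entry and closed
# again on exit.
#
#     with inner.writing(tr):
#         assert inner.is_writing
#         ...  # append index/data entries here
#     assert not inner.is_writing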
476 486 def __index_write_fp(self, index_end=None):
477 487 """internal method to open the index file for writing
478 488
479 489 You should not use this directly; use `_writing` instead
480 490 """
481 491 try:
482 492 f = self.opener(
483 493 self.index_file,
484 494 mode=b"r+",
485 495 checkambig=self.data_config.check_ambig,
486 496 )
487 497 if index_end is None:
488 498 f.seek(0, os.SEEK_END)
489 499 else:
490 500 f.seek(index_end, os.SEEK_SET)
491 501 return f
492 502 except FileNotFoundError:
493 503 return self.opener(
494 504 self.index_file,
495 505 mode=b"w+",
496 506 checkambig=self.data_config.check_ambig,
497 507 )
498 508
499 509 def __index_new_fp(self):
500 510 """internal method to create a new index file for writing
501 511
502 512 You should not use this unless you are upgrading from an inline revlog
503 513 """
504 514 return self.opener(
505 515 self.index_file,
506 516 mode=b"w",
507 517 checkambig=self.data_config.check_ambig,
508 518 atomictemp=True,
509 519 )
510 520
511 521 def get_segment_for_revs(self, startrev, endrev):
512 522 """Obtain a segment of raw data corresponding to a range of revisions.
513 523
514 524 Accepts the start and end revisions.
517 527
518 528 Requests for data may be satisfied by a cache.
519 529
520 530 Returns a 2-tuple of (offset, data) for the requested range of
521 531 revisions. Offset is the integer offset from the beginning of the
522 532 revlog and data is a str or buffer of the raw byte data.
523 533
524 534 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
525 535 to determine where each revision's data begins and ends.
526 536
527 537 API: we should consider making this a private part of the InnerRevlog
528 538 at some point.
529 539 """
530 540 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
531 541 # (functions are expensive).
532 542 index = self.index
533 543 istart = index[startrev]
534 544 start = int(istart[0] >> 16)
535 545 if startrev == endrev:
536 546 end = start + istart[1]
537 547 else:
538 548 iend = index[endrev]
539 549 end = int(iend[0] >> 16) + iend[1]
540 550
541 551 if self.inline:
542 552 start += (startrev + 1) * self.index.entry_size
543 553 end += (endrev + 1) * self.index.entry_size
544 554 length = end - start
545 555
546 556 return start, self._segmentfile.read_chunk(start, length)
547 557
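# A worked example of the inline adjustment above, with hypothetical numbers:
# in an inline revlog each revision's data is preceded by its index entry,
# hence the shift by (rev + 1) * entry_size.
#
#     entry_size = 64             # assumed entry size
#     start, length = 0, 100      # rev 0's logical offset and length
#     inline_start = start + (0 + 1) * entry_size          # 64
#     inline_end = start + length + (0 + 1) * entry_size   # 164
#     assert inline_end - inline_start == length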
548 558
549 559 class revlog:
550 560 """
551 561 the underlying revision storage object
552 562
553 563 A revlog consists of two parts, an index and the revision data.
554 564
555 565 The index is a file with a fixed record size containing
556 566 information on each revision, including its nodeid (hash), the
557 567 nodeids of its parents, the position and offset of its data within
558 568 the data file, and the revision it's based on. Finally, each entry
559 569 contains a linkrev entry that can serve as a pointer to external
560 570 data.
561 571
562 572 The revision data itself is a linear collection of data chunks.
563 573 Each chunk represents a revision and is usually represented as a
564 574 delta against the previous chunk. To bound lookup time, runs of
565 575 deltas are limited to about 2 times the length of the original
566 576 version data. This makes retrieval of a version proportional to
567 577 its size, or O(1) relative to the number of revisions.
568 578
569 579 Both pieces of the revlog are written to in an append-only
570 580 fashion, which means we never need to rewrite a file to insert or
571 581 remove data, and can use some simple techniques to avoid the need
572 582 for locking while reading.
573 583
574 584 If checkambig, indexfile is opened with checkambig=True at
575 585 writing, to avoid file stat ambiguity.
576 586
577 587 If mmaplargeindex is True, and an mmapindexthreshold is set, the
578 588 index will be mmapped rather than read if it is larger than the
579 589 configured threshold.
580 590
581 591 If censorable is True, the revlog can have censored revisions.
582 592
583 593 If `upperboundcomp` is not None, this is the expected maximal gain from
584 594 compression for the data content.
585 595
586 596 `concurrencychecker` is an optional function that receives 3 arguments: a
587 597 file handle, a filename, and an expected position. It should check whether
588 598 the current position in the file handle is valid, and log/warn/fail (by
589 599 raising).
590 600
591 601 See mercurial/revlogutils/constants.py for details about the content of an
592 602 index entry.
593 603 """
594 604
595 605 _flagserrorclass = error.RevlogError
596 606
597 607 @staticmethod
598 608 def is_inline_index(header_bytes):
599 609 """Determine if a revlog is inline from the initial bytes of the index"""
600 610 header = INDEX_HEADER.unpack(header_bytes)[0]
601 611
602 612 _format_flags = header & ~0xFFFF
603 613 _format_version = header & 0xFFFF
604 614
605 615 features = FEATURES_BY_VERSION[_format_version]
606 616 return features[b'inline'](_format_flags)
607 617
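# A small sketch of is_inline_index on a hand-built v1 header, relying only
# on constants imported at the top of this module (a hedged example):
#
#     header_bytes = INDEX_HEADER.pack(REVLOGV1 | FLAG_INLINE_DATA)
#     assert revlog.is_inline_index(header_bytes)
#     assert not revlog.is_inline_index(INDEX_HEADER.pack(REVLOGV1))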
608 618 def __init__(
609 619 self,
610 620 opener,
611 621 target,
612 622 radix,
613 623 postfix=None, # only exist for `tmpcensored` now
614 624 checkambig=False,
615 625 mmaplargeindex=False,
616 626 censorable=False,
617 627 upperboundcomp=None,
618 628 persistentnodemap=False,
619 629 concurrencychecker=None,
620 630 trypending=False,
621 631 try_split=False,
622 632 canonical_parent_order=True,
623 633 ):
624 634 """
625 635 create a revlog object
626 636
627 637 opener is a function that abstracts the file opening operation
628 638 and can be used to implement COW semantics or the like.
629 639
630 640 `target`: a (KIND, ID) tuple that identify the content stored in
631 641 this revlog. It helps the rest of the code to understand what the revlog
632 642 is about without having to resort to heuristics and index filename
633 643 analysis. Note: this must reliably be set by normal code, but
634 644 test, debug, or performance measurement code might not set it to an
635 645 accurate value.
636 646 """
637 647
638 648 self.radix = radix
639 649
640 650 self._docket_file = None
641 651 self._indexfile = None
642 652 self._datafile = None
643 653 self._sidedatafile = None
644 654 self._nodemap_file = None
645 655 self.postfix = postfix
646 656 self._trypending = trypending
647 657 self._try_split = try_split
648 658 self.opener = opener
649 659 if persistentnodemap:
650 660 self._nodemap_file = nodemaputil.get_nodemap_file(self)
651 661
652 662 assert target[0] in ALL_KINDS
653 663 assert len(target) == 2
654 664 self.target = target
655 665 if b'feature-config' in self.opener.options:
656 666 self.feature_config = self.opener.options[b'feature-config'].copy()
657 667 else:
658 668 self.feature_config = FeatureConfig()
659 669 self.feature_config.censorable = censorable
660 670 self.feature_config.canonical_parent_order = canonical_parent_order
661 671 if b'data-config' in self.opener.options:
662 672 self.data_config = self.opener.options[b'data-config'].copy()
663 673 else:
664 674 self.data_config = DataConfig()
665 675 self.data_config.check_ambig = checkambig
666 676 self.data_config.mmap_large_index = mmaplargeindex
667 677 if b'delta-config' in self.opener.options:
668 678 self.delta_config = self.opener.options[b'delta-config'].copy()
669 679 else:
670 680 self.delta_config = DeltaConfig()
671 681 self.delta_config.upper_bound_comp = upperboundcomp
672 682
673 683 # 3-tuple of (node, rev, text) for a raw revision.
674 684 self._revisioncache = None
675 685 # Maps rev to chain base rev.
676 686 self._chainbasecache = util.lrucachedict(100)
677 687
678 688 self.index = None
679 689 self._docket = None
680 690 self._nodemap_docket = None
681 691 # Mapping of partial identifiers to full nodes.
682 692 self._pcache = {}
683 693
684 694 # other optional features
685 695
686 696 # Make copy of flag processors so each revlog instance can support
687 697 # custom flags.
688 698 self._flagprocessors = dict(flagutil.flagprocessors)
689 699 # prevent nesting of addgroup
690 700 self._adding_group = None
691 701
692 702 chunk_cache = self._loadindex()
693 703 self._load_inner(chunk_cache)
694 704
695 705 self._concurrencychecker = concurrencychecker
696 706
697 707 @property
698 708 def _generaldelta(self):
699 709 """temporary compatibility proxy"""
700 710 util.nouideprecwarn(
701 711 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
702 712 )
703 713 return self.delta_config.general_delta
704 714
705 715 @property
706 716 def _checkambig(self):
707 717 """temporary compatibility proxy"""
708 718 util.nouideprecwarn(
709 719 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
710 720 )
711 721 return self.data_config.check_ambig
712 722
713 723 @property
714 724 def _mmaplargeindex(self):
715 725 """temporary compatibility proxy"""
716 726 util.nouideprecwarn(
717 727 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
718 728 )
719 729 return self.data_config.mmap_large_index
720 730
721 731 @property
722 732 def _censorable(self):
723 733 """temporary compatibility proxy"""
724 734 util.nouideprecwarn(
725 735 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
726 736 )
727 737 return self.feature_config.censorable
728 738
729 739 @property
730 740 def _chunkcachesize(self):
731 741 """temporary compatibility proxy"""
732 742 util.nouideprecwarn(
733 743 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
734 744 )
735 745 return self.data_config.chunk_cache_size
736 746
737 747 @property
738 748 def _maxchainlen(self):
739 749 """temporary compatibility proxy"""
740 750 util.nouideprecwarn(
741 751 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
742 752 )
743 753 return self.delta_config.max_chain_len
744 754
745 755 @property
746 756 def _deltabothparents(self):
747 757 """temporary compatibility proxy"""
748 758 util.nouideprecwarn(
749 759 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
750 760 )
751 761 return self.delta_config.delta_both_parents
752 762
753 763 @property
754 764 def _candidate_group_chunk_size(self):
755 765 """temporary compatibility proxy"""
756 766 util.nouideprecwarn(
757 767 b"use revlog.delta_config.candidate_group_chunk_size",
758 768 b"6.6",
759 769 stacklevel=2,
760 770 )
761 771 return self.delta_config.candidate_group_chunk_size
762 772
763 773 @property
764 774 def _debug_delta(self):
765 775 """temporary compatibility proxy"""
766 776 util.nouideprecwarn(
767 777 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
768 778 )
769 779 return self.delta_config.debug_delta
770 780
771 781 @property
772 782 def _compengine(self):
773 783 """temporary compatibility proxy"""
774 784 util.nouideprecwarn(
775 785 b"use revlog.feature_config.compression_engine",
776 786 b"6.6",
777 787 stacklevel=2,
778 788 )
779 789 return self.feature_config.compression_engine
780 790
781 791 @property
782 792 def upperboundcomp(self):
783 793 """temporary compatibility proxy"""
784 794 util.nouideprecwarn(
785 795 b"use revlog.delta_config.upper_bound_comp",
786 796 b"6.6",
787 797 stacklevel=2,
788 798 )
789 799 return self.delta_config.upper_bound_comp
790 800
791 801 @property
792 802 def _compengineopts(self):
793 803 """temporary compatibility proxy"""
794 804 util.nouideprecwarn(
795 805 b"use revlog.feature_config.compression_engine_options",
796 806 b"6.6",
797 807 stacklevel=2,
798 808 )
799 809 return self.feature_config.compression_engine_options
800 810
801 811 @property
802 812 def _maxdeltachainspan(self):
803 813 """temporary compatibility proxy"""
804 814 util.nouideprecwarn(
805 815 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
806 816 )
807 817 return self.delta_config.max_deltachain_span
808 818
809 819 @property
810 820 def _withsparseread(self):
811 821 """temporary compatibility proxy"""
812 822 util.nouideprecwarn(
813 823 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
814 824 )
815 825 return self.data_config.with_sparse_read
816 826
817 827 @property
818 828 def _sparserevlog(self):
819 829 """temporary compatibility proxy"""
820 830 util.nouideprecwarn(
821 831 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
822 832 )
823 833 return self.delta_config.sparse_revlog
824 834
825 835 @property
826 836 def hassidedata(self):
827 837 """temporary compatibility proxy"""
828 838 util.nouideprecwarn(
829 839 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
830 840 )
831 841 return self.feature_config.has_side_data
832 842
833 843 @property
834 844 def _srdensitythreshold(self):
835 845 """temporary compatibility proxy"""
836 846 util.nouideprecwarn(
837 847 b"use revlog.data_config.sr_density_threshold",
838 848 b"6.6",
839 849 stacklevel=2,
840 850 )
841 851 return self.data_config.sr_density_threshold
842 852
843 853 @property
844 854 def _srmingapsize(self):
845 855 """temporary compatibility proxy"""
846 856 util.nouideprecwarn(
847 857 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
848 858 )
849 859 return self.data_config.sr_min_gap_size
850 860
851 861 @property
852 862 def _compute_rank(self):
853 863 """temporary compatibility proxy"""
854 864 util.nouideprecwarn(
855 865 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
856 866 )
857 867 return self.feature_config.compute_rank
858 868
859 869 @property
860 870 def canonical_parent_order(self):
861 871 """temporary compatibility proxy"""
862 872 util.nouideprecwarn(
863 873 b"use revlog.feature_config.canonical_parent_order",
864 874 b"6.6",
865 875 stacklevel=2,
866 876 )
867 877 return self.feature_config.canonical_parent_order
868 878
869 879 @property
870 880 def _lazydelta(self):
871 881 """temporary compatibility proxy"""
872 882 util.nouideprecwarn(
873 883 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
874 884 )
875 885 return self.delta_config.lazy_delta
876 886
877 887 @property
878 888 def _lazydeltabase(self):
879 889 """temporary compatibility proxy"""
880 890 util.nouideprecwarn(
881 891 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
882 892 )
883 893 return self.delta_config.lazy_delta_base
884 894
885 895 def _init_opts(self):
886 896 """process options (from above/config) to setup associated default revlog mode
887 897
888 898 These values might be affected when actually reading on disk information.
889 899
890 900 The relevant values are returned for use in _loadindex().
891 901
892 902 * newversionflags:
893 903 version header to use if we need to create a new revlog
894 904
895 905 * mmapindexthreshold:
896 906 minimal index size at which we start to use mmap
897 907
898 908 * force_nodemap:
899 909 force the usage of a "development" version of the nodemap code
900 910 """
901 911 opts = self.opener.options
902 912
903 913 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
904 914 new_header = CHANGELOGV2
905 915 compute_rank = opts.get(b'changelogv2.compute-rank', True)
906 916 self.feature_config.compute_rank = compute_rank
907 917 elif b'revlogv2' in opts:
908 918 new_header = REVLOGV2
909 919 elif b'revlogv1' in opts:
910 920 new_header = REVLOGV1 | FLAG_INLINE_DATA
911 921 if b'generaldelta' in opts:
912 922 new_header |= FLAG_GENERALDELTA
913 923 elif b'revlogv0' in self.opener.options:
914 924 new_header = REVLOGV0
915 925 else:
916 926 new_header = REVLOG_DEFAULT_VERSION
917 927
918 928 mmapindexthreshold = None
919 929 if self.data_config.mmap_large_index:
920 930 mmapindexthreshold = self.data_config.mmap_index_threshold
921 931 if self.feature_config.enable_ellipsis:
922 932 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
923 933
924 934 # revlog v0 doesn't have flag processors
925 935 for flag, processor in opts.get(b'flagprocessors', {}).items():
926 936 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
927 937
928 938 chunk_cache_size = self.data_config.chunk_cache_size
929 939 if chunk_cache_size <= 0:
930 940 raise error.RevlogError(
931 941 _(b'revlog chunk cache size %r is not greater than 0')
932 942 % chunk_cache_size
933 943 )
934 944 elif chunk_cache_size & (chunk_cache_size - 1):
935 945 raise error.RevlogError(
936 946 _(b'revlog chunk cache size %r is not a power of 2')
937 947 % chunk_cache_size
938 948 )
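# (the `size & (size - 1)` test above is the standard power-of-two check:
# a power of two has exactly one bit set, so ANDing it with its predecessor
# is zero, e.g. 65536 & 65535 == 0 while 65537 & 65536 != 0)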
939 949 force_nodemap = opts.get(b'devel-force-nodemap', False)
940 950 return new_header, mmapindexthreshold, force_nodemap
941 951
942 952 def _get_data(self, filepath, mmap_threshold, size=None):
943 953 """return a file content with or without mmap
944 954
945 955 If the file is missing return the empty string"""
946 956 try:
947 957 with self.opener(filepath) as fp:
948 958 if mmap_threshold is not None:
949 959 file_size = self.opener.fstat(fp).st_size
950 960 if file_size >= mmap_threshold:
951 961 if size is not None:
952 962 # avoid potential mmap crash
953 963 size = min(file_size, size)
954 964 # TODO: should .close() to release resources without
955 965 # relying on Python GC
956 966 if size is None:
957 967 return util.buffer(util.mmapread(fp))
958 968 else:
959 969 return util.buffer(util.mmapread(fp, size))
960 970 if size is None:
961 971 return fp.read()
962 972 else:
963 973 return fp.read(size)
964 974 except FileNotFoundError:
965 975 return b''
966 976
967 977 def get_streams(self, max_linkrev, force_inline=False):
968 978 """return a list of streams that represent this revlog
969 979
970 980 This is used by stream-clone to do byte-for-byte copies of a repository.
971 981
972 982 This streams data for all revisions that refer to a changelog revision up
973 983 to `max_linkrev`.
974 984
975 985 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
976 986
977 987 It returns a list of three-tuples:
978 988
979 989 [
980 990 (filename, bytes_stream, stream_size),
981 991 …
982 992 ]
983 993 """
984 994 n = len(self)
985 995 index = self.index
986 996 while n > 0:
987 997 linkrev = index[n - 1][4]
988 998 if linkrev < max_linkrev:
989 999 break
990 1000 # note: this loop will rarely go through multiple iterations, since
991 1001 # it only traverses commits created during the current streaming
992 1002 # pull operation.
993 1003 #
994 1004 # If this becomes a problem, using a binary search should cap the
995 1005 # runtime of this.
996 1006 n = n - 1
997 1007 if n == 0:
998 1008 # no data to send
999 1009 return []
1000 1010 index_size = n * index.entry_size
1001 1011 data_size = self.end(n - 1)
1002 1012
1003 1013 # XXX we might have been split (or stripped) since the object
1004 1014 # initialization. We need to close this race too, e.g. by having a way to
1005 1015 # pre-open the files we feed to the revlog and never closing them before
1006 1016 # we are done streaming.
1007 1017
1008 1018 if self._inline:
1009 1019
1010 1020 def get_stream():
1011 1021 with self.opener(self._indexfile, mode=b"r") as fp:
1012 1022 yield None
1013 1023 size = index_size + data_size
1014 1024 if size <= 65536:
1015 1025 yield fp.read(size)
1016 1026 else:
1017 1027 yield from util.filechunkiter(fp, limit=size)
1018 1028
1019 1029 inline_stream = get_stream()
1020 1030 next(inline_stream)
1021 1031 return [
1022 1032 (self._indexfile, inline_stream, index_size + data_size),
1023 1033 ]
1024 1034 elif force_inline:
1025 1035
1026 1036 def get_stream():
1027 1037 with self.reading():
1028 1038 yield None
1029 1039
1030 1040 for rev in range(n):
1031 1041 idx = self.index.entry_binary(rev)
1032 1042 if rev == 0 and self._docket is None:
1033 1043 # re-inject the inline flag
1034 1044 header = self._format_flags
1035 1045 header |= self._format_version
1036 1046 header |= FLAG_INLINE_DATA
1037 1047 header = self.index.pack_header(header)
1038 1048 idx = header + idx
1039 1049 yield idx
1040 1050 yield self._inner.get_segment_for_revs(rev, rev)[1]
1041 1051
1042 1052 inline_stream = get_stream()
1043 1053 next(inline_stream)
1044 1054 return [
1045 1055 (self._indexfile, inline_stream, index_size + data_size),
1046 1056 ]
1047 1057 else:
1048 1058
1049 1059 def get_index_stream():
1050 1060 with self.opener(self._indexfile, mode=b"r") as fp:
1051 1061 yield None
1052 1062 if index_size <= 65536:
1053 1063 yield fp.read(index_size)
1054 1064 else:
1055 1065 yield from util.filechunkiter(fp, limit=index_size)
1056 1066
1057 1067 def get_data_stream():
1058 1068 with self._datafp() as fp:
1059 1069 yield None
1060 1070 if data_size <= 65536:
1061 1071 yield fp.read(data_size)
1062 1072 else:
1063 1073 yield from util.filechunkiter(fp, limit=data_size)
1064 1074
1065 1075 index_stream = get_index_stream()
1066 1076 next(index_stream)
1067 1077 data_stream = get_data_stream()
1068 1078 next(data_stream)
1069 1079 return [
1070 1080 (self._datafile, data_stream, data_size),
1071 1081 (self._indexfile, index_stream, index_size),
1072 1082 ]
1073 1083
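# A hedged consumption sketch for get_streams() (`rl` and `tip_linkrev` are
# assumptions, not names from this module):
#
#     for name, stream, size in rl.get_streams(max_linkrev=tip_linkrev):
#         copied = sum(len(chunk) for chunk in stream)
#         assert copied == size  # each stream advertises its exact byte size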
1074 1084 def _loadindex(self, docket=None):
1075 1085
1076 1086 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1077 1087
1078 1088 if self.postfix is not None:
1079 1089 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1080 1090 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1081 1091 entry_point = b'%s.i.a' % self.radix
1082 1092 elif self._try_split and self.opener.exists(self._split_index_file):
1083 1093 entry_point = self._split_index_file
1084 1094 else:
1085 1095 entry_point = b'%s.i' % self.radix
1086 1096
1087 1097 if docket is not None:
1088 1098 self._docket = docket
1089 1099 self._docket_file = entry_point
1090 1100 else:
1091 1101 self._initempty = True
1092 1102 entry_data = self._get_data(entry_point, mmapindexthreshold)
1093 1103 if len(entry_data) > 0:
1094 1104 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1095 1105 self._initempty = False
1096 1106 else:
1097 1107 header = new_header
1098 1108
1099 1109 self._format_flags = header & ~0xFFFF
1100 1110 self._format_version = header & 0xFFFF
1101 1111
1102 1112 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1103 1113 if supported_flags is None:
1104 1114 msg = _(b'unknown version (%d) in revlog %s')
1105 1115 msg %= (self._format_version, self.display_id)
1106 1116 raise error.RevlogError(msg)
1107 1117 elif self._format_flags & ~supported_flags:
1108 1118 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1109 1119 display_flag = self._format_flags >> 16
1110 1120 msg %= (display_flag, self._format_version, self.display_id)
1111 1121 raise error.RevlogError(msg)
1112 1122
1113 1123 features = FEATURES_BY_VERSION[self._format_version]
1114 1124 self._inline = features[b'inline'](self._format_flags)
1115 1125 self.delta_config.general_delta = features[b'generaldelta'](
1116 1126 self._format_flags
1117 1127 )
1118 1128 self.feature_config.has_side_data = features[b'sidedata']
1119 1129
1120 1130 if not features[b'docket']:
1121 1131 self._indexfile = entry_point
1122 1132 index_data = entry_data
1123 1133 else:
1124 1134 self._docket_file = entry_point
1125 1135 if self._initempty:
1126 1136 self._docket = docketutil.default_docket(self, header)
1127 1137 else:
1128 1138 self._docket = docketutil.parse_docket(
1129 1139 self, entry_data, use_pending=self._trypending
1130 1140 )
1131 1141
1132 1142 if self._docket is not None:
1133 1143 self._indexfile = self._docket.index_filepath()
1134 1144 index_data = b''
1135 1145 index_size = self._docket.index_end
1136 1146 if index_size > 0:
1137 1147 index_data = self._get_data(
1138 1148 self._indexfile, mmapindexthreshold, size=index_size
1139 1149 )
1140 1150 if len(index_data) < index_size:
1141 1151 msg = _(b'too few index data for %s: got %d, expected %d')
1142 1152 msg %= (self.display_id, len(index_data), index_size)
1143 1153 raise error.RevlogError(msg)
1144 1154
1145 1155 self._inline = False
1146 1156 # generaldelta implied by version 2 revlogs.
1147 1157 self.delta_config.general_delta = True
1148 1158 # the logic for persistent nodemap will be dealt with within the
1149 1159 # main docket, so disable it for now.
1150 1160 self._nodemap_file = None
1151 1161
1152 1162 if self._docket is not None:
1153 1163 self._datafile = self._docket.data_filepath()
1154 1164 self._sidedatafile = self._docket.sidedata_filepath()
1155 1165 elif self.postfix is None:
1156 1166 self._datafile = b'%s.d' % self.radix
1157 1167 else:
1158 1168 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1159 1169
1160 1170 self.nodeconstants = sha1nodeconstants
1161 1171 self.nullid = self.nodeconstants.nullid
1162 1172
1163 1173 # sparse-revlog can't be on without general-delta (issue6056)
1164 1174 if not self.delta_config.general_delta:
1165 1175 self.delta_config.sparse_revlog = False
1166 1176
1167 1177 self._storedeltachains = True
1168 1178
1169 1179 devel_nodemap = (
1170 1180 self._nodemap_file
1171 1181 and force_nodemap
1172 1182 and parse_index_v1_nodemap is not None
1173 1183 )
1174 1184
1175 1185 use_rust_index = False
1176 1186 if rustrevlog is not None:
1177 1187 if self._nodemap_file is not None:
1178 1188 use_rust_index = True
1179 1189 else:
1180 1190 use_rust_index = self.opener.options.get(b'rust.index')
1181 1191
1182 1192 self._parse_index = parse_index_v1
1183 1193 if self._format_version == REVLOGV0:
1184 1194 self._parse_index = revlogv0.parse_index_v0
1185 1195 elif self._format_version == REVLOGV2:
1186 1196 self._parse_index = parse_index_v2
1187 1197 elif self._format_version == CHANGELOGV2:
1188 1198 self._parse_index = parse_index_cl_v2
1189 1199 elif devel_nodemap:
1190 1200 self._parse_index = parse_index_v1_nodemap
1191 1201 elif use_rust_index:
1192 1202 self._parse_index = parse_index_v1_mixed
1193 1203 try:
1194 1204 d = self._parse_index(index_data, self._inline)
1195 1205 index, chunkcache = d
1196 1206 use_nodemap = (
1197 1207 not self._inline
1198 1208 and self._nodemap_file is not None
1199 1209 and hasattr(index, 'update_nodemap_data')
1200 1210 )
1201 1211 if use_nodemap:
1202 1212 nodemap_data = nodemaputil.persisted_data(self)
1203 1213 if nodemap_data is not None:
1204 1214 docket = nodemap_data[0]
1205 1215 if (
1206 1216 len(d[0]) > docket.tip_rev
1207 1217 and d[0][docket.tip_rev][7] == docket.tip_node
1208 1218 ):
1209 1219 # no changelog tampering
1210 1220 self._nodemap_docket = docket
1211 1221 index.update_nodemap_data(*nodemap_data)
1212 1222 except (ValueError, IndexError):
1213 1223 raise error.RevlogError(
1214 1224 _(b"index %s is corrupted") % self.display_id
1215 1225 )
1216 1226 self.index = index
1217 1227 # revnum -> (chain-length, sum-delta-length)
1218 1228 self._chaininfocache = util.lrucachedict(500)
1219 1229 # revlog header -> revlog compressor
1220 1230 self._decompressors = {}
1221 1231
1222 1232 return chunkcache
1223 1233
1224 1234 def _load_inner(self, chunk_cache):
1225 1235 self._inner = _InnerRevlog(
1226 1236 opener=self.opener,
1227 1237 index=self.index,
1228 1238 index_file=self._indexfile,
1229 1239 data_file=self._datafile,
1230 1240 sidedata_file=self._sidedatafile,
1231 1241 inline=self._inline,
1232 1242 data_config=self.data_config,
1233 1243 chunk_cache=chunk_cache,
1234 1244 )
1235 1245
1236 1246 def get_revlog(self):
1237 1247 """simple function to mirror API of other not-really-revlog API"""
1238 1248 return self
1239 1249
1240 1250 @util.propertycache
1241 1251 def revlog_kind(self):
1242 1252 return self.target[0]
1243 1253
1244 1254 @util.propertycache
1245 1255 def display_id(self):
1246 1256 """The public facing "ID" of the revlog that we use in message"""
1247 1257 if self.revlog_kind == KIND_FILELOG:
1248 1258 # Reference the file without the "data/" prefix, so it is familiar
1249 1259 # to the user.
1250 1260 return self.target[1]
1251 1261 else:
1252 1262 return self.radix
1253 1263
1254 1264 def _get_decompressor(self, t):
1255 1265 try:
1256 1266 compressor = self._decompressors[t]
1257 1267 except KeyError:
1258 1268 try:
1259 1269 engine = util.compengines.forrevlogheader(t)
1260 1270 compressor = engine.revlogcompressor(
1261 1271 self.feature_config.compression_engine_options
1262 1272 )
1263 1273 self._decompressors[t] = compressor
1264 1274 except KeyError:
1265 1275 raise error.RevlogError(
1266 1276 _(b'unknown compression type %s') % binascii.hexlify(t)
1267 1277 )
1268 1278 return compressor
1269 1279
1270 1280 @util.propertycache
1271 1281 def _compressor(self):
1272 1282 engine = util.compengines[self.feature_config.compression_engine]
1273 1283 return engine.revlogcompressor(
1274 1284 self.feature_config.compression_engine_options
1275 1285 )
1276 1286
1277 1287 @util.propertycache
1278 1288 def _decompressor(self):
1279 1289 """the default decompressor"""
1280 1290 if self._docket is None:
1281 1291 return None
1282 1292 t = self._docket.default_compression_header
1283 1293 c = self._get_decompressor(t)
1284 1294 return c.decompress
1285 1295
1286 1296 def _datafp(self, mode=b'r'):
1287 1297 """file object for the revlog's data file"""
1288 1298 return self.opener(self._datafile, mode=mode)
1289 1299
1290 1300 def tiprev(self):
1291 1301 return len(self.index) - 1
1292 1302
1293 1303 def tip(self):
1294 1304 return self.node(self.tiprev())
1295 1305
1296 1306 def __contains__(self, rev):
1297 1307 return 0 <= rev < len(self)
1298 1308
1299 1309 def __len__(self):
1300 1310 return len(self.index)
1301 1311
1302 1312 def __iter__(self):
1303 1313 return iter(range(len(self)))
1304 1314
1305 1315 def revs(self, start=0, stop=None):
1306 1316 """iterate over all rev in this revlog (from start to stop)"""
1307 1317 return storageutil.iterrevs(len(self), start=start, stop=stop)
1308 1318
1309 1319 def hasnode(self, node):
1310 1320 try:
1311 1321 self.rev(node)
1312 1322 return True
1313 1323 except KeyError:
1314 1324 return False
1315 1325
1316 1326 def _candelta(self, baserev, rev):
1317 1327 """whether two revisions (baserev, rev) can be delta-ed or not"""
1318 1328 # Disable delta if either rev requires a content-changing flag
1319 1329 # processor (ex. LFS). This is because such a flag processor can alter
1320 1330 # the rawtext content that the delta will be based on, and two clients
1321 1331 # could have the same revlog node with different flags (i.e. different
1322 1332 # rawtext contents) and the delta could be incompatible.
1323 1333 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1324 1334 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1325 1335 ):
1326 1336 return False
1327 1337 return True
1328 1338
1329 1339 def update_caches(self, transaction):
1330 1340 """update on disk cache
1331 1341
1332 1342 If a transaction is passed, the update may be delayed to transaction
1333 1343 commit."""
1334 1344 if self._nodemap_file is not None:
1335 1345 if transaction is None:
1336 1346 nodemaputil.update_persistent_nodemap(self)
1337 1347 else:
1338 1348 nodemaputil.setup_persistent_nodemap(transaction, self)
1339 1349
1340 1350 def clearcaches(self):
1341 1351 """Clear in-memory caches"""
1342 1352 self._revisioncache = None
1343 1353 self._chainbasecache.clear()
1344 1354 self._inner._segmentfile.clear_cache()
1345 1355 self._inner._segmentfile_sidedata.clear_cache()
1346 1356 self._pcache = {}
1347 1357 self._nodemap_docket = None
1348 1358 self.index.clearcaches()
1349 1359 # The python code is the one responsible for validating the docket, so we
1350 1360 # end up having to refresh it here.
1351 1361 use_nodemap = (
1352 1362 not self._inline
1353 1363 and self._nodemap_file is not None
1354 1364 and hasattr(self.index, 'update_nodemap_data')
1355 1365 )
1356 1366 if use_nodemap:
1357 1367 nodemap_data = nodemaputil.persisted_data(self)
1358 1368 if nodemap_data is not None:
1359 1369 self._nodemap_docket = nodemap_data[0]
1360 1370 self.index.update_nodemap_data(*nodemap_data)
1361 1371
1362 1372 def rev(self, node):
1363 1373 """return the revision number associated with a <nodeid>"""
1364 1374 try:
1365 1375 return self.index.rev(node)
1366 1376 except TypeError:
1367 1377 raise
1368 1378 except error.RevlogError:
1369 1379 # parsers.c radix tree lookup failed
1370 1380 if (
1371 1381 node == self.nodeconstants.wdirid
1372 1382 or node in self.nodeconstants.wdirfilenodeids
1373 1383 ):
1374 1384 raise error.WdirUnsupported
1375 1385 raise error.LookupError(node, self.display_id, _(b'no node'))
1376 1386
1377 1387 # Accessors for index entries.
1378 1388
1379 1389 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1380 1390 # are flags.
1381 1391 def start(self, rev):
1382 1392 return int(self.index[rev][0] >> 16)
1383 1393
1384 1394 def sidedata_cut_off(self, rev):
1385 1395 sd_cut_off = self.index[rev][8]
1386 1396 if sd_cut_off != 0:
1387 1397 return sd_cut_off
1388 1398 # This is some annoying dance, because entries without sidedata
1389 1399 # currently use 0 as their offset (instead of previous-offset +
1390 1400 # previous-size).
1391 1401 #
1392 1402 # We should reconsider this sidedata → 0 sidedata_offset policy.
1393 1403 # In the meantime, we need this.
1394 1404 while 0 <= rev:
1395 1405 e = self.index[rev]
1396 1406 if e[9] != 0:
1397 1407 return e[8] + e[9]
1398 1408 rev -= 1
1399 1409 return 0
1400 1410
1401 1411 def flags(self, rev):
1402 1412 return self.index[rev][0] & 0xFFFF
1403 1413
1404 1414 def length(self, rev):
1405 1415 return self.index[rev][1]
1406 1416
1407 1417 def sidedata_length(self, rev):
1408 1418 if not self.feature_config.has_side_data:
1409 1419 return 0
1410 1420 return self.index[rev][9]
1411 1421
1412 1422 def rawsize(self, rev):
1413 1423 """return the length of the uncompressed text for a given revision"""
1414 1424 l = self.index[rev][2]
1415 1425 if l >= 0:
1416 1426 return l
1417 1427
1418 1428 t = self.rawdata(rev)
1419 1429 return len(t)
1420 1430
1421 1431 def size(self, rev):
1422 1432 """length of non-raw text (processed by a "read" flag processor)"""
1423 1433 # fast path: if no "read" flag processor could change the content,
1424 1434 # size is rawsize. note: ELLIPSIS is known to not change the content.
1425 1435 flags = self.flags(rev)
1426 1436 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1427 1437 return self.rawsize(rev)
1428 1438
1429 1439 return len(self.revision(rev))
1430 1440
1431 1441 def fast_rank(self, rev):
1432 1442 """Return the rank of a revision if already known, or None otherwise.
1433 1443
1434 1444 The rank of a revision is the size of the sub-graph it defines as a
1435 1445 head. Equivalently, the rank of a revision `r` is the size of the set
1436 1446 `ancestors(r)`, `r` included.
1437 1447
1438 1448 This method returns the rank retrieved from the revlog in constant
1439 1449 time. It makes no attempt at computing unknown values for versions of
1440 1450 the revlog which do not persist the rank.
1441 1451 """
1442 1452 rank = self.index[rev][ENTRY_RANK]
1443 1453 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1444 1454 return None
1445 1455 if rev == nullrev:
1446 1456 return 0 # convention
1447 1457 return rank
1448 1458
1449 1459 def chainbase(self, rev):
1450 1460 base = self._chainbasecache.get(rev)
1451 1461 if base is not None:
1452 1462 return base
1453 1463
1454 1464 index = self.index
1455 1465 iterrev = rev
1456 1466 base = index[iterrev][3]
1457 1467 while base != iterrev:
1458 1468 iterrev = base
1459 1469 base = index[iterrev][3]
1460 1470
1461 1471 self._chainbasecache[rev] = base
1462 1472 return base
1463 1473
1464 1474 def linkrev(self, rev):
1465 1475 return self.index[rev][4]
1466 1476
1467 1477 def parentrevs(self, rev):
1468 1478 try:
1469 1479 entry = self.index[rev]
1470 1480 except IndexError:
1471 1481 if rev == wdirrev:
1472 1482 raise error.WdirUnsupported
1473 1483 raise
1474 1484
1475 1485 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1476 1486 return entry[6], entry[5]
1477 1487 else:
1478 1488 return entry[5], entry[6]
1479 1489
1480 1490 # fast parentrevs(rev) where rev isn't filtered
1481 1491 _uncheckedparentrevs = parentrevs
1482 1492
1483 1493 def node(self, rev):
1484 1494 try:
1485 1495 return self.index[rev][7]
1486 1496 except IndexError:
1487 1497 if rev == wdirrev:
1488 1498 raise error.WdirUnsupported
1489 1499 raise
1490 1500
1491 1501 # Derived from index values.
1492 1502
1493 1503 def end(self, rev):
1494 1504 return self.start(rev) + self.length(rev)
1495 1505
1496 1506 def parents(self, node):
1497 1507 i = self.index
1498 1508 d = i[self.rev(node)]
1499 1509 # inline node() to avoid function call overhead
1500 1510 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1501 1511 return i[d[6]][7], i[d[5]][7]
1502 1512 else:
1503 1513 return i[d[5]][7], i[d[6]][7]
1504 1514
1505 1515 def chainlen(self, rev):
1506 1516 return self._chaininfo(rev)[0]
1507 1517
1508 1518 def _chaininfo(self, rev):
1509 1519 chaininfocache = self._chaininfocache
1510 1520 if rev in chaininfocache:
1511 1521 return chaininfocache[rev]
1512 1522 index = self.index
1513 1523 generaldelta = self.delta_config.general_delta
1514 1524 iterrev = rev
1515 1525 e = index[iterrev]
1516 1526 clen = 0
1517 1527 compresseddeltalen = 0
1518 1528 while iterrev != e[3]:
1519 1529 clen += 1
1520 1530 compresseddeltalen += e[1]
1521 1531 if generaldelta:
1522 1532 iterrev = e[3]
1523 1533 else:
1524 1534 iterrev -= 1
1525 1535 if iterrev in chaininfocache:
1526 1536 t = chaininfocache[iterrev]
1527 1537 clen += t[0]
1528 1538 compresseddeltalen += t[1]
1529 1539 break
1530 1540 e = index[iterrev]
1531 1541 else:
1532 1542 # Add text length of base since decompressing that also takes
1533 1543 # work. For cache hits the length is already included.
1534 1544 compresseddeltalen += e[1]
1535 1545 r = (clen, compresseddeltalen)
1536 1546 chaininfocache[rev] = r
1537 1547 return r
1538 1548
1539 1549 def _deltachain(self, rev, stoprev=None):
1540 1550 """Obtain the delta chain for a revision.
1541 1551
1542 1552 ``stoprev`` specifies a revision to stop at. If not specified, we
1543 1553 stop at the base of the chain.
1544 1554
1545 1555 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1546 1556 revs in ascending order and ``stopped`` is a bool indicating whether
1547 1557 ``stoprev`` was hit.
1548 1558 """
1549 1559 generaldelta = self.delta_config.general_delta
1550 1560 # Try C implementation.
1551 1561 try:
1552 1562 return self.index.deltachain(rev, stoprev, generaldelta)
1553 1563 except AttributeError:
1554 1564 pass
1555 1565
1556 1566 chain = []
1557 1567
1558 1568 # Alias to prevent attribute lookup in tight loop.
1559 1569 index = self.index
1560 1570
1561 1571 iterrev = rev
1562 1572 e = index[iterrev]
1563 1573 while iterrev != e[3] and iterrev != stoprev:
1564 1574 chain.append(iterrev)
1565 1575 if generaldelta:
1566 1576 iterrev = e[3]
1567 1577 else:
1568 1578 iterrev -= 1
1569 1579 e = index[iterrev]
1570 1580
1571 1581 if iterrev == stoprev:
1572 1582 stopped = True
1573 1583 else:
1574 1584 chain.append(iterrev)
1575 1585 stopped = False
1576 1586
1577 1587 chain.reverse()
1578 1588 return chain, stopped
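# An illustration of the return value (a sketch; the revision numbers are
# hypothetical): for a rev 5 whose general-delta chain is 2 -> 3 -> 5,
#
#     _deltachain(5)            returns ([2, 3, 5], False)  # reached the base
#     _deltachain(5, stoprev=3) returns ([5], True)         # stopped early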
1579 1589
1580 1590 def ancestors(self, revs, stoprev=0, inclusive=False):
1581 1591 """Generate the ancestors of 'revs' in reverse revision order.
1582 1592 Does not generate revs lower than stoprev.
1583 1593
1584 1594 See the documentation for ancestor.lazyancestors for more details."""
1585 1595
1586 1596 # first, make sure start revisions aren't filtered
1587 1597 revs = list(revs)
1588 1598 checkrev = self.node
1589 1599 for r in revs:
1590 1600 checkrev(r)
1591 1601 # and we're sure ancestors aren't filtered as well
1592 1602
1593 1603 if rustancestor is not None and self.index.rust_ext_compat:
1594 1604 lazyancestors = rustancestor.LazyAncestors
1595 1605 arg = self.index
1596 1606 else:
1597 1607 lazyancestors = ancestor.lazyancestors
1598 1608 arg = self._uncheckedparentrevs
1599 1609 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1600 1610
1601 1611 def descendants(self, revs):
1602 1612 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1603 1613
1604 1614 def findcommonmissing(self, common=None, heads=None):
1605 1615 """Return a tuple of the ancestors of common and the ancestors of heads
1606 1616 that are not ancestors of common. In revset terminology, we return the
1607 1617 tuple:
1608 1618
1609 1619 ::common, (::heads) - (::common)
1610 1620
1611 1621 The list is sorted by revision number, meaning it is
1612 1622 topologically sorted.
1613 1623
1614 1624 'heads' and 'common' are both lists of node IDs. If heads is
1615 1625 not supplied, uses all of the revlog's heads. If common is not
1616 1626 supplied, uses nullid."""
1617 1627 if common is None:
1618 1628 common = [self.nullid]
1619 1629 if heads is None:
1620 1630 heads = self.heads()
1621 1631
1622 1632 common = [self.rev(n) for n in common]
1623 1633 heads = [self.rev(n) for n in heads]
1624 1634
1625 1635 # we want the ancestors, but inclusive
1626 1636 class lazyset:
1627 1637 def __init__(self, lazyvalues):
1628 1638 self.addedvalues = set()
1629 1639 self.lazyvalues = lazyvalues
1630 1640
1631 1641 def __contains__(self, value):
1632 1642 return value in self.addedvalues or value in self.lazyvalues
1633 1643
1634 1644 def __iter__(self):
1635 1645 added = self.addedvalues
1636 1646 for r in added:
1637 1647 yield r
1638 1648 for r in self.lazyvalues:
1639 1649 if r not in added:
1640 1650 yield r
1641 1651
1642 1652 def add(self, value):
1643 1653 self.addedvalues.add(value)
1644 1654
1645 1655 def update(self, values):
1646 1656 self.addedvalues.update(values)
1647 1657
1648 1658 has = lazyset(self.ancestors(common))
1649 1659 has.add(nullrev)
1650 1660 has.update(common)
1651 1661
1652 1662 # take all ancestors from heads that aren't in has
1653 1663 missing = set()
1654 1664 visit = collections.deque(r for r in heads if r not in has)
1655 1665 while visit:
1656 1666 r = visit.popleft()
1657 1667 if r in missing:
1658 1668 continue
1659 1669 else:
1660 1670 missing.add(r)
1661 1671 for p in self.parentrevs(r):
1662 1672 if p not in has:
1663 1673 visit.append(p)
1664 1674 missing = list(missing)
1665 1675 missing.sort()
1666 1676 return has, [self.node(miss) for miss in missing]
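
# Illustrative sketch (hypothetical linear history 0..4): with
# common=[node(2)] and heads=[node(4)], ``has`` lazily covers ::2 and
# the second element is [node(3), node(4)], i.e. (::4) - (::2).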
1667 1677
1668 1678 def incrementalmissingrevs(self, common=None):
1669 1679 """Return an object that can be used to incrementally compute the
1670 1680 revision numbers of the ancestors of arbitrary sets that are not
1671 1681 ancestors of common. This is an ancestor.incrementalmissingancestors
1672 1682 object.
1673 1683
1674 1684 'common' is a list of revision numbers. If common is not supplied, uses
1675 1685 nullrev.
1676 1686 """
1677 1687 if common is None:
1678 1688 common = [nullrev]
1679 1689
1680 1690 if rustancestor is not None and self.index.rust_ext_compat:
1681 1691 return rustancestor.MissingAncestors(self.index, common)
1682 1692 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1683 1693
1684 1694 def findmissingrevs(self, common=None, heads=None):
1685 1695 """Return the revision numbers of the ancestors of heads that
1686 1696 are not ancestors of common.
1687 1697
1688 1698 More specifically, return a list of revision numbers corresponding to
1689 1699 nodes N such that every N satisfies the following constraints:
1690 1700
1691 1701 1. N is an ancestor of some node in 'heads'
1692 1702 2. N is not an ancestor of any node in 'common'
1693 1703
1694 1704 The list is sorted by revision number, meaning it is
1695 1705 topologically sorted.
1696 1706
1697 1707 'heads' and 'common' are both lists of revision numbers. If heads is
1698 1708 not supplied, uses all of the revlog's heads. If common is not
1699 1709 supplied, uses nullid."""
1700 1710 if common is None:
1701 1711 common = [nullrev]
1702 1712 if heads is None:
1703 1713 heads = self.headrevs()
1704 1714
1705 1715 inc = self.incrementalmissingrevs(common=common)
1706 1716 return inc.missingancestors(heads)
1707 1717
1708 1718 def findmissing(self, common=None, heads=None):
1709 1719 """Return the ancestors of heads that are not ancestors of common.
1710 1720
1711 1721 More specifically, return a list of nodes N such that every N
1712 1722 satisfies the following constraints:
1713 1723
1714 1724 1. N is an ancestor of some node in 'heads'
1715 1725 2. N is not an ancestor of any node in 'common'
1716 1726
1717 1727 The list is sorted by revision number, meaning it is
1718 1728 topologically sorted.
1719 1729
1720 1730 'heads' and 'common' are both lists of node IDs. If heads is
1721 1731 not supplied, uses all of the revlog's heads. If common is not
1722 1732 supplied, uses nullid."""
1723 1733 if common is None:
1724 1734 common = [self.nullid]
1725 1735 if heads is None:
1726 1736 heads = self.heads()
1727 1737
1728 1738 common = [self.rev(n) for n in common]
1729 1739 heads = [self.rev(n) for n in heads]
1730 1740
1731 1741 inc = self.incrementalmissingrevs(common=common)
1732 1742 return [self.node(r) for r in inc.missingancestors(heads)]
1733 1743
1734 1744 def nodesbetween(self, roots=None, heads=None):
1735 1745 """Return a topological path from 'roots' to 'heads'.
1736 1746
1737 1747 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1738 1748 topologically sorted list of all nodes N that satisfy both of
1739 1749 these constraints:
1740 1750
1741 1751 1. N is a descendant of some node in 'roots'
1742 1752 2. N is an ancestor of some node in 'heads'
1743 1753
1744 1754 Every node is considered to be both a descendant and an ancestor
1745 1755 of itself, so every reachable node in 'roots' and 'heads' will be
1746 1756 included in 'nodes'.
1747 1757
1748 1758 'outroots' is the list of reachable nodes in 'roots', i.e., the
1749 1759 subset of 'roots' that is returned in 'nodes'. Likewise,
1750 1760 'outheads' is the subset of 'heads' that is also in 'nodes'.
1751 1761
1752 1762 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1753 1763 unspecified, uses nullid as the only root. If 'heads' is
1754 1764 unspecified, uses the list of all of the revlog's heads."""
1755 1765 nonodes = ([], [], [])
1756 1766 if roots is not None:
1757 1767 roots = list(roots)
1758 1768 if not roots:
1759 1769 return nonodes
1760 1770 lowestrev = min([self.rev(n) for n in roots])
1761 1771 else:
1762 1772 roots = [self.nullid] # Everybody's a descendant of nullid
1763 1773 lowestrev = nullrev
1764 1774 if (lowestrev == nullrev) and (heads is None):
1765 1775 # We want _all_ the nodes!
1766 1776 return (
1767 1777 [self.node(r) for r in self],
1768 1778 [self.nullid],
1769 1779 list(self.heads()),
1770 1780 )
1771 1781 if heads is None:
1772 1782 # All nodes are ancestors, so the latest ancestor is the last
1773 1783 # node.
1774 1784 highestrev = len(self) - 1
1775 1785 # Set ancestors to None to signal that every node is an ancestor.
1776 1786 ancestors = None
1777 1787 # Set heads to an empty dictionary for later discovery of heads
1778 1788 heads = {}
1779 1789 else:
1780 1790 heads = list(heads)
1781 1791 if not heads:
1782 1792 return nonodes
1783 1793 ancestors = set()
1784 1794 # Turn heads into a dictionary so we can remove 'fake' heads.
1785 1795 # Also, later we will be using it to filter out the heads we can't
1786 1796 # find from roots.
1787 1797 heads = dict.fromkeys(heads, False)
1788 1798 # Start at the top and keep marking parents until we're done.
1789 1799 nodestotag = set(heads)
1790 1800 # Remember where the top was so we can use it as a limit later.
1791 1801 highestrev = max([self.rev(n) for n in nodestotag])
1792 1802 while nodestotag:
1793 1803 # grab a node to tag
1794 1804 n = nodestotag.pop()
1795 1805 # Never tag nullid
1796 1806 if n == self.nullid:
1797 1807 continue
1798 1808 # A node's revision number represents its place in a
1799 1809 # topologically sorted list of nodes.
1800 1810 r = self.rev(n)
1801 1811 if r >= lowestrev:
1802 1812 if n not in ancestors:
1803 1813 # If we are possibly a descendant of one of the roots
1804 1814 # and we haven't already been marked as an ancestor
1805 1815 ancestors.add(n) # Mark as ancestor
1806 1816 # Add non-nullid parents to list of nodes to tag.
1807 1817 nodestotag.update(
1808 1818 [p for p in self.parents(n) if p != self.nullid]
1809 1819 )
1810 1820 elif n in heads: # We've seen it before, is it a fake head?
1811 1821 # So it is, real heads should not be the ancestors of
1812 1822 # any other heads.
1813 1823 heads.pop(n)
1814 1824 if not ancestors:
1815 1825 return nonodes
1816 1826 # Now that we have our set of ancestors, we want to remove any
1817 1827 # roots that are not ancestors.
1818 1828
1819 1829 # If one of the roots was nullid, everything is included anyway.
1820 1830 if lowestrev > nullrev:
1821 1831 # But, since we weren't, let's recompute the lowest rev to not
1822 1832 # include roots that aren't ancestors.
1823 1833
1824 1834 # Filter out roots that aren't ancestors of heads
1825 1835 roots = [root for root in roots if root in ancestors]
1826 1836 # Recompute the lowest revision
1827 1837 if roots:
1828 1838 lowestrev = min([self.rev(root) for root in roots])
1829 1839 else:
1830 1840 # No more roots? Return empty list
1831 1841 return nonodes
1832 1842 else:
1833 1843 # We are descending from nullid, and don't need to care about
1834 1844 # any other roots.
1835 1845 lowestrev = nullrev
1836 1846 roots = [self.nullid]
1837 1847 # Transform our roots list into a set.
1838 1848 descendants = set(roots)
1839 1849 # Also, keep the original roots so we can filter out roots that aren't
1840 1850 # 'real' roots (i.e. are descended from other roots).
1841 1851 roots = descendants.copy()
1842 1852 # Our topologically sorted list of output nodes.
1843 1853 orderedout = []
1844 1854 # Don't start at nullid since we don't want nullid in our output list,
1845 1855 # and if nullid shows up in descendants, empty parents will look like
1846 1856 # they're descendants.
1847 1857 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1848 1858 n = self.node(r)
1849 1859 isdescendant = False
1850 1860 if lowestrev == nullrev: # Everybody is a descendant of nullid
1851 1861 isdescendant = True
1852 1862 elif n in descendants:
1853 1863 # n is already a descendant
1854 1864 isdescendant = True
1855 1865 # This check only needs to be done here because all the roots
1856 1866 # will start being marked as descendants before the loop.
1857 1867 if n in roots:
1858 1868 # If n was a root, check if it's a 'real' root.
1859 1869 p = tuple(self.parents(n))
1860 1870 # If any of its parents are descendants, it's not a root.
1861 1871 if (p[0] in descendants) or (p[1] in descendants):
1862 1872 roots.remove(n)
1863 1873 else:
1864 1874 p = tuple(self.parents(n))
1865 1875 # A node is a descendant if either of its parents are
1866 1876 # descendants. (We seeded the descendants set with the roots
1867 1877 # up there, remember?)
1868 1878 if (p[0] in descendants) or (p[1] in descendants):
1869 1879 descendants.add(n)
1870 1880 isdescendant = True
1871 1881 if isdescendant and ((ancestors is None) or (n in ancestors)):
1872 1882 # Only include nodes that are both descendants and ancestors.
1873 1883 orderedout.append(n)
1874 1884 if (ancestors is not None) and (n in heads):
1875 1885 # We're trying to figure out which heads are reachable
1876 1886 # from roots.
1877 1887 # Mark this head as having been reached
1878 1888 heads[n] = True
1879 1889 elif ancestors is None:
1880 1890 # Otherwise, we're trying to discover the heads.
1881 1891 # Assume this is a head because if it isn't, the next step
1882 1892 # will eventually remove it.
1883 1893 heads[n] = True
1884 1894 # But, obviously its parents aren't.
1885 1895 for p in self.parents(n):
1886 1896 heads.pop(p, None)
1887 1897 heads = [head for head, flag in heads.items() if flag]
1888 1898 roots = list(roots)
1889 1899 assert orderedout
1890 1900 assert roots
1891 1901 assert heads
1892 1902 return (orderedout, roots, heads)
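
# Illustrative sketch (hypothetical linear history 0..3): with
# roots=[node(1)] and heads=[node(3)], nodesbetween returns
# ([node(1), node(2), node(3)], [node(1)], [node(3)]): the topological
# path plus the subsets of roots and heads that are actually reachable.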
1893 1903
1894 1904 def headrevs(self, revs=None):
1895 1905 if revs is None:
1896 1906 try:
1897 1907 return self.index.headrevs()
1898 1908 except AttributeError:
1899 1909 return self._headrevs()
1900 1910 if rustdagop is not None and self.index.rust_ext_compat:
1901 1911 return rustdagop.headrevs(self.index, revs)
1902 1912 return dagop.headrevs(revs, self._uncheckedparentrevs)
1903 1913
1904 1914 def computephases(self, roots):
1905 1915 return self.index.computephasesmapsets(roots)
1906 1916
1907 1917 def _headrevs(self):
1908 1918 count = len(self)
1909 1919 if not count:
1910 1920 return [nullrev]
1911 1921 # we won't iterate over filtered revs, so nobody is a head at start
1912 1922 ishead = [0] * (count + 1)
1913 1923 index = self.index
1914 1924 for r in self:
1915 1925 ishead[r] = 1 # I may be a head
1916 1926 e = index[r]
1917 1927 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1918 1928 return [r for r, val in enumerate(ishead) if val]
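
# Minimal sketch of the marking pass above (assumed parent pairs, not
# the real revlog index): every rev starts as a candidate head and is
# cleared as soon as it appears as somebody's parent.
#
#     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
#     ishead = dict.fromkeys(parents, True)
#     for r, (p1, p2) in parents.items():
#         ishead.pop(p1, None)
#         ishead.pop(p2, None)
#     assert sorted(ishead) == [1, 2]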
1919 1929
1920 1930 def heads(self, start=None, stop=None):
1921 1931 """return the list of all nodes that have no children
1922 1932
1923 1933 if start is specified, only heads that are descendants of
1924 1934 start will be returned
1925 1935 if stop is specified, it will consider all the revs from stop
1926 1936 as if they had no children
1927 1937 """
1928 1938 if start is None and stop is None:
1929 1939 if not len(self):
1930 1940 return [self.nullid]
1931 1941 return [self.node(r) for r in self.headrevs()]
1932 1942
1933 1943 if start is None:
1934 1944 start = nullrev
1935 1945 else:
1936 1946 start = self.rev(start)
1937 1947
1938 1948 stoprevs = {self.rev(n) for n in stop or []}
1939 1949
1940 1950 revs = dagop.headrevssubset(
1941 1951 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1942 1952 )
1943 1953
1944 1954 return [self.node(rev) for rev in revs]
1945 1955
1946 1956 def children(self, node):
1947 1957 """find the children of a given node"""
1948 1958 c = []
1949 1959 p = self.rev(node)
1950 1960 for r in self.revs(start=p + 1):
1951 1961 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1952 1962 if prevs:
1953 1963 for pr in prevs:
1954 1964 if pr == p:
1955 1965 c.append(self.node(r))
1956 1966 elif p == nullrev:
1957 1967 c.append(self.node(r))
1958 1968 return c
1959 1969
1960 1970 def commonancestorsheads(self, a, b):
1961 1971 """calculate all the heads of the common ancestors of nodes a and b"""
1962 1972 a, b = self.rev(a), self.rev(b)
1963 1973 ancs = self._commonancestorsheads(a, b)
1964 1974 return pycompat.maplist(self.node, ancs)
1965 1975
1966 1976 def _commonancestorsheads(self, *revs):
1967 1977 """calculate all the heads of the common ancestors of revs"""
1968 1978 try:
1969 1979 ancs = self.index.commonancestorsheads(*revs)
1970 1980 except (AttributeError, OverflowError): # C implementation failed
1971 1981 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1972 1982 return ancs
1973 1983
1974 1984 def isancestor(self, a, b):
1975 1985 """return True if node a is an ancestor of node b
1976 1986
1977 1987 A revision is considered an ancestor of itself."""
1978 1988 a, b = self.rev(a), self.rev(b)
1979 1989 return self.isancestorrev(a, b)
1980 1990
1981 1991 def isancestorrev(self, a, b):
1982 1992 """return True if revision a is an ancestor of revision b
1983 1993
1984 1994 A revision is considered an ancestor of itself.
1985 1995
1986 1996 The implementation of this is trivial but the use of
1987 1997 reachableroots is not."""
1988 1998 if a == nullrev:
1989 1999 return True
1990 2000 elif a == b:
1991 2001 return True
1992 2002 elif a > b:
1993 2003 return False
1994 2004 return bool(self.reachableroots(a, [b], [a], includepath=False))
1995 2005
1996 2006 def reachableroots(self, minroot, heads, roots, includepath=False):
1997 2007 """return (heads(::(<roots> and <roots>::<heads>)))
1998 2008
1999 2009 If includepath is True, return (<roots>::<heads>)."""
2000 2010 try:
2001 2011 return self.index.reachableroots2(
2002 2012 minroot, heads, roots, includepath
2003 2013 )
2004 2014 except AttributeError:
2005 2015 return dagop._reachablerootspure(
2006 2016 self.parentrevs, minroot, roots, heads, includepath
2007 2017 )
2008 2018
2009 2019 def ancestor(self, a, b):
2010 2020 """calculate the "best" common ancestor of nodes a and b"""
2011 2021
2012 2022 a, b = self.rev(a), self.rev(b)
2013 2023 try:
2014 2024 ancs = self.index.ancestors(a, b)
2015 2025 except (AttributeError, OverflowError):
2016 2026 ancs = ancestor.ancestors(self.parentrevs, a, b)
2017 2027 if ancs:
2018 2028 # choose a consistent winner when there's a tie
2019 2029 return min(map(self.node, ancs))
2020 2030 return self.nullid
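
# Illustrative note (hypothetical criss-cross DAG): if revs 3 and 4
# share two "best" common ancestors, 1 and 2, the ``min(...)`` above
# returns min(node(1), node(2)) so repeated calls always agree on one
# winner.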
2021 2031
2022 2032 def _match(self, id):
2023 2033 if isinstance(id, int):
2024 2034 # rev
2025 2035 return self.node(id)
2026 2036 if len(id) == self.nodeconstants.nodelen:
2027 2037 # possibly a binary node
2028 2038 # odds of a binary node being all hex in ASCII are 1 in 10**25
2029 2039 try:
2030 2040 node = id
2031 2041 self.rev(node) # quick search the index
2032 2042 return node
2033 2043 except error.LookupError:
2034 2044 pass # may be partial hex id
2035 2045 try:
2036 2046 # str(rev)
2037 2047 rev = int(id)
2038 2048 if b"%d" % rev != id:
2039 2049 raise ValueError
2040 2050 if rev < 0:
2041 2051 rev = len(self) + rev
2042 2052 if rev < 0 or rev >= len(self):
2043 2053 raise ValueError
2044 2054 return self.node(rev)
2045 2055 except (ValueError, OverflowError):
2046 2056 pass
2047 2057 if len(id) == 2 * self.nodeconstants.nodelen:
2048 2058 try:
2049 2059 # a full hex nodeid?
2050 2060 node = bin(id)
2051 2061 self.rev(node)
2052 2062 return node
2053 2063 except (binascii.Error, error.LookupError):
2054 2064 pass
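
# Illustrative sketch of the resolution order above (hypothetical
# revlog ``rl``, assumed ids): an integer rev, a 20-byte binary node, a
# decimal bytestring (negative counts from the end), then a full
# 40-digit hex nodeid; anything else falls through and returns None.
#
#     rl._match(5)          # -> rl.node(5)
#     rl._match(b"-1")      # -> rl.node(len(rl) - 1)
#     rl._match(b"a" * 40)  # -> bin(b"a" * 40) if present in the index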
2055 2065
2056 2066 def _partialmatch(self, id):
2057 2067 # we don't care about wdirfilenodeids as they should always be full hashes
2058 2068 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2059 2069 ambiguous = False
2060 2070 try:
2061 2071 partial = self.index.partialmatch(id)
2062 2072 if partial and self.hasnode(partial):
2063 2073 if maybewdir:
2064 2074 # single 'ff...' match in radix tree, ambiguous with wdir
2065 2075 ambiguous = True
2066 2076 else:
2067 2077 return partial
2068 2078 elif maybewdir:
2069 2079 # no 'ff...' match in radix tree, wdir identified
2070 2080 raise error.WdirUnsupported
2071 2081 else:
2072 2082 return None
2073 2083 except error.RevlogError:
2074 2084 # parsers.c radix tree lookup gave multiple matches
2075 2085 # fast path: for unfiltered changelog, radix tree is accurate
2076 2086 if not getattr(self, 'filteredrevs', None):
2077 2087 ambiguous = True
2078 2088 # fall through to slow path that filters hidden revisions
2079 2089 except (AttributeError, ValueError):
2080 2090 # we are pure python, or key is not hex
2081 2091 pass
2082 2092 if ambiguous:
2083 2093 raise error.AmbiguousPrefixLookupError(
2084 2094 id, self.display_id, _(b'ambiguous identifier')
2085 2095 )
2086 2096
2087 2097 if id in self._pcache:
2088 2098 return self._pcache[id]
2089 2099
2090 2100 if len(id) <= 40:
2091 2101 # hex(node)[:...]
2092 2102 l = len(id) // 2 * 2 # grab an even number of digits
2093 2103 try:
2094 2104 # we're dropping the last digit, so let's check that it's hex,
2095 2105 # to avoid the expensive computation below if it's not
2096 2106 if len(id) % 2 > 0:
2097 2107 if not (id[-1] in hexdigits):
2098 2108 return None
2099 2109 prefix = bin(id[:l])
2100 2110 except binascii.Error:
2101 2111 pass
2102 2112 else:
2103 2113 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2104 2114 nl = [
2105 2115 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2106 2116 ]
2107 2117 if self.nodeconstants.nullhex.startswith(id):
2108 2118 nl.append(self.nullid)
2109 2119 if len(nl) > 0:
2110 2120 if len(nl) == 1 and not maybewdir:
2111 2121 self._pcache[id] = nl[0]
2112 2122 return nl[0]
2113 2123 raise error.AmbiguousPrefixLookupError(
2114 2124 id, self.display_id, _(b'ambiguous identifier')
2115 2125 )
2116 2126 if maybewdir:
2117 2127 raise error.WdirUnsupported
2118 2128 return None
2119 2129
2120 2130 def lookup(self, id):
2121 2131 """locate a node based on:
2122 2132 - revision number or str(revision number)
2123 2133 - nodeid or subset of hex nodeid
2124 2134 """
2125 2135 n = self._match(id)
2126 2136 if n is not None:
2127 2137 return n
2128 2138 n = self._partialmatch(id)
2129 2139 if n:
2130 2140 return n
2131 2141
2132 2142 raise error.LookupError(id, self.display_id, _(b'no match found'))
2133 2143
2134 2144 def shortest(self, node, minlength=1):
2135 2145 """Find the shortest unambiguous prefix that matches node."""
2136 2146
2137 2147 def isvalid(prefix):
2138 2148 try:
2139 2149 matchednode = self._partialmatch(prefix)
2140 2150 except error.AmbiguousPrefixLookupError:
2141 2151 return False
2142 2152 except error.WdirUnsupported:
2143 2153 # single 'ff...' match
2144 2154 return True
2145 2155 if matchednode is None:
2146 2156 raise error.LookupError(node, self.display_id, _(b'no node'))
2147 2157 return True
2148 2158
2149 2159 def maybewdir(prefix):
2150 2160 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2151 2161
2152 2162 hexnode = hex(node)
2153 2163
2154 2164 def disambiguate(hexnode, minlength):
2155 2165 """Disambiguate against wdirid."""
2156 2166 for length in range(minlength, len(hexnode) + 1):
2157 2167 prefix = hexnode[:length]
2158 2168 if not maybewdir(prefix):
2159 2169 return prefix
2160 2170
2161 2171 if not getattr(self, 'filteredrevs', None):
2162 2172 try:
2163 2173 length = max(self.index.shortest(node), minlength)
2164 2174 return disambiguate(hexnode, length)
2165 2175 except error.RevlogError:
2166 2176 if node != self.nodeconstants.wdirid:
2167 2177 raise error.LookupError(
2168 2178 node, self.display_id, _(b'no node')
2169 2179 )
2170 2180 except AttributeError:
2171 2181 # Fall through to pure code
2172 2182 pass
2173 2183
2174 2184 if node == self.nodeconstants.wdirid:
2175 2185 for length in range(minlength, len(hexnode) + 1):
2176 2186 prefix = hexnode[:length]
2177 2187 if isvalid(prefix):
2178 2188 return prefix
2179 2189
2180 2190 for length in range(minlength, len(hexnode) + 1):
2181 2191 prefix = hexnode[:length]
2182 2192 if isvalid(prefix):
2183 2193 return disambiguate(hexnode, length)
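
# Illustrative sketch (hypothetical hashes): if only one node starts
# with b"d4", shortest() grows the prefix one hex digit at a time --
# b"d", b"d4", ... -- and stops at the first prefix for which
# _partialmatch no longer raises AmbiguousPrefixLookupError.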
2184 2194
2185 2195 def cmp(self, node, text):
2186 2196 """compare text with a given file revision
2187 2197
2188 2198 returns True if text is different than what is stored.
2189 2199 """
2190 2200 p1, p2 = self.parents(node)
2191 2201 return storageutil.hashrevisionsha1(text, p1, p2) != node
2192 2202
2193 2203 def _chunk(self, rev):
2194 2204 """Obtain a single decompressed chunk for a revision.
2195 2205
2196 2206 Accepts an integer revision.
2199 2209
2200 2210 Returns a str holding uncompressed data for the requested revision.
2201 2211 """
2202 2212 compression_mode = self.index[rev][10]
2203 2213 data = self._inner.get_segment_for_revs(rev, rev)[1]
2204 2214 if compression_mode == COMP_MODE_PLAIN:
2205 2215 return data
2206 2216 elif compression_mode == COMP_MODE_DEFAULT:
2207 2217 return self._decompressor(data)
2208 2218 elif compression_mode == COMP_MODE_INLINE:
2209 2219 return self.decompress(data)
2210 2220 else:
2211 2221 msg = b'unknown compression mode %d'
2212 2222 msg %= compression_mode
2213 2223 raise error.RevlogError(msg)
2214 2224
2215 2225 def _chunks(self, revs, targetsize=None):
2216 2226 """Obtain decompressed chunks for the specified revisions.
2217 2227
2218 2228 Accepts an iterable of numeric revisions that are assumed to be in
2219 2229 ascending order.
2222 2232
2223 2233 This function is similar to calling ``self._chunk()`` multiple times,
2224 2234 but is faster.
2225 2235
2226 2236 Returns a list with decompressed data for each requested revision.
2227 2237 """
2228 2238 if not revs:
2229 2239 return []
2230 2240 start = self.start
2231 2241 length = self.length
2232 2242 inline = self._inline
2233 2243 iosize = self.index.entry_size
2234 2244 buffer = util.buffer
2235 2245
2236 2246 l = []
2237 2247 ladd = l.append
2238 2248
2239 2249 if not self.data_config.with_sparse_read:
2240 2250 slicedchunks = (revs,)
2241 2251 else:
2242 2252 slicedchunks = deltautil.slicechunk(
2243 2253 self, revs, targetsize=targetsize
2244 2254 )
2245 2255
2246 2256 for revschunk in slicedchunks:
2247 2257 firstrev = revschunk[0]
2248 2258 # Skip trailing revisions with empty diff
2249 2259 for lastrev in revschunk[::-1]:
2250 2260 if length(lastrev) != 0:
2251 2261 break
2252 2262
2253 2263 try:
2254 2264 offset, data = self._inner.get_segment_for_revs(
2255 2265 firstrev,
2256 2266 lastrev,
2257 2267 )
2258 2268 except OverflowError:
2259 2269 # issue4215 - we can't cache a run of chunks greater than
2260 2270 # 2G on Windows
2261 2271 return [self._chunk(rev) for rev in revschunk]
2262 2272
2263 2273 decomp = self.decompress
2264 2274 # self._decompressor might be None, but will not be used in that case
2265 2275 def_decomp = self._decompressor
2266 2276 for rev in revschunk:
2267 2277 chunkstart = start(rev)
2268 2278 if inline:
2269 2279 chunkstart += (rev + 1) * iosize
2270 2280 chunklength = length(rev)
2271 2281 comp_mode = self.index[rev][10]
2272 2282 c = buffer(data, chunkstart - offset, chunklength)
2273 2283 if comp_mode == COMP_MODE_PLAIN:
2274 2284 ladd(c)
2275 2285 elif comp_mode == COMP_MODE_INLINE:
2276 2286 ladd(decomp(c))
2277 2287 elif comp_mode == COMP_MODE_DEFAULT:
2278 2288 ladd(def_decomp(c))
2279 2289 else:
2280 2290 msg = b'unknown compression mode %d'
2281 2291 msg %= comp_mode
2282 2292 raise error.RevlogError(msg)
2283 2293
2284 2294 return l
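
# Illustrative note: with sparse-read enabled, deltautil.slicechunk
# groups the requested revs into runs that are adjacent on disk, so
# each run costs a single get_segment_for_revs() read instead of one
# read per revision.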
2285 2295
2286 2296 def deltaparent(self, rev):
2287 2297 """return deltaparent of the given revision"""
2288 2298 base = self.index[rev][3]
2289 2299 if base == rev:
2290 2300 return nullrev
2291 2301 elif self.delta_config.general_delta:
2292 2302 return base
2293 2303 else:
2294 2304 return rev - 1
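
# Minimal sketch of the rule above (assumed names ``rl`` and
# ``generaldelta``): with general delta the stored base *is* the delta
# parent; without it, deltas always chain to the previous rev.
#
#     base = rl.index[rev][3]
#     expected = (
#         nullrev if base == rev else base if generaldelta else rev - 1
#     )
#     assert rl.deltaparent(rev) == expected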
2295 2305
2296 2306 def issnapshot(self, rev):
2297 2307 """tells whether rev is a snapshot"""
2298 2308 if not self.delta_config.sparse_revlog:
2299 2309 return self.deltaparent(rev) == nullrev
2300 2310 elif hasattr(self.index, 'issnapshot'):
2301 2311 # directly assign the method to cache the testing and access
2302 2312 self.issnapshot = self.index.issnapshot
2303 2313 return self.issnapshot(rev)
2304 2314 if rev == nullrev:
2305 2315 return True
2306 2316 entry = self.index[rev]
2307 2317 base = entry[3]
2308 2318 if base == rev:
2309 2319 return True
2310 2320 if base == nullrev:
2311 2321 return True
2312 2322 p1 = entry[5]
2313 2323 while self.length(p1) == 0:
2314 2324 b = self.deltaparent(p1)
2315 2325 if b == p1:
2316 2326 break
2317 2327 p1 = b
2318 2328 p2 = entry[6]
2319 2329 while self.length(p2) == 0:
2320 2330 b = self.deltaparent(p2)
2321 2331 if b == p2:
2322 2332 break
2323 2333 p2 = b
2324 2334 if base == p1 or base == p2:
2325 2335 return False
2326 2336 return self.issnapshot(base)
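
# Summary of the pure-Python fallback above: a rev is a snapshot when
# its delta does not depend on either parent's content -- it is either
# stored in full (base == rev or base == nullrev) or deltas,
# recursively, against another snapshot, skipping empty parents.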
2327 2337
2328 2338 def snapshotdepth(self, rev):
2329 2339 """number of snapshot in the chain before this one"""
2330 2340 if not self.issnapshot(rev):
2331 2341 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2332 2342 return len(self._deltachain(rev)[0]) - 1
2333 2343
2334 2344 def revdiff(self, rev1, rev2):
2335 2345 """return or calculate a delta between two revisions
2336 2346
2337 2347 The delta calculated is in binary form and is intended to be written to
2338 2348 revlog data directly. So this function needs raw revision data.
2339 2349 """
2340 2350 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2341 2351 return bytes(self._chunk(rev2))
2342 2352
2343 2353 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2344 2354
2345 2355 def revision(self, nodeorrev):
2346 2356 """return an uncompressed revision of a given node or revision
2347 2357 number.
2348 2358 """
2349 2359 return self._revisiondata(nodeorrev)
2350 2360
2351 2361 def sidedata(self, nodeorrev):
2352 2362 """a map of extra data related to the changeset but not part of the hash
2353 2363
2354 2364 This function currently returns a dictionary. However, a more
2355 2365 advanced mapping object will likely be used in the future for more
2356 2366 efficient/lazy code.
2357 2367 """
2358 2368 # deal with <nodeorrev> argument type
2359 2369 if isinstance(nodeorrev, int):
2360 2370 rev = nodeorrev
2361 2371 else:
2362 2372 rev = self.rev(nodeorrev)
2363 2373 return self._sidedata(rev)
2364 2374
2365 2375 def _revisiondata(self, nodeorrev, raw=False):
2366 2376 # deal with <nodeorrev> argument type
2367 2377 if isinstance(nodeorrev, int):
2368 2378 rev = nodeorrev
2369 2379 node = self.node(rev)
2370 2380 else:
2371 2381 node = nodeorrev
2372 2382 rev = None
2373 2383
2374 2384 # fast path the special `nullid` rev
2375 2385 if node == self.nullid:
2376 2386 return b""
2377 2387
2378 2388 # ``rawtext`` is the text as stored inside the revlog. Might be the
2379 2389 # revision or might need to be processed to retrieve the revision.
2380 2390 rev, rawtext, validated = self._rawtext(node, rev)
2381 2391
2382 2392 if raw and validated:
2383 2393 # if we don't want to process the raw text and that raw
2384 2394 # text is cached, we can exit early.
2385 2395 return rawtext
2386 2396 if rev is None:
2387 2397 rev = self.rev(node)
2388 2398 # the revlog's flag for this revision
2389 2399 # (usually alter its state or content)
2390 2400 flags = self.flags(rev)
2391 2401
2392 2402 if validated and flags == REVIDX_DEFAULT_FLAGS:
2393 2403 # no extra flags set, no flag processor runs, text = rawtext
2394 2404 return rawtext
2395 2405
2396 2406 if raw:
2397 2407 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2398 2408 text = rawtext
2399 2409 else:
2400 2410 r = flagutil.processflagsread(self, rawtext, flags)
2401 2411 text, validatehash = r
2402 2412 if validatehash:
2403 2413 self.checkhash(text, node, rev=rev)
2404 2414 if not validated:
2405 2415 self._revisioncache = (node, rev, rawtext)
2406 2416
2407 2417 return text
2408 2418
2409 2419 def _rawtext(self, node, rev):
2410 2420 """return the possibly unvalidated rawtext for a revision
2411 2421
2412 2422 returns (rev, rawtext, validated)
2413 2423 """
2414 2424
2415 2425 # revision in the cache (could be useful to apply delta)
2416 2426 cachedrev = None
2417 2427 # An intermediate text to apply deltas to
2418 2428 basetext = None
2419 2429
2420 2430 # Check if we have the entry in cache
2421 2431 # The cache entry looks like (node, rev, rawtext)
2422 2432 if self._revisioncache:
2423 2433 if self._revisioncache[0] == node:
2424 2434 return (rev, self._revisioncache[2], True)
2425 2435 cachedrev = self._revisioncache[1]
2426 2436
2427 2437 if rev is None:
2428 2438 rev = self.rev(node)
2429 2439
2430 2440 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2431 2441 if stopped:
2432 2442 basetext = self._revisioncache[2]
2433 2443
2434 2444 # drop cache to save memory, the caller is expected to
2435 2445 # update self._revisioncache after validating the text
2436 2446 self._revisioncache = None
2437 2447
2438 2448 targetsize = None
2439 2449 rawsize = self.index[rev][2]
2440 2450 if 0 <= rawsize:
2441 2451 targetsize = 4 * rawsize
2442 2452
2443 2453 bins = self._chunks(chain, targetsize=targetsize)
2444 2454 if basetext is None:
2445 2455 basetext = bytes(bins[0])
2446 2456 bins = bins[1:]
2447 2457
2448 2458 rawtext = mdiff.patches(basetext, bins)
2449 2459 del basetext # let us have a chance to free memory early
2450 2460 return (rev, rawtext, False)
2451 2461
2452 2462 def _sidedata(self, rev):
2453 2463 """Return the sidedata for a given revision number."""
2454 2464 index_entry = self.index[rev]
2455 2465 sidedata_offset = index_entry[8]
2456 2466 sidedata_size = index_entry[9]
2457 2467
2458 2468 if self._inline:
2459 2469 sidedata_offset += self.index.entry_size * (1 + rev)
2460 2470 if sidedata_size == 0:
2461 2471 return {}
2462 2472
2463 2473 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2464 2474 filename = self._sidedatafile
2465 2475 end = self._docket.sidedata_end
2466 2476 offset = sidedata_offset
2467 2477 length = sidedata_size
2468 2478 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2469 2479 raise error.RevlogError(m)
2470 2480
2471 2481 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2472 2482 sidedata_offset, sidedata_size
2473 2483 )
2474 2484
2475 2485 comp = self.index[rev][11]
2476 2486 if comp == COMP_MODE_PLAIN:
2477 2487 segment = comp_segment
2478 2488 elif comp == COMP_MODE_DEFAULT:
2479 2489 segment = self._decompressor(comp_segment)
2480 2490 elif comp == COMP_MODE_INLINE:
2481 2491 segment = self.decompress(comp_segment)
2482 2492 else:
2483 2493 msg = b'unknown compression mode %d'
2484 2494 msg %= comp
2485 2495 raise error.RevlogError(msg)
2486 2496
2487 2497 sidedata = sidedatautil.deserialize_sidedata(segment)
2488 2498 return sidedata
2489 2499
2490 2500 def rawdata(self, nodeorrev):
2491 2501 """return an uncompressed raw data of a given node or revision number."""
2492 2502 return self._revisiondata(nodeorrev, raw=True)
2493 2503
2494 2504 def hash(self, text, p1, p2):
2495 2505 """Compute a node hash.
2496 2506
2497 2507 Available as a function so that subclasses can replace the hash
2498 2508 as needed.
2499 2509 """
2500 2510 return storageutil.hashrevisionsha1(text, p1, p2)
2501 2511
2502 2512 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2503 2513 """Check node hash integrity.
2504 2514
2505 2515 Available as a function so that subclasses can extend hash mismatch
2506 2516 behaviors as needed.
2507 2517 """
2508 2518 try:
2509 2519 if p1 is None and p2 is None:
2510 2520 p1, p2 = self.parents(node)
2511 2521 if node != self.hash(text, p1, p2):
2512 2522 # Clear the revision cache on hash failure. The revision cache
2513 2523 # only stores the raw revision and clearing the cache does have
2514 2524 # the side-effect that we won't have a cache hit when the raw
2515 2525 # revision data is accessed. But this case should be rare and
2516 2526 # it is extra work to teach the cache about the hash
2517 2527 # verification state.
2518 2528 if self._revisioncache and self._revisioncache[0] == node:
2519 2529 self._revisioncache = None
2520 2530
2521 2531 revornode = rev
2522 2532 if revornode is None:
2523 2533 revornode = templatefilters.short(hex(node))
2524 2534 raise error.RevlogError(
2525 2535 _(b"integrity check failed on %s:%s")
2526 2536 % (self.display_id, pycompat.bytestr(revornode))
2527 2537 )
2528 2538 except error.RevlogError:
2529 2539 if self.feature_config.censorable and storageutil.iscensoredtext(
2530 2540 text
2531 2541 ):
2532 2542 raise error.CensoredNodeError(self.display_id, node, text)
2533 2543 raise
2534 2544
2535 2545 @property
2536 2546 def _split_index_file(self):
2537 2547 """the path where to expect the index of an ongoing splitting operation
2538 2548
2539 2549 The file will only exist if a splitting operation is in progress, but
2540 2550 it is always expected at the same location."""
2541 2551 parts = self.radix.split(b'/')
2542 2552 if len(parts) > 1:
2543 2553 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2544 2554 head = parts[0] + b'-s'
2545 2555 mids = parts[1:-1]
2546 2556 tail = parts[-1] + b'.i'
2547 2557 pieces = [head] + mids + [tail]
2548 2558 return b'/'.join(pieces)
2549 2559 else:
2550 2560 # the revlog is stored at the root of the store (changelog or
2551 2561 # manifest), no risk of collision.
2552 2562 return self.radix + b'.i.s'
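
# Illustrative sketch (hypothetical radix values): a revlog stored at
# b'data/foo/bar' maps to b'data-s/foo/bar.i', while a store-root
# revlog such as b'00changelog' maps to b'00changelog.i.s'.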
2553 2563
2554 2564 def _enforceinlinesize(self, tr, side_write=True):
2555 2565 """Check if the revlog is too big for inline and convert if so.
2556 2566
2557 2567 This should be called after revisions are added to the revlog. If the
2558 2568 revlog has grown too large to be an inline revlog, it will convert it
2559 2569 to use multiple index and data files.
2560 2570 """
2561 2571 tiprev = len(self) - 1
2562 2572 total_size = self.start(tiprev) + self.length(tiprev)
2563 2573 if not self._inline or total_size < _maxinline:
2564 2574 return
2565 2575
2566 2576 if self._docket is not None:
2567 2577 msg = b"inline revlog should not have a docket"
2568 2578 raise error.ProgrammingError(msg)
2569 2579
2570 2580 troffset = tr.findoffset(self._indexfile)
2571 2581 if troffset is None:
2572 2582 raise error.RevlogError(
2573 2583 _(b"%s not found in the transaction") % self._indexfile
2574 2584 )
2575 2585 if troffset:
2576 2586 tr.addbackup(self._indexfile, for_offset=True)
2577 2587 tr.add(self._datafile, 0)
2578 2588
2579 2589 existing_handles = False
2580 2590 if self._inner._writinghandles is not None:
2581 2591 existing_handles = True
2582 2592 fp = self._inner._writinghandles[0]
2583 2593 fp.flush()
2584 2594 fp.close()
2585 2595 # We can't use the cached file handle after close(). So prevent
2586 2596 # its usage.
2587 2597 self._inner._writinghandles = None
2588 2598 self._inner._segmentfile.writing_handle = None
2589 2599 # No need to deal with the sidedata writing handle as it is only
2590 2600 # relevant for revlog-v2, which is never inline and thus never
2591 2601 # reaches this code
2592 2602 if side_write:
2593 2603 old_index_file_path = self._indexfile
2594 2604 new_index_file_path = self._split_index_file
2595 2605 opener = self.opener
2596 2606 weak_self = weakref.ref(self)
2597 2607
2598 2608 # the "split" index replace the real index when the transaction is finalized
2599 2609 def finalize_callback(tr):
2600 2610 opener.rename(
2601 2611 new_index_file_path,
2602 2612 old_index_file_path,
2603 2613 checkambig=True,
2604 2614 )
2605 2615 maybe_self = weak_self()
2606 2616 if maybe_self is not None:
2607 2617 maybe_self._indexfile = old_index_file_path
2608 2618 maybe_self._inner.index_file = maybe_self._indexfile
2609 2619
2610 2620 def abort_callback(tr):
2611 2621 maybe_self = weak_self()
2612 2622 if maybe_self is not None:
2613 2623 maybe_self._indexfile = old_index_file_path
2614 2624 maybe_self._inner.index_file = old_index_file_path
2615 2625
2616 2626 tr.registertmp(new_index_file_path)
2617 2627 if self.target[1] is not None:
2618 2628 callback_id = b'000-revlog-split-%d-%s' % self.target
2619 2629 else:
2620 2630 callback_id = b'000-revlog-split-%d' % self.target[0]
2621 2631 tr.addfinalize(callback_id, finalize_callback)
2622 2632 tr.addabort(callback_id, abort_callback)
2623 2633
2624 2634 new_dfh = self._datafp(b'w+')
2625 2635 new_dfh.truncate(0) # drop any potentially existing data
2626 2636 try:
2627 2637 with self.reading():
2628 2638 for r in self:
2629 2639 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
2630 2640 new_dfh.flush()
2631 2641
2632 2642 if side_write:
2633 2643 self._indexfile = new_index_file_path
2634 2644 self._inner.index_file = self._indexfile
2635 2645 with self._inner._InnerRevlog__index_new_fp() as fp:
2636 2646 self._format_flags &= ~FLAG_INLINE_DATA
2637 2647 self._inline = False
2638 2648 self._inner.inline = False
2639 2649 for i in self:
2640 2650 e = self.index.entry_binary(i)
2641 2651 if i == 0:
2642 2652 header = self._format_flags | self._format_version
2643 2653 header = self.index.pack_header(header)
2644 2654 e = header + e
2645 2655 fp.write(e)
2646 2656
2647 2657 # If we don't use side-write, the temp file replaces the real
2648 2658 # index when we exit the context manager
2649 2659
2650 2660 nodemaputil.setup_persistent_nodemap(tr, self)
2651 2661 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2652 2662 self.opener,
2653 2663 self._datafile,
2654 2664 self.data_config.chunk_cache_size,
2655 2665 )
2656 2666
2657 2667 if existing_handles:
2658 2668 # switched from inline to conventional; reopen the index
2659 2669 index_end = None
2660 2670 ifh = self._inner._InnerRevlog__index_write_fp(
2661 2671 index_end=index_end
2662 2672 )
2663 2673 self._inner._writinghandles = (ifh, new_dfh, None)
2664 2674 self._inner._segmentfile.writing_handle = new_dfh
2665 2675 new_dfh = None
2666 2676 # No need to deal with the sidedata writing handle as it is only
2667 2677 # relevant for revlog-v2, which is never inline and thus never
2668 2678 # reaches this code
2669 2679 finally:
2670 2680 if new_dfh is not None:
2671 2681 new_dfh.close()
2672 2682
2673 2683 def _nodeduplicatecallback(self, transaction, node):
2674 2684 """called when trying to add a node already stored."""
2675 2685
2676 2686 @contextlib.contextmanager
2677 2687 def reading(self):
2678 2688 with self._inner.reading():
2679 2689 yield
2680 2690
2681 2691 @contextlib.contextmanager
2682 2692 def _writing(self, transaction):
2683 2693 if self._trypending:
2684 2694 msg = b'try to write in a `trypending` revlog: %s'
2685 2695 msg %= self.display_id
2686 2696 raise error.ProgrammingError(msg)
2687 2697 if self._inner.is_writing:
2688 2698 yield
2689 2699 else:
2690 2700 data_end = None
2691 2701 sidedata_end = None
2692 2702 if self._docket is not None:
2693 2703 data_end = self._docket.data_end
2694 2704 sidedata_end = self._docket.sidedata_end
2695 2705 with self._inner.writing(
2696 2706 transaction,
2697 2707 data_end=data_end,
2698 2708 sidedata_end=sidedata_end,
2699 2709 ):
2700 2710 yield
2701 2711 if self._docket is not None:
2702 2712 self._write_docket(transaction)
2703 2713
2704 2714 def _write_docket(self, transaction):
2705 2715 """write the current docket on disk
2706 2716
2707 2717 Exists as a method to help the changelog implement transaction logic.
2708 2718
2709 2719 We could also imagine using the same transaction logic for all revlogs
2710 2720 since dockets are cheap."""
2711 2721 self._docket.write(transaction)
2712 2722
2713 2723 def addrevision(
2714 2724 self,
2715 2725 text,
2716 2726 transaction,
2717 2727 link,
2718 2728 p1,
2719 2729 p2,
2720 2730 cachedelta=None,
2721 2731 node=None,
2722 2732 flags=REVIDX_DEFAULT_FLAGS,
2723 2733 deltacomputer=None,
2724 2734 sidedata=None,
2725 2735 ):
2726 2736 """add a revision to the log
2727 2737
2728 2738 text - the revision data to add
2729 2739 transaction - the transaction object used for rollback
2730 2740 link - the linkrev data to add
2731 2741 p1, p2 - the parent nodeids of the revision
2732 2742 cachedelta - an optional precomputed delta
2733 2743 node - nodeid of revision; typically node is not specified, and it is
2734 2744 computed by default as hash(text, p1, p2), however subclasses might
2735 2745 use a different hashing method (and override checkhash() in that case)
2736 2746 flags - the known flags to set on the revision
2737 2747 deltacomputer - an optional deltacomputer instance shared between
2738 2748 multiple calls
2739 2749 """
2740 2750 if link == nullrev:
2741 2751 raise error.RevlogError(
2742 2752 _(b"attempted to add linkrev -1 to %s") % self.display_id
2743 2753 )
2744 2754
2745 2755 if sidedata is None:
2746 2756 sidedata = {}
2747 2757 elif sidedata and not self.feature_config.has_side_data:
2748 2758 raise error.ProgrammingError(
2749 2759 _(b"trying to add sidedata to a revlog who don't support them")
2750 2760 )
2751 2761
2752 2762 if flags:
2753 2763 node = node or self.hash(text, p1, p2)
2754 2764
2755 2765 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2756 2766
2757 2767 # If the flag processor modifies the revision data, ignore any provided
2758 2768 # cachedelta.
2759 2769 if rawtext != text:
2760 2770 cachedelta = None
2761 2771
2762 2772 if len(rawtext) > _maxentrysize:
2763 2773 raise error.RevlogError(
2764 2774 _(
2765 2775 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2766 2776 )
2767 2777 % (self.display_id, len(rawtext))
2768 2778 )
2769 2779
2770 2780 node = node or self.hash(rawtext, p1, p2)
2771 2781 rev = self.index.get_rev(node)
2772 2782 if rev is not None:
2773 2783 return rev
2774 2784
2775 2785 if validatehash:
2776 2786 self.checkhash(rawtext, node, p1=p1, p2=p2)
2777 2787
2778 2788 return self.addrawrevision(
2779 2789 rawtext,
2780 2790 transaction,
2781 2791 link,
2782 2792 p1,
2783 2793 p2,
2784 2794 node,
2785 2795 flags,
2786 2796 cachedelta=cachedelta,
2787 2797 deltacomputer=deltacomputer,
2788 2798 sidedata=sidedata,
2789 2799 )
2790 2800
2791 2801 def addrawrevision(
2792 2802 self,
2793 2803 rawtext,
2794 2804 transaction,
2795 2805 link,
2796 2806 p1,
2797 2807 p2,
2798 2808 node,
2799 2809 flags,
2800 2810 cachedelta=None,
2801 2811 deltacomputer=None,
2802 2812 sidedata=None,
2803 2813 ):
2804 2814 """add a raw revision with known flags, node and parents
2805 2815 useful when reusing a revision not stored in this revlog (e.g. received
2806 2816 over the wire, or read from an external bundle).
2807 2817 """
2808 2818 with self._writing(transaction):
2809 2819 return self._addrevision(
2810 2820 node,
2811 2821 rawtext,
2812 2822 transaction,
2813 2823 link,
2814 2824 p1,
2815 2825 p2,
2816 2826 flags,
2817 2827 cachedelta,
2818 2828 deltacomputer=deltacomputer,
2819 2829 sidedata=sidedata,
2820 2830 )
2821 2831
2822 2832 def compress(self, data):
2823 2833 """Generate a possibly-compressed representation of data."""
2824 2834 if not data:
2825 2835 return b'', data
2826 2836
2827 2837 compressed = self._compressor.compress(data)
2828 2838
2829 2839 if compressed:
2830 2840 # The revlog compressor added the header in the returned data.
2831 2841 return b'', compressed
2832 2842
2833 2843 if data[0:1] == b'\0':
2834 2844 return b'', data
2835 2845 return b'u', data
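
# Illustrative note on the (header, data) pairs returned above: an
# empty header means the payload already embeds its compression
# header, b'u' flags literal uncompressed data, and data naturally
# starting with b'\0' is stored bare since b'\0' is never a valid
# header for textual data.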
2836 2846
2837 2847 def decompress(self, data):
2838 2848 """Decompress a revlog chunk.
2839 2849
2840 2850 The chunk is expected to begin with a header identifying the
2841 2851 format type so it can be routed to an appropriate decompressor.
2842 2852 """
2843 2853 if not data:
2844 2854 return data
2845 2855
2846 2856 # Revlogs are read much more frequently than they are written and many
2847 2857 # chunks only take microseconds to decompress, so performance is
2848 2858 # important here.
2849 2859 #
2850 2860 # We can make a few assumptions about revlogs:
2851 2861 #
2852 2862 # 1) the majority of chunks will be compressed (as opposed to inline
2853 2863 # raw data).
2854 2864 # 2) decompressing *any* data will likely be at least 10x slower than
2855 2865 # returning raw inline data.
2856 2866 # 3) we want to prioritize common and officially supported compression
2857 2867 # engines
2858 2868 #
2859 2869 # It follows that we want to optimize for "decompress compressed data
2860 2870 # when encoded with common and officially supported compression engines"
2861 2871 # case over "raw data" and "data encoded by less common or non-official
2862 2872 # compression engines." That is why we have the inline lookup first
2863 2873 # followed by the compengines lookup.
2864 2874 #
2865 2875 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2866 2876 # compressed chunks. And this matters for changelog and manifest reads.
2867 2877 t = data[0:1]
2868 2878
2869 2879 if t == b'x':
2870 2880 try:
2871 2881 return _zlibdecompress(data)
2872 2882 except zlib.error as e:
2873 2883 raise error.RevlogError(
2874 2884 _(b'revlog decompress error: %s')
2875 2885 % stringutil.forcebytestr(e)
2876 2886 )
2877 2887 # '\0' is more common than 'u' so it goes first.
2878 2888 elif t == b'\0':
2879 2889 return data
2880 2890 elif t == b'u':
2881 2891 return util.buffer(data, 1)
2882 2892
2883 2893 compressor = self._get_decompressor(t)
2884 2894
2885 2895 return compressor.decompress(data)
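
# Illustrative sketch of the dispatch above (hypothetical revlog
# ``rl``, assumed chunk values):
#
#     rl.decompress(b'')          # -> b'' (empty chunk, fast path)
#     rl.decompress(b'\0abc')     # -> b'\0abc' (stored raw, returned as-is)
#     rl.decompress(b'uabc')      # -> buffer over b'abc' (marker dropped)
#     rl.decompress(b'x\x9c...')  # -> zlib-decompressed chunk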
2886 2896
2887 2897 def _addrevision(
2888 2898 self,
2889 2899 node,
2890 2900 rawtext,
2891 2901 transaction,
2892 2902 link,
2893 2903 p1,
2894 2904 p2,
2895 2905 flags,
2896 2906 cachedelta,
2897 2907 alwayscache=False,
2898 2908 deltacomputer=None,
2899 2909 sidedata=None,
2900 2910 ):
2901 2911 """internal function to add revisions to the log
2902 2912
2903 2913 see addrevision for argument descriptions.
2904 2914
2905 2915 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2906 2916
2907 2917 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2908 2918 be used.
2909 2919
2910 2920 invariants:
2911 2921 - rawtext is optional (can be None); if not set, cachedelta must be set.
2912 2922 If both are set, they must correspond to each other.
2913 2923 """
2914 2924 if node == self.nullid:
2915 2925 raise error.RevlogError(
2916 2926 _(b"%s: attempt to add null revision") % self.display_id
2917 2927 )
2918 2928 if (
2919 2929 node == self.nodeconstants.wdirid
2920 2930 or node in self.nodeconstants.wdirfilenodeids
2921 2931 ):
2922 2932 raise error.RevlogError(
2923 2933 _(b"%s: attempt to add wdir revision") % self.display_id
2924 2934 )
2925 2935 if self._inner._writinghandles is None:
2926 2936 msg = b'adding revision outside `revlog._writing` context'
2927 2937 raise error.ProgrammingError(msg)
2928 2938
2929 2939 btext = [rawtext]
2930 2940
2931 2941 curr = len(self)
2932 2942 prev = curr - 1
2933 2943
2934 2944 offset = self._get_data_offset(prev)
2935 2945
2936 2946 if self._concurrencychecker:
2937 2947 ifh, dfh, sdfh = self._inner._writinghandles
2938 2948 # XXX no checking for the sidedata file
2939 2949 if self._inline:
2940 2950 # offset is "as if" it were in the .d file, so we need to add on
2941 2951 # the size of the entry metadata.
2942 2952 self._concurrencychecker(
2943 2953 ifh, self._indexfile, offset + curr * self.index.entry_size
2944 2954 )
2945 2955 else:
2946 2956 # Entries in the .i are a consistent size.
2947 2957 self._concurrencychecker(
2948 2958 ifh, self._indexfile, curr * self.index.entry_size
2949 2959 )
2950 2960 self._concurrencychecker(dfh, self._datafile, offset)
2951 2961
2952 2962 p1r, p2r = self.rev(p1), self.rev(p2)
2953 2963
2954 2964 # full versions are inserted when the needed deltas
2955 2965 # become comparable to the uncompressed text
2956 2966 if rawtext is None:
2957 2967 # we need the rawtext size before it is changed by flag processors,
2958 2968 # which is the non-raw size. use revlog explicitly to avoid filelog's
2959 2969 # extra logic that might remove metadata size.
2960 2970 textlen = mdiff.patchedsize(
2961 2971 revlog.size(self, cachedelta[0]), cachedelta[1]
2962 2972 )
2963 2973 else:
2964 2974 textlen = len(rawtext)
2965 2975
2966 2976 if deltacomputer is None:
2967 2977 write_debug = None
2968 2978 if self.delta_config.debug_delta:
2969 2979 write_debug = transaction._report
2970 2980 deltacomputer = deltautil.deltacomputer(
2971 2981 self, write_debug=write_debug
2972 2982 )
2973 2983
2974 2984 if cachedelta is not None and len(cachedelta) == 2:
2975 2985 # If the cached delta has no information about how it should be
2976 2986 # reused, add the default reuse instruction according to the
2977 2987 # revlog's configuration.
2978 2988 if (
2979 2989 self.delta_config.general_delta
2980 2990 and self.delta_config.lazy_delta_base
2981 2991 ):
2982 2992 delta_base_reuse = DELTA_BASE_REUSE_TRY
2983 2993 else:
2984 2994 delta_base_reuse = DELTA_BASE_REUSE_NO
2985 2995 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2986 2996
2987 2997 revinfo = revlogutils.revisioninfo(
2988 2998 node,
2989 2999 p1,
2990 3000 p2,
2991 3001 btext,
2992 3002 textlen,
2993 3003 cachedelta,
2994 3004 flags,
2995 3005 )
2996 3006
2997 3007 deltainfo = deltacomputer.finddeltainfo(revinfo)
2998 3008
2999 3009 compression_mode = COMP_MODE_INLINE
3000 3010 if self._docket is not None:
3001 3011 default_comp = self._docket.default_compression_header
3002 3012 r = deltautil.delta_compression(default_comp, deltainfo)
3003 3013 compression_mode, deltainfo = r
3004 3014
3005 3015 sidedata_compression_mode = COMP_MODE_INLINE
3006 3016 if sidedata and self.feature_config.has_side_data:
3007 3017 sidedata_compression_mode = COMP_MODE_PLAIN
3008 3018 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3009 3019 sidedata_offset = self._docket.sidedata_end
3010 3020 h, comp_sidedata = self.compress(serialized_sidedata)
3011 3021 if (
3012 3022 h != b'u'
3013 3023 and comp_sidedata[0:1] != b'\0'
3014 3024 and len(comp_sidedata) < len(serialized_sidedata)
3015 3025 ):
3016 3026 assert not h
3017 3027 if (
3018 3028 comp_sidedata[0:1]
3019 3029 == self._docket.default_compression_header
3020 3030 ):
3021 3031 sidedata_compression_mode = COMP_MODE_DEFAULT
3022 3032 serialized_sidedata = comp_sidedata
3023 3033 else:
3024 3034 sidedata_compression_mode = COMP_MODE_INLINE
3025 3035 serialized_sidedata = comp_sidedata
3026 3036 else:
3027 3037 serialized_sidedata = b""
3028 3038 # Don't store the offset if the sidedata is empty; that way
3029 3039 # we can easily detect empty sidedata, and it will be no different
3030 3040 # from sidedata we add manually.
3031 3041 sidedata_offset = 0
3032 3042
3033 3043 rank = RANK_UNKNOWN
3034 3044 if self.feature_config.compute_rank:
3035 3045 if (p1r, p2r) == (nullrev, nullrev):
3036 3046 rank = 1
3037 3047 elif p1r != nullrev and p2r == nullrev:
3038 3048 rank = 1 + self.fast_rank(p1r)
3039 3049 elif p1r == nullrev and p2r != nullrev:
3040 3050 rank = 1 + self.fast_rank(p2r)
3041 3051 else: # merge node
3042 3052 if rustdagop is not None and self.index.rust_ext_compat:
3043 3053 rank = rustdagop.rank(self.index, p1r, p2r)
3044 3054 else:
3045 3055 pmin, pmax = sorted((p1r, p2r))
3046 3056 rank = 1 + self.fast_rank(pmax)
3047 3057 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3048 3058
3049 3059 e = revlogutils.entry(
3050 3060 flags=flags,
3051 3061 data_offset=offset,
3052 3062 data_compressed_length=deltainfo.deltalen,
3053 3063 data_uncompressed_length=textlen,
3054 3064 data_compression_mode=compression_mode,
3055 3065 data_delta_base=deltainfo.base,
3056 3066 link_rev=link,
3057 3067 parent_rev_1=p1r,
3058 3068 parent_rev_2=p2r,
3059 3069 node_id=node,
3060 3070 sidedata_offset=sidedata_offset,
3061 3071 sidedata_compressed_length=len(serialized_sidedata),
3062 3072 sidedata_compression_mode=sidedata_compression_mode,
3063 3073 rank=rank,
3064 3074 )
3065 3075
3066 3076 self.index.append(e)
3067 3077 entry = self.index.entry_binary(curr)
3068 3078 if curr == 0 and self._docket is None:
3069 3079 header = self._format_flags | self._format_version
3070 3080 header = self.index.pack_header(header)
3071 3081 entry = header + entry
3072 3082 self._writeentry(
3073 3083 transaction,
3074 3084 entry,
3075 3085 deltainfo.data,
3076 3086 link,
3077 3087 offset,
3078 3088 serialized_sidedata,
3079 3089 sidedata_offset,
3080 3090 )
3081 3091
3082 3092 rawtext = btext[0]
3083 3093
3084 3094 if alwayscache and rawtext is None:
3085 3095 rawtext = deltacomputer.buildtext(revinfo)
3086 3096
3087 3097 if type(rawtext) == bytes: # only accept immutable objects
3088 3098 self._revisioncache = (node, curr, rawtext)
3089 3099 self._chainbasecache[curr] = deltainfo.chainbase
3090 3100 return curr
3091 3101
3092 3102 def _get_data_offset(self, prev):
3093 3103 """Returns the current offset in the (in-transaction) data file.
3094 3104 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3095 3105 file to store that information: since sidedata can be rewritten to the
3096 3106 end of the data file within a transaction, you can have cases where, for
3097 3107 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3098 3108 to `n - 1`'s sidedata being written after `n`'s data.
3099 3109
3100 3110 TODO cache this in a docket file before getting out of experimental."""
3101 3111 if self._docket is None:
3102 3112 return self.end(prev)
3103 3113 else:
3104 3114 return self._docket.data_end
3105 3115
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

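    # Illustrative sketch, not part of revlog: the journal offsets that
    # _writeentry registers with the transaction above, factored into a
    # hypothetical pure helper (sidedata journaling omitted for brevity).
    # For a non-inline revlog the index and data live in separate files;
    # for an inline revlog they share one file, so the index offset is the
    # data offset shifted by the fixed-size entries already written.
    @staticmethod
    def _sketch_journal_offsets(inline, curr, entry_size, data_offset):
        if not inline:
            # separate files: the index grows by fixed-size entries
            return {b'index': curr * entry_size, b'data': data_offset}
        # inline: index entries and data are interleaved in one file
        return {b'index': data_offset + curr * entry_size}
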
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

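    # Illustrative sketch, not part of revlog: how a caller might consume
    # addgroup() and tally added versus duplicate revisions through the two
    # callbacks documented above. The `_sketch_` name is hypothetical.
    def _sketch_apply_group(self, deltas, linkmapper, transaction):
        added = []
        duplicated = []
        self.addgroup(
            deltas,
            linkmapper,
            transaction,
            addrevisioncb=lambda rl, rev: added.append(rev),
            duplicaterevisioncb=lambda rl, rev: duplicated.append(rev),
        )
        # addgroup() returned whether anything was written; the lists tell
        # us what happened revision by revision.
        return added, duplicated
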
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

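    # Illustrative sketch, not part of revlog: the two checks above differ in
    # timing — iscensored() consults flags already stored in the index, while
    # _peek_iscensored() inspects an incoming delta before it is stored. A
    # hypothetical full scan using the stored flags:
    def _sketch_censored_revs(self):
        return [rev for rev in self if self.iscensored(rev)]
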
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

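    # Illustrative sketch, not part of revlog: the truncation offset strip()
    # computes above, as a hypothetical pure function. For an inline revlog
    # the index entries and revision data are interleaved in a single file,
    # so the cut point must account for both; otherwise only the fixed-size
    # index entries matter.
    @staticmethod
    def _sketch_strip_index_cutoff(inline, data_end, rev, entry_size):
        if inline:
            return data_end + rev * entry_size
        return rev * entry_size
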
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

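    # Illustrative sketch, not part of revlog: turning checksize() output
    # into readable problem strings, mirroring how verifyintegrity() below
    # reports them. Positive values mean trailing bytes the index does not
    # account for; negative values mean truncated files.
    def _sketch_describe_sizes(self):
        dd, di = self.checksize()
        problems = []
        if dd:
            problems.append(b'data length off by %d bytes' % dd)
        if di:
            problems.append(b'index contains %d extra bytes' % di)
        return problems
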
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When ``None``, the destination revlog's current
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

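    # Illustrative sketch, not part of revlog: forcing every delta to be
    # recomputed while cloning, e.g. after changing the delta algorithm, per
    # the DELTAREUSENEVER documentation above. The `_sketch_` name is
    # hypothetical.
    def _sketch_recompute_deltas(self, tr, destrevlog):
        self.clone(tr, destrevlog, deltareuse=self.DELTAREUSENEVER)
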
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

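    # Illustrative sketch, not part of revlog: the raw index-entry fields
    # that _clone() reads above, unpacked with names. The layout is inferred
    # from the accesses in this file: parents are stored as revision numbers
    # and a node sits at position 7 of its entry.
    def _sketch_entry_fields(self, rev):
        e = self.index[rev]
        return {
            b'flags': e[0] & 0xFFFF,  # low 16 bits of the offset_flags field
            b'linkrev': e[4],
            b'p1': self.index[e[5]][7],  # parent rev -> parent node
            b'p2': self.index[e[6]][7],
            b'node': e[7],
        }
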
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

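    # Illustrative sketch, not part of revlog: driving verifyintegrity()
    # with a minimal state dictionary and collecting the reported problems.
    # The keys shown are the ones the generator reads or initializes above;
    # the helper name is hypothetical.
    def _sketch_collect_problems(self, expectedversion):
        state = {
            b'expectedversion': expectedversion,
            b'erroroncensored': False,
        }
        return list(self.verifyintegrity(state))
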
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

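    # Illustrative sketch, not part of revlog: querying just the on-disk
    # footprint through storageinfo() above; each keyword toggles one key in
    # the returned dictionary.
    def _sketch_stored_size(self):
        return self.storageinfo(storedsize=True)[b'storedsize']
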
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
                self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
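
    # Illustrative sketch, not part of revlog: the flag merge used by the
    # sidedata code above (and twice in _clone). Helpers return a pair of
    # (flags to add, flags to remove), and the store applies both to the
    # existing flags; `&` binds tighter than `|`, so no parentheses are
    # needed in the original expression.
    @staticmethod
    def _sketch_apply_flag_update(old_flags, flag_update):
        to_add, to_remove = flag_update
        return old_flags | to_add & ~to_remove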