revlog: move the splitting-inline-revlog logic inside the inner object...
marmoute
r51983:de6a8cc2 default
@@ -1,3917 +1,3929 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # bare references to all these names, to quiet pyflakes "unused import"
102 102 # warnings. We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation "fast", because
167 167 # people using pure python don't really have performance considerations
168 168 # (and a wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
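# --- editor's sketch (illustrative, not part of this change): the base
# _Config.copy() rebuilds the object from its attribute dict, so mutable
# members still need the deeper copy added above. ---
def _sketch_copy_is_deep_for_options():
    fc = FeatureConfig()
    fc2 = fc.copy()
    assert fc2.compression_engine_options is not fc.compression_engine_options
    assert fc2.compression_engine_options == fc.compression_engine_options
# --- end sketch ---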
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data counts as "large"
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate groups in chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to make it possible to delegate some operations to
344 344 Rust; its boundaries are arbitrary, based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 chunk_cache,
357 357 ):
358 358 self.opener = opener
359 359 self.index = index
360 360
361 361 self.__index_file = index_file
362 362 self.data_file = data_file
363 363 self.sidedata_file = sidedata_file
364 364 self.inline = inline
365 365 self.data_config = data_config
366 366
367 367 # index
368 368
369 369 # 3-tuple of file handles being used for active writing.
370 370 self._writinghandles = None
371 371
372 372 self._segmentfile = randomaccessfile.randomaccessfile(
373 373 self.opener,
374 374 (self.index_file if self.inline else self.data_file),
375 375 self.data_config.chunk_cache_size,
376 376 chunk_cache,
377 377 )
378 378 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
379 379 self.opener,
380 380 self.sidedata_file,
381 381 self.data_config.chunk_cache_size,
382 382 )
383 383
384 384 @property
385 385 def index_file(self):
386 386 return self.__index_file
387 387
388 388 @index_file.setter
389 389 def index_file(self, new_index_file):
390 390 self.__index_file = new_index_file
391 391 if self.inline:
392 392 self._segmentfile.filename = new_index_file
393 393
394 394 # Derived from index values.
395 395
396 396 def start(self, rev):
397 397 """the offset of the data chunk for this revision"""
398 398 return int(self.index[rev][0] >> 16)
399 399
400 400 def length(self, rev):
401 401 """the length of the data chunk for this revision"""
402 402 return self.index[rev][1]
403 403
404 404 def end(self, rev):
405 405 """the end of the data chunk for this revision"""
406 406 return self.start(rev) + self.length(rev)
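# --- editor's sketch (illustrative, not part of this change): how the
# first index-entry field packs the data offset (high bits) and the 16
# flag bits (low bits), as start() above relies on. ---
def _sketch_unpack_entry0(entry0):
    """Return (offset, flags) from a packed first index field."""
    return entry0 >> 16, entry0 & 0xFFFF

assert _sketch_unpack_entry0((1024 << 16) | 0x1) == (1024, 0x1)
# --- end sketch ---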
407 407
408 408 @contextlib.contextmanager
409 409 def reading(self):
410 410 """Context manager that keeps data and sidedata files open for reading"""
411 411 if len(self.index) == 0:
412 412 yield # nothing to be read
413 413 else:
414 414 with self._segmentfile.reading():
415 415 with self._segmentfile_sidedata.reading():
416 416 yield
417 417
418 418 @property
419 419 def is_writing(self):
420 420 """True is a writing context is open"""
421 421 return self._writinghandles is not None
422 422
423 423 @contextlib.contextmanager
424 424 def writing(self, transaction, data_end=None, sidedata_end=None):
425 425 """Open the revlog files for writing
426 426
427 427 Adding content to a revlog should be done within such a context.
428 428 """
429 429 if self.is_writing:
430 430 yield
431 431 else:
432 432 ifh = dfh = sdfh = None
433 433 try:
434 434 r = len(self.index)
435 435 # opening the data file.
436 436 dsize = 0
437 437 if r:
438 438 dsize = self.end(r - 1)
439 439 dfh = None
440 440 if not self.inline:
441 441 try:
442 442 dfh = self.opener(self.data_file, mode=b"r+")
443 443 if data_end is None:
444 444 dfh.seek(0, os.SEEK_END)
445 445 else:
446 446 dfh.seek(data_end, os.SEEK_SET)
447 447 except FileNotFoundError:
448 448 dfh = self.opener(self.data_file, mode=b"w+")
449 449 transaction.add(self.data_file, dsize)
450 450 if self.sidedata_file is not None:
451 451 assert sidedata_end is not None
452 452 # revlog-v2 is never inline; this helps Pytype
453 453 assert dfh is not None
454 454 try:
455 455 sdfh = self.opener(self.sidedata_file, mode=b"r+")
456 456 dfh.seek(sidedata_end, os.SEEK_SET)
457 457 except FileNotFoundError:
458 458 sdfh = self.opener(self.sidedata_file, mode=b"w+")
459 459 transaction.add(self.sidedata_file, sidedata_end)
460 460
461 461 # opening the index file.
462 462 isize = r * self.index.entry_size
463 463 ifh = self.__index_write_fp()
464 464 if self.inline:
465 465 transaction.add(self.index_file, dsize + isize)
466 466 else:
467 467 transaction.add(self.index_file, isize)
468 468 # expose all file handles for writing.
469 469 self._writinghandles = (ifh, dfh, sdfh)
470 470 self._segmentfile.writing_handle = ifh if self.inline else dfh
471 471 self._segmentfile_sidedata.writing_handle = sdfh
472 472 yield
473 473 finally:
474 474 self._writinghandles = None
475 475 self._segmentfile.writing_handle = None
476 476 self._segmentfile_sidedata.writing_handle = None
477 477 if dfh is not None:
478 478 dfh.close()
479 479 if sdfh is not None:
480 480 sdfh.close()
481 481 # close the index file last, to avoid exposing references to
482 482 # potentially unflushed data content.
483 483 if ifh is not None:
484 484 ifh.close()
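# --- editor's sketch (illustrative, not part of this change): typical
# call pattern, so the three handles are opened and registered with the
# transaction only once per batch; `add_one` is a hypothetical helper. ---
def _sketch_append_many(inner, transaction, add_one, items):
    with inner.writing(transaction):
        for item in items:
            add_one(item)  # each append reuses inner._writinghandles
# --- end sketch ---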
485 485
486 486 def __index_write_fp(self, index_end=None):
487 487 """internal method to open the index file for writing
488 488
489 489 You should not use this directly; use `writing` instead
490 490 """
491 491 try:
492 492 f = self.opener(
493 493 self.index_file,
494 494 mode=b"r+",
495 495 checkambig=self.data_config.check_ambig,
496 496 )
497 497 if index_end is None:
498 498 f.seek(0, os.SEEK_END)
499 499 else:
500 500 f.seek(index_end, os.SEEK_SET)
501 501 return f
502 502 except FileNotFoundError:
503 503 return self.opener(
504 504 self.index_file,
505 505 mode=b"w+",
506 506 checkambig=self.data_config.check_ambig,
507 507 )
508 508
509 509 def __index_new_fp(self):
510 510 """internal method to create a new index file for writing
511 511
512 512 You should not use this unless you are upgrading from an inline revlog
513 513 """
514 514 return self.opener(
515 515 self.index_file,
516 516 mode=b"w",
517 517 checkambig=self.data_config.check_ambig,
518 518 atomictemp=True,
519 519 )
520 520
521 def split_inline(self, tr, header, new_index_file_path=None):
522 """split the data of an inline revlog into an index and a data file"""
523 existing_handles = False
524 if self._writinghandles is not None:
525 existing_handles = True
526 fp = self._writinghandles[0]
527 fp.flush()
528 fp.close()
529 # We can't use the cached file handle after close(). So prevent
530 # its usage.
531 self._writinghandles = None
532 self._segmentfile.writing_handle = None
533 # No need to deal with the sidedata writing handle, as it is only
534 # relevant for revlog-v2, which is never inline and thus never
535 # reaches this code
536
537 new_dfh = self.opener(self.data_file, mode=b"w+")
538 new_dfh.truncate(0) # drop any potentially existing data
539 try:
540 with self.reading():
541 for r in range(len(self.index)):
542 new_dfh.write(self.get_segment_for_revs(r, r)[1])
543 new_dfh.flush()
544
545 if new_index_file_path is not None:
546 self.index_file = new_index_file_path
547 with self.__index_new_fp() as fp:
548 self.inline = False
549 for i in range(len(self.index)):
550 e = self.index.entry_binary(i)
551 if i == 0:
552 packed_header = self.index.pack_header(header)
553 e = packed_header + e
554 fp.write(e)
555
556 # If we don't use side-write, the temp file replaces the real
557 # index when we exit the context manager
558
559 self._segmentfile = randomaccessfile.randomaccessfile(
560 self.opener,
561 self.data_file,
562 self.data_config.chunk_cache_size,
563 )
564
565 if existing_handles:
566 # switched from inline to conventional; reopen the index
567 ifh = self.__index_write_fp()
568 self._writinghandles = (ifh, new_dfh, None)
569 self._segmentfile.writing_handle = new_dfh
570 new_dfh = None
571 # No need to deal with the sidedata writing handle, as it is only
572 # relevant for revlog-v2, which is never inline and thus never
573 # reaches this code
574 finally:
575 if new_dfh is not None:
576 new_dfh.close()
577 return self.index_file
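# --- editor's sketch (illustrative, not part of this change): after the
# split, the ".i" file holds only fixed-size index entries and the data
# lives in the ".d" file; the sizes relate as sketched here. ---
def _sketch_sizes_after_split(inner):
    n = len(inner.index)
    index_size = n * inner.index.entry_size
    data_size = inner.end(n - 1) if n else 0
    return index_size, data_size
# --- end sketch ---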
578
521 579 def get_segment_for_revs(self, startrev, endrev):
522 580 """Obtain a segment of raw data corresponding to a range of revisions.
523 581
524 582 Accepts the start and end revisions.
527 585
528 586 Requests for data may be satisfied by a cache.
529 587
530 588 Returns a 2-tuple of (offset, data) for the requested range of
531 589 revisions. Offset is the integer offset from the beginning of the
532 590 revlog and data is a str or buffer of the raw byte data.
533 591
534 592 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
535 593 to determine where each revision's data begins and ends.
536 594
537 595 API: we should consider making this a private part of the InnerRevlog
538 596 at some point.
539 597 """
540 598 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
541 599 # (functions are expensive).
542 600 index = self.index
543 601 istart = index[startrev]
544 602 start = int(istart[0] >> 16)
545 603 if startrev == endrev:
546 604 end = start + istart[1]
547 605 else:
548 606 iend = index[endrev]
549 607 end = int(iend[0] >> 16) + iend[1]
550 608
551 609 if self.inline:
552 610 start += (startrev + 1) * self.index.entry_size
553 611 end += (endrev + 1) * self.index.entry_size
554 612 length = end - start
555 613
556 614 return start, self._segmentfile.read_chunk(start, length)
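# --- editor's sketch (illustrative, not part of this change): slicing a
# single revision's (still compressed) chunk out of a returned segment,
# mirroring what the revlog chunk code does with start()/length(). ---
def _sketch_chunk_for_rev(inner, rev):
    offset, data = inner.get_segment_for_revs(rev, rev)
    chunkstart = inner.start(rev)
    if inner.inline:
        # inline revlogs interleave index entries with the data
        chunkstart += (rev + 1) * inner.index.entry_size
    return data[chunkstart - offset : chunkstart - offset + inner.length(rev)]
# --- end sketch ---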
557 615
558 616
559 617 class revlog:
560 618 """
561 619 the underlying revision storage object
562 620
563 621 A revlog consists of two parts, an index and the revision data.
564 622
565 623 The index is a file with a fixed record size containing
566 624 information on each revision, including its nodeid (hash), the
567 625 nodeids of its parents, the position and offset of its data within
568 626 the data file, and the revision it's based on. Finally, each entry
569 627 contains a linkrev entry that can serve as a pointer to external
570 628 data.
571 629
572 630 The revision data itself is a linear collection of data chunks.
573 631 Each chunk represents a revision and is usually represented as a
574 632 delta against the previous chunk. To bound lookup time, runs of
575 633 deltas are limited to about 2 times the length of the original
576 634 version data. This makes retrieval of a version proportional to
577 635 its size, or O(1) relative to the number of revisions.
578 636
579 637 Both pieces of the revlog are written to in an append-only
580 638 fashion, which means we never need to rewrite a file to insert or
581 639 remove data, and can use some simple techniques to avoid the need
582 640 for locking while reading.
583 641
584 642 If checkambig, indexfile is opened with checkambig=True at
585 643 writing, to avoid file stat ambiguity.
586 644
587 645 If mmaplargeindex is True, and an mmapindexthreshold is set, the
588 646 index will be mmapped rather than read if it is larger than the
589 647 configured threshold.
590 648
591 649 If censorable is True, the revlog can have censored revisions.
592 650
593 651 If `upperboundcomp` is not None, this is the expected maximal gain from
594 652 compression for the data content.
595 653
596 654 `concurrencychecker` is an optional function that receives 3 arguments: a
597 655 file handle, a filename, and an expected position. It should check whether
598 656 the current position in the file handle is valid, and log/warn/fail (by
599 657 raising).
600 658
601 659 See mercurial/revlogutils/constants.py for details about the content of an
602 660 index entry.
603 661 """
604 662
605 663 _flagserrorclass = error.RevlogError
606 664
607 665 @staticmethod
608 666 def is_inline_index(header_bytes):
609 667 """Determine if a revlog is inline from the initial bytes of the index"""
610 668 header = INDEX_HEADER.unpack(header_bytes)[0]
611 669
612 670 _format_flags = header & ~0xFFFF
613 671 _format_version = header & 0xFFFF
614 672
615 673 features = FEATURES_BY_VERSION[_format_version]
616 674 return features[b'inline'](_format_flags)
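# --- editor's sketch (illustrative, not part of this change): the header
# word packs the format version in the low 16 bits and feature flags
# (e.g. FLAG_INLINE_DATA == 1 << 16) in the high bits. ---
import struct as _sketch_struct

def _sketch_split_header(header_bytes):
    header = _sketch_struct.unpack(">I", header_bytes[:4])[0]
    return header & ~0xFFFF, header & 0xFFFF  # (flags, version)
# --- end sketch ---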
617 675
618 676 def __init__(
619 677 self,
620 678 opener,
621 679 target,
622 680 radix,
623 681 postfix=None, # only exist for `tmpcensored` now
624 682 checkambig=False,
625 683 mmaplargeindex=False,
626 684 censorable=False,
627 685 upperboundcomp=None,
628 686 persistentnodemap=False,
629 687 concurrencychecker=None,
630 688 trypending=False,
631 689 try_split=False,
632 690 canonical_parent_order=True,
633 691 ):
634 692 """
635 693 create a revlog object
636 694
637 695 opener is a function that abstracts the file opening operation
638 696 and can be used to implement COW semantics or the like.
639 697
640 698 `target`: a (KIND, ID) tuple that identifies the content stored in
641 699 this revlog. It helps the rest of the code to understand what the
642 700 revlog is about without having to resort to heuristics and index
643 701 filename analysis. Note that this must be reliably set by normal
644 702 code, but that test, debug, or performance measurement code might
645 703 not set it to an accurate value.
646 704 """
647 705
648 706 self.radix = radix
649 707
650 708 self._docket_file = None
651 709 self._indexfile = None
652 710 self._datafile = None
653 711 self._sidedatafile = None
654 712 self._nodemap_file = None
655 713 self.postfix = postfix
656 714 self._trypending = trypending
657 715 self._try_split = try_split
658 716 self.opener = opener
659 717 if persistentnodemap:
660 718 self._nodemap_file = nodemaputil.get_nodemap_file(self)
661 719
662 720 assert target[0] in ALL_KINDS
663 721 assert len(target) == 2
664 722 self.target = target
665 723 if b'feature-config' in self.opener.options:
666 724 self.feature_config = self.opener.options[b'feature-config'].copy()
667 725 else:
668 726 self.feature_config = FeatureConfig()
669 727 self.feature_config.censorable = censorable
670 728 self.feature_config.canonical_parent_order = canonical_parent_order
671 729 if b'data-config' in self.opener.options:
672 730 self.data_config = self.opener.options[b'data-config'].copy()
673 731 else:
674 732 self.data_config = DataConfig()
675 733 self.data_config.check_ambig = checkambig
676 734 self.data_config.mmap_large_index = mmaplargeindex
677 735 if b'delta-config' in self.opener.options:
678 736 self.delta_config = self.opener.options[b'delta-config'].copy()
679 737 else:
680 738 self.delta_config = DeltaConfig()
681 739 self.delta_config.upper_bound_comp = upperboundcomp
682 740
683 741 # 3-tuple of (node, rev, text) for a raw revision.
684 742 self._revisioncache = None
685 743 # Maps rev to chain base rev.
686 744 self._chainbasecache = util.lrucachedict(100)
687 745
688 746 self.index = None
689 747 self._docket = None
690 748 self._nodemap_docket = None
691 749 # Mapping of partial identifiers to full nodes.
692 750 self._pcache = {}
693 751
694 752 # other optional features
695 753
696 754 # Make copy of flag processors so each revlog instance can support
697 755 # custom flags.
698 756 self._flagprocessors = dict(flagutil.flagprocessors)
699 757 # prevent nesting of addgroup
700 758 self._adding_group = None
701 759
702 760 chunk_cache = self._loadindex()
703 761 self._load_inner(chunk_cache)
704 762
705 763 self._concurrencychecker = concurrencychecker
706 764
707 765 @property
708 766 def _generaldelta(self):
709 767 """temporary compatibility proxy"""
710 768 util.nouideprecwarn(
711 769 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
712 770 )
713 771 return self.delta_config.general_delta
714 772
715 773 @property
716 774 def _checkambig(self):
717 775 """temporary compatibility proxy"""
718 776 util.nouideprecwarn(
719 777 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
720 778 )
721 779 return self.data_config.check_ambig
722 780
723 781 @property
724 782 def _mmaplargeindex(self):
725 783 """temporary compatibility proxy"""
726 784 util.nouideprecwarn(
727 785 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
728 786 )
729 787 return self.data_config.mmap_large_index
730 788
731 789 @property
732 790 def _censorable(self):
733 791 """temporary compatibility proxy"""
734 792 util.nouideprecwarn(
735 793 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
736 794 )
737 795 return self.feature_config.censorable
738 796
739 797 @property
740 798 def _chunkcachesize(self):
741 799 """temporary compatibility proxy"""
742 800 util.nouideprecwarn(
743 801 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
744 802 )
745 803 return self.data_config.chunk_cache_size
746 804
747 805 @property
748 806 def _maxchainlen(self):
749 807 """temporary compatibility proxy"""
750 808 util.nouideprecwarn(
751 809 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
752 810 )
753 811 return self.delta_config.max_chain_len
754 812
755 813 @property
756 814 def _deltabothparents(self):
757 815 """temporary compatibility proxy"""
758 816 util.nouideprecwarn(
759 817 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
760 818 )
761 819 return self.delta_config.delta_both_parents
762 820
763 821 @property
764 822 def _candidate_group_chunk_size(self):
765 823 """temporary compatibility proxy"""
766 824 util.nouideprecwarn(
767 825 b"use revlog.delta_config.candidate_group_chunk_size",
768 826 b"6.6",
769 827 stacklevel=2,
770 828 )
771 829 return self.delta_config.candidate_group_chunk_size
772 830
773 831 @property
774 832 def _debug_delta(self):
775 833 """temporary compatibility proxy"""
776 834 util.nouideprecwarn(
777 835 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
778 836 )
779 837 return self.delta_config.debug_delta
780 838
781 839 @property
782 840 def _compengine(self):
783 841 """temporary compatibility proxy"""
784 842 util.nouideprecwarn(
785 843 b"use revlog.feature_config.compression_engine",
786 844 b"6.6",
787 845 stacklevel=2,
788 846 )
789 847 return self.feature_config.compression_engine
790 848
791 849 @property
792 850 def upperboundcomp(self):
793 851 """temporary compatibility proxy"""
794 852 util.nouideprecwarn(
795 853 b"use revlog.delta_config.upper_bound_comp",
796 854 b"6.6",
797 855 stacklevel=2,
798 856 )
799 857 return self.delta_config.upper_bound_comp
800 858
801 859 @property
802 860 def _compengineopts(self):
803 861 """temporary compatibility proxy"""
804 862 util.nouideprecwarn(
805 863 b"use revlog.feature_config.compression_engine_options",
806 864 b"6.6",
807 865 stacklevel=2,
808 866 )
809 867 return self.feature_config.compression_engine_options
810 868
811 869 @property
812 870 def _maxdeltachainspan(self):
813 871 """temporary compatibility proxy"""
814 872 util.nouideprecwarn(
815 873 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
816 874 )
817 875 return self.delta_config.max_deltachain_span
818 876
819 877 @property
820 878 def _withsparseread(self):
821 879 """temporary compatibility proxy"""
822 880 util.nouideprecwarn(
823 881 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
824 882 )
825 883 return self.data_config.with_sparse_read
826 884
827 885 @property
828 886 def _sparserevlog(self):
829 887 """temporary compatibility proxy"""
830 888 util.nouideprecwarn(
831 889 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
832 890 )
833 891 return self.delta_config.sparse_revlog
834 892
835 893 @property
836 894 def hassidedata(self):
837 895 """temporary compatibility proxy"""
838 896 util.nouideprecwarn(
839 897 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
840 898 )
841 899 return self.feature_config.has_side_data
842 900
843 901 @property
844 902 def _srdensitythreshold(self):
845 903 """temporary compatibility proxy"""
846 904 util.nouideprecwarn(
847 905 b"use revlog.data_config.sr_density_threshold",
848 906 b"6.6",
849 907 stacklevel=2,
850 908 )
851 909 return self.data_config.sr_density_threshold
852 910
853 911 @property
854 912 def _srmingapsize(self):
855 913 """temporary compatibility proxy"""
856 914 util.nouideprecwarn(
857 915 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
858 916 )
859 917 return self.data_config.sr_min_gap_size
860 918
861 919 @property
862 920 def _compute_rank(self):
863 921 """temporary compatibility proxy"""
864 922 util.nouideprecwarn(
865 923 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
866 924 )
867 925 return self.feature_config.compute_rank
868 926
869 927 @property
870 928 def canonical_parent_order(self):
871 929 """temporary compatibility proxy"""
872 930 util.nouideprecwarn(
873 931 b"use revlog.feature_config.canonical_parent_order",
874 932 b"6.6",
875 933 stacklevel=2,
876 934 )
877 935 return self.feature_config.canonical_parent_order
878 936
879 937 @property
880 938 def _lazydelta(self):
881 939 """temporary compatibility proxy"""
882 940 util.nouideprecwarn(
883 941 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
884 942 )
885 943 return self.delta_config.lazy_delta
886 944
887 945 @property
888 946 def _lazydeltabase(self):
889 947 """temporary compatibility proxy"""
890 948 util.nouideprecwarn(
891 949 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
892 950 )
893 951 return self.delta_config.lazy_delta_base
894 952
895 953 def _init_opts(self):
896 954 """process options (from above/config) to setup associated default revlog mode
897 955
898 956 These values might be affected when actually reading on disk information.
899 957
900 958 The relevant values are returned for use in _loadindex().
901 959
902 960 * newversionflags:
903 961 version header to use if we need to create a new revlog
904 962
905 963 * mmapindexthreshold:
906 964 minimal index size at which to start using mmap
907 965
908 966 * force_nodemap:
909 967 force the usage of a "development" version of the nodemap code
910 968 """
911 969 opts = self.opener.options
912 970
913 971 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
914 972 new_header = CHANGELOGV2
915 973 compute_rank = opts.get(b'changelogv2.compute-rank', True)
916 974 self.feature_config.compute_rank = compute_rank
917 975 elif b'revlogv2' in opts:
918 976 new_header = REVLOGV2
919 977 elif b'revlogv1' in opts:
920 978 new_header = REVLOGV1 | FLAG_INLINE_DATA
921 979 if b'generaldelta' in opts:
922 980 new_header |= FLAG_GENERALDELTA
923 981 elif b'revlogv0' in self.opener.options:
924 982 new_header = REVLOGV0
925 983 else:
926 984 new_header = REVLOG_DEFAULT_VERSION
927 985
928 986 mmapindexthreshold = None
929 987 if self.data_config.mmap_large_index:
930 988 mmapindexthreshold = self.data_config.mmap_index_threshold
931 989 if self.feature_config.enable_ellipsis:
932 990 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
933 991
934 992 # revlog v0 doesn't have flag processors
935 993 for flag, processor in opts.get(b'flagprocessors', {}).items():
936 994 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
937 995
938 996 chunk_cache_size = self.data_config.chunk_cache_size
939 997 if chunk_cache_size <= 0:
940 998 raise error.RevlogError(
941 999 _(b'revlog chunk cache size %r is not greater than 0')
942 1000 % chunk_cache_size
943 1001 )
944 1002 elif chunk_cache_size & (chunk_cache_size - 1):
945 1003 raise error.RevlogError(
946 1004 _(b'revlog chunk cache size %r is not a power of 2')
947 1005 % chunk_cache_size
948 1006 )
949 1007 force_nodemap = opts.get(b'devel-force-nodemap', False)
950 1008 return new_header, mmapindexthreshold, force_nodemap
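# --- editor's sketch (illustrative, not part of this change): the
# power-of-two validation above uses the classic bit trick; for n > 0,
# n & (n - 1) clears the lowest set bit and is zero only for powers of 2.
assert all(n & (n - 1) == 0 for n in (1, 2, 4, 65536))
assert all(n & (n - 1) != 0 for n in (3, 6, 65535))
# --- end sketch ---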
951 1009
952 1010 def _get_data(self, filepath, mmap_threshold, size=None):
953 1011 """return a file content with or without mmap
954 1012
955 1013 If the file is missing return the empty string"""
956 1014 try:
957 1015 with self.opener(filepath) as fp:
958 1016 if mmap_threshold is not None:
959 1017 file_size = self.opener.fstat(fp).st_size
960 1018 if file_size >= mmap_threshold:
961 1019 if size is not None:
962 1020 # avoid potential mmap crash
963 1021 size = min(file_size, size)
964 1022 # TODO: should .close() to release resources without
965 1023 # relying on Python GC
966 1024 if size is None:
967 1025 return util.buffer(util.mmapread(fp))
968 1026 else:
969 1027 return util.buffer(util.mmapread(fp, size))
970 1028 if size is None:
971 1029 return fp.read()
972 1030 else:
973 1031 return fp.read(size)
974 1032 except FileNotFoundError:
975 1033 return b''
976 1034
977 1035 def get_streams(self, max_linkrev, force_inline=False):
978 1036 """return a list of streams that represent this revlog
979 1037
980 1038 This is used by stream-clone to do byte-for-byte copies of a repository.
981 1039
982 1040 This streams data for all revisions that refer to a changelog revision up
983 1041 to `max_linkrev`.
984 1042
985 1043 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
986 1044
987 1045 It returns a list of three-tuples:
988 1046
989 1047 [
990 1048 (filename, bytes_stream, stream_size),
991 1049 …
992 1050 ]
993 1051 """
994 1052 n = len(self)
995 1053 index = self.index
996 1054 while n > 0:
997 1055 linkrev = index[n - 1][4]
998 1056 if linkrev < max_linkrev:
999 1057 break
1000 1058 # note: this loop will rarely go through multiple iterations, since
1001 1059 # it only traverses commits created during the current streaming
1002 1060 # pull operation.
1003 1061 #
1004 1062 # If this becomes a problem, using a binary search should cap the
1005 1063 # runtime of this.
1006 1064 n = n - 1
1007 1065 if n == 0:
1008 1066 # no data to send
1009 1067 return []
1010 1068 index_size = n * index.entry_size
1011 1069 data_size = self.end(n - 1)
1012 1070
1013 1071 # XXX we might have been split (or stripped) since the object
1014 1072 # initialization. We need to close this race too, by having a way to
1015 1073 # pre-open the files we feed to the revlog and never closing them
1016 1074 # before we are done streaming.
1017 1075
1018 1076 if self._inline:
1019 1077
1020 1078 def get_stream():
1021 1079 with self.opener(self._indexfile, mode=b"r") as fp:
1022 1080 yield None
1023 1081 size = index_size + data_size
1024 1082 if size <= 65536:
1025 1083 yield fp.read(size)
1026 1084 else:
1027 1085 yield from util.filechunkiter(fp, limit=size)
1028 1086
1029 1087 inline_stream = get_stream()
1030 1088 next(inline_stream)
1031 1089 return [
1032 1090 (self._indexfile, inline_stream, index_size + data_size),
1033 1091 ]
1034 1092 elif force_inline:
1035 1093
1036 1094 def get_stream():
1037 1095 with self.reading():
1038 1096 yield None
1039 1097
1040 1098 for rev in range(n):
1041 1099 idx = self.index.entry_binary(rev)
1042 1100 if rev == 0 and self._docket is None:
1043 1101 # re-inject the inline flag
1044 1102 header = self._format_flags
1045 1103 header |= self._format_version
1046 1104 header |= FLAG_INLINE_DATA
1047 1105 header = self.index.pack_header(header)
1048 1106 idx = header + idx
1049 1107 yield idx
1050 1108 yield self._inner.get_segment_for_revs(rev, rev)[1]
1051 1109
1052 1110 inline_stream = get_stream()
1053 1111 next(inline_stream)
1054 1112 return [
1055 1113 (self._indexfile, inline_stream, index_size + data_size),
1056 1114 ]
1057 1115 else:
1058 1116
1059 1117 def get_index_stream():
1060 1118 with self.opener(self._indexfile, mode=b"r") as fp:
1061 1119 yield None
1062 1120 if index_size <= 65536:
1063 1121 yield fp.read(index_size)
1064 1122 else:
1065 1123 yield from util.filechunkiter(fp, limit=index_size)
1066 1124
1067 1125 def get_data_stream():
1068 1126 with self._datafp() as fp:
1069 1127 yield None
1070 1128 if data_size <= 65536:
1071 1129 yield fp.read(data_size)
1072 1130 else:
1073 1131 yield from util.filechunkiter(fp, limit=data_size)
1074 1132
1075 1133 index_stream = get_index_stream()
1076 1134 next(index_stream)
1077 1135 data_stream = get_data_stream()
1078 1136 next(data_stream)
1079 1137 return [
1080 1138 (self._datafile, data_stream, data_size),
1081 1139 (self._indexfile, index_stream, index_size),
1082 1140 ]
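# --- editor's sketch (illustrative, not part of this change): each
# returned stream advertises its exact byte size, which a consumer can
# verify while copying. ---
def _sketch_total_bytes(streams):
    total = 0
    for _name, chunks, size in streams:
        copied = sum(len(c) for c in chunks)
        assert copied == size  # advertised size matches the bytes yielded
        total += copied
    return total
# --- end sketch ---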
1083 1141
1084 1142 def _loadindex(self, docket=None):
1085 1143
1086 1144 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1087 1145
1088 1146 if self.postfix is not None:
1089 1147 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1090 1148 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1091 1149 entry_point = b'%s.i.a' % self.radix
1092 1150 elif self._try_split and self.opener.exists(self._split_index_file):
1093 1151 entry_point = self._split_index_file
1094 1152 else:
1095 1153 entry_point = b'%s.i' % self.radix
1096 1154
1097 1155 if docket is not None:
1098 1156 self._docket = docket
1099 1157 self._docket_file = entry_point
1100 1158 else:
1101 1159 self._initempty = True
1102 1160 entry_data = self._get_data(entry_point, mmapindexthreshold)
1103 1161 if len(entry_data) > 0:
1104 1162 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1105 1163 self._initempty = False
1106 1164 else:
1107 1165 header = new_header
1108 1166
1109 1167 self._format_flags = header & ~0xFFFF
1110 1168 self._format_version = header & 0xFFFF
1111 1169
1112 1170 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1113 1171 if supported_flags is None:
1114 1172 msg = _(b'unknown version (%d) in revlog %s')
1115 1173 msg %= (self._format_version, self.display_id)
1116 1174 raise error.RevlogError(msg)
1117 1175 elif self._format_flags & ~supported_flags:
1118 1176 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1119 1177 display_flag = self._format_flags >> 16
1120 1178 msg %= (display_flag, self._format_version, self.display_id)
1121 1179 raise error.RevlogError(msg)
1122 1180
1123 1181 features = FEATURES_BY_VERSION[self._format_version]
1124 1182 self._inline = features[b'inline'](self._format_flags)
1125 1183 self.delta_config.general_delta = features[b'generaldelta'](
1126 1184 self._format_flags
1127 1185 )
1128 1186 self.feature_config.has_side_data = features[b'sidedata']
1129 1187
1130 1188 if not features[b'docket']:
1131 1189 self._indexfile = entry_point
1132 1190 index_data = entry_data
1133 1191 else:
1134 1192 self._docket_file = entry_point
1135 1193 if self._initempty:
1136 1194 self._docket = docketutil.default_docket(self, header)
1137 1195 else:
1138 1196 self._docket = docketutil.parse_docket(
1139 1197 self, entry_data, use_pending=self._trypending
1140 1198 )
1141 1199
1142 1200 if self._docket is not None:
1143 1201 self._indexfile = self._docket.index_filepath()
1144 1202 index_data = b''
1145 1203 index_size = self._docket.index_end
1146 1204 if index_size > 0:
1147 1205 index_data = self._get_data(
1148 1206 self._indexfile, mmapindexthreshold, size=index_size
1149 1207 )
1150 1208 if len(index_data) < index_size:
1151 1209 msg = _(b'too few index data for %s: got %d, expected %d')
1152 1210 msg %= (self.display_id, len(index_data), index_size)
1153 1211 raise error.RevlogError(msg)
1154 1212
1155 1213 self._inline = False
1156 1214 # generaldelta implied by version 2 revlogs.
1157 1215 self.delta_config.general_delta = True
1158 1216 # the logic for persistent nodemap will be dealt with within the
1159 1217 # main docket, so disable it for now.
1160 1218 self._nodemap_file = None
1161 1219
1162 1220 if self._docket is not None:
1163 1221 self._datafile = self._docket.data_filepath()
1164 1222 self._sidedatafile = self._docket.sidedata_filepath()
1165 1223 elif self.postfix is None:
1166 1224 self._datafile = b'%s.d' % self.radix
1167 1225 else:
1168 1226 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1169 1227
1170 1228 self.nodeconstants = sha1nodeconstants
1171 1229 self.nullid = self.nodeconstants.nullid
1172 1230
1173 1231 # sparse-revlog can't be on without general-delta (issue6056)
1174 1232 if not self.delta_config.general_delta:
1175 1233 self.delta_config.sparse_revlog = False
1176 1234
1177 1235 self._storedeltachains = True
1178 1236
1179 1237 devel_nodemap = (
1180 1238 self._nodemap_file
1181 1239 and force_nodemap
1182 1240 and parse_index_v1_nodemap is not None
1183 1241 )
1184 1242
1185 1243 use_rust_index = False
1186 1244 if rustrevlog is not None:
1187 1245 if self._nodemap_file is not None:
1188 1246 use_rust_index = True
1189 1247 else:
1190 1248 use_rust_index = self.opener.options.get(b'rust.index')
1191 1249
1192 1250 self._parse_index = parse_index_v1
1193 1251 if self._format_version == REVLOGV0:
1194 1252 self._parse_index = revlogv0.parse_index_v0
1195 1253 elif self._format_version == REVLOGV2:
1196 1254 self._parse_index = parse_index_v2
1197 1255 elif self._format_version == CHANGELOGV2:
1198 1256 self._parse_index = parse_index_cl_v2
1199 1257 elif devel_nodemap:
1200 1258 self._parse_index = parse_index_v1_nodemap
1201 1259 elif use_rust_index:
1202 1260 self._parse_index = parse_index_v1_mixed
1203 1261 try:
1204 1262 d = self._parse_index(index_data, self._inline)
1205 1263 index, chunkcache = d
1206 1264 use_nodemap = (
1207 1265 not self._inline
1208 1266 and self._nodemap_file is not None
1209 1267 and hasattr(index, 'update_nodemap_data')
1210 1268 )
1211 1269 if use_nodemap:
1212 1270 nodemap_data = nodemaputil.persisted_data(self)
1213 1271 if nodemap_data is not None:
1214 1272 docket = nodemap_data[0]
1215 1273 if (
1216 1274 len(d[0]) > docket.tip_rev
1217 1275 and d[0][docket.tip_rev][7] == docket.tip_node
1218 1276 ):
1219 1277 # no changelog tampering
1220 1278 self._nodemap_docket = docket
1221 1279 index.update_nodemap_data(*nodemap_data)
1222 1280 except (ValueError, IndexError):
1223 1281 raise error.RevlogError(
1224 1282 _(b"index %s is corrupted") % self.display_id
1225 1283 )
1226 1284 self.index = index
1227 1285 # revnum -> (chain-length, sum-delta-length)
1228 1286 self._chaininfocache = util.lrucachedict(500)
1229 1287 # revlog header -> revlog compressor
1230 1288 self._decompressors = {}
1231 1289
1232 1290 return chunkcache
1233 1291
1234 1292 def _load_inner(self, chunk_cache):
1235 1293 self._inner = _InnerRevlog(
1236 1294 opener=self.opener,
1237 1295 index=self.index,
1238 1296 index_file=self._indexfile,
1239 1297 data_file=self._datafile,
1240 1298 sidedata_file=self._sidedatafile,
1241 1299 inline=self._inline,
1242 1300 data_config=self.data_config,
1243 1301 chunk_cache=chunk_cache,
1244 1302 )
1245 1303
1246 1304 def get_revlog(self):
1247 1305 """simple function to mirror API of other not-really-revlog API"""
1248 1306 return self
1249 1307
1250 1308 @util.propertycache
1251 1309 def revlog_kind(self):
1252 1310 return self.target[0]
1253 1311
1254 1312 @util.propertycache
1255 1313 def display_id(self):
1256 1314 """The public facing "ID" of the revlog that we use in message"""
1257 1315 if self.revlog_kind == KIND_FILELOG:
1258 1316 # Reference the file without the "data/" prefix, so it is familiar
1259 1317 # to the user.
1260 1318 return self.target[1]
1261 1319 else:
1262 1320 return self.radix
1263 1321
1264 1322 def _get_decompressor(self, t):
1265 1323 try:
1266 1324 compressor = self._decompressors[t]
1267 1325 except KeyError:
1268 1326 try:
1269 1327 engine = util.compengines.forrevlogheader(t)
1270 1328 compressor = engine.revlogcompressor(
1271 1329 self.feature_config.compression_engine_options
1272 1330 )
1273 1331 self._decompressors[t] = compressor
1274 1332 except KeyError:
1275 1333 raise error.RevlogError(
1276 1334 _(b'unknown compression type %s') % binascii.hexlify(t)
1277 1335 )
1278 1336 return compressor
1279 1337
1280 1338 @util.propertycache
1281 1339 def _compressor(self):
1282 1340 engine = util.compengines[self.feature_config.compression_engine]
1283 1341 return engine.revlogcompressor(
1284 1342 self.feature_config.compression_engine_options
1285 1343 )
1286 1344
1287 1345 @util.propertycache
1288 1346 def _decompressor(self):
1289 1347 """the default decompressor"""
1290 1348 if self._docket is None:
1291 1349 return None
1292 1350 t = self._docket.default_compression_header
1293 1351 c = self._get_decompressor(t)
1294 1352 return c.decompress
1295 1353
1296 1354 def _datafp(self, mode=b'r'):
1297 1355 """file object for the revlog's data file"""
1298 1356 return self.opener(self._datafile, mode=mode)
1299 1357
1300 1358 def tiprev(self):
1301 1359 return len(self.index) - 1
1302 1360
1303 1361 def tip(self):
1304 1362 return self.node(self.tiprev())
1305 1363
1306 1364 def __contains__(self, rev):
1307 1365 return 0 <= rev < len(self)
1308 1366
1309 1367 def __len__(self):
1310 1368 return len(self.index)
1311 1369
1312 1370 def __iter__(self):
1313 1371 return iter(range(len(self)))
1314 1372
1315 1373 def revs(self, start=0, stop=None):
1316 1374 """iterate over all rev in this revlog (from start to stop)"""
1317 1375 return storageutil.iterrevs(len(self), start=start, stop=stop)
1318 1376
1319 1377 def hasnode(self, node):
1320 1378 try:
1321 1379 self.rev(node)
1322 1380 return True
1323 1381 except KeyError:
1324 1382 return False
1325 1383
1326 1384 def _candelta(self, baserev, rev):
1327 1385 """whether two revisions (baserev, rev) can be delta-ed or not"""
1328 1386 # Disable delta if either rev requires a content-changing flag
1329 1387 # processor (ex. LFS). This is because such a flag processor can alter
1330 1388 # the rawtext content that the delta will be based on, and two clients
1331 1389 # could have the same revlog node with different flags (i.e. different
1332 1390 # rawtext contents) and the delta could be incompatible.
1333 1391 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1334 1392 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1335 1393 ):
1336 1394 return False
1337 1395 return True
1338 1396
1339 1397 def update_caches(self, transaction):
1340 1398 """update on disk cache
1341 1399
1342 1400 If a transaction is passed, the update may be delayed to transaction
1343 1401 commit."""
1344 1402 if self._nodemap_file is not None:
1345 1403 if transaction is None:
1346 1404 nodemaputil.update_persistent_nodemap(self)
1347 1405 else:
1348 1406 nodemaputil.setup_persistent_nodemap(transaction, self)
1349 1407
1350 1408 def clearcaches(self):
1351 1409 """Clear in-memory caches"""
1352 1410 self._revisioncache = None
1353 1411 self._chainbasecache.clear()
1354 1412 self._inner._segmentfile.clear_cache()
1355 1413 self._inner._segmentfile_sidedata.clear_cache()
1356 1414 self._pcache = {}
1357 1415 self._nodemap_docket = None
1358 1416 self.index.clearcaches()
1359 1417 # The python code is the one responsible for validating the docket, so
1360 1418 # we end up having to refresh it here.
1361 1419 use_nodemap = (
1362 1420 not self._inline
1363 1421 and self._nodemap_file is not None
1364 1422 and hasattr(self.index, 'update_nodemap_data')
1365 1423 )
1366 1424 if use_nodemap:
1367 1425 nodemap_data = nodemaputil.persisted_data(self)
1368 1426 if nodemap_data is not None:
1369 1427 self._nodemap_docket = nodemap_data[0]
1370 1428 self.index.update_nodemap_data(*nodemap_data)
1371 1429
1372 1430 def rev(self, node):
1373 1431 """return the revision number associated with a <nodeid>"""
1374 1432 try:
1375 1433 return self.index.rev(node)
1376 1434 except TypeError:
1377 1435 raise
1378 1436 except error.RevlogError:
1379 1437 # parsers.c radix tree lookup failed
1380 1438 if (
1381 1439 node == self.nodeconstants.wdirid
1382 1440 or node in self.nodeconstants.wdirfilenodeids
1383 1441 ):
1384 1442 raise error.WdirUnsupported
1385 1443 raise error.LookupError(node, self.display_id, _(b'no node'))
1386 1444
1387 1445 # Accessors for index entries.
1388 1446
1389 1447 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1390 1448 # are flags.
1391 1449 def start(self, rev):
1392 1450 return int(self.index[rev][0] >> 16)
1393 1451
1394 1452 def sidedata_cut_off(self, rev):
1395 1453 sd_cut_off = self.index[rev][8]
1396 1454 if sd_cut_off != 0:
1397 1455 return sd_cut_off
1398 1456 # This is some annoying dance, because entries without sidedata
1399 1457 # currently use 0 as their offset (instead of previous-offset +
1400 1458 # previous-size)
1401 1459 #
1402 1460 # We should reconsider this sidedata → 0 sidedata_offset policy.
1403 1461 # In the meantime, we need this.
1404 1462 while 0 <= rev:
1405 1463 e = self.index[rev]
1406 1464 if e[9] != 0:
1407 1465 return e[8] + e[9]
1408 1466 rev -= 1
1409 1467 return 0
1410 1468
1411 1469 def flags(self, rev):
1412 1470 return self.index[rev][0] & 0xFFFF
1413 1471
1414 1472 def length(self, rev):
1415 1473 return self.index[rev][1]
1416 1474
1417 1475 def sidedata_length(self, rev):
1418 1476 if not self.feature_config.has_side_data:
1419 1477 return 0
1420 1478 return self.index[rev][9]
1421 1479
1422 1480 def rawsize(self, rev):
1423 1481 """return the length of the uncompressed text for a given revision"""
1424 1482 l = self.index[rev][2]
1425 1483 if l >= 0:
1426 1484 return l
1427 1485
1428 1486 t = self.rawdata(rev)
1429 1487 return len(t)
1430 1488
1431 1489 def size(self, rev):
1432 1490 """length of non-raw text (processed by a "read" flag processor)"""
1433 1491 # fast path: if no "read" flag processor could change the content,
1434 1492 # size is rawsize. note: ELLIPSIS is known to not change the content.
1435 1493 flags = self.flags(rev)
1436 1494 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1437 1495 return self.rawsize(rev)
1438 1496
1439 1497 return len(self.revision(rev))
1440 1498
1441 1499 def fast_rank(self, rev):
1442 1500 """Return the rank of a revision if already known, or None otherwise.
1443 1501
1444 1502 The rank of a revision is the size of the sub-graph it defines as a
1445 1503 head. Equivalently, the rank of a revision `r` is the size of the set
1446 1504 `ancestors(r)`, `r` included.
1447 1505
1448 1506 This method returns the rank retrieved from the revlog in constant
1449 1507 time. It makes no attempt at computing unknown values for versions of
1450 1508 the revlog which do not persist the rank.
1451 1509 """
1452 1510 rank = self.index[rev][ENTRY_RANK]
1453 1511 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1454 1512 return None
1455 1513 if rev == nullrev:
1456 1514 return 0 # convention
1457 1515 return rank
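# --- editor's sketch (illustrative, not part of this change): the rank
# persisted by changelog-v2 equals what this naive, slow computation
# returns, i.e. the number of ancestors of `rev`, itself included. ---
def _sketch_rank(rl, rev):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r == nullrev or r in seen:
            continue
        seen.add(r)
        stack.extend(rl.parentrevs(r))
    return len(seen)
# --- end sketch ---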
1458 1516
1459 1517 def chainbase(self, rev):
1460 1518 base = self._chainbasecache.get(rev)
1461 1519 if base is not None:
1462 1520 return base
1463 1521
1464 1522 index = self.index
1465 1523 iterrev = rev
1466 1524 base = index[iterrev][3]
1467 1525 while base != iterrev:
1468 1526 iterrev = base
1469 1527 base = index[iterrev][3]
1470 1528
1471 1529 self._chainbasecache[rev] = base
1472 1530 return base
1473 1531
1474 1532 def linkrev(self, rev):
1475 1533 return self.index[rev][4]
1476 1534
1477 1535 def parentrevs(self, rev):
1478 1536 try:
1479 1537 entry = self.index[rev]
1480 1538 except IndexError:
1481 1539 if rev == wdirrev:
1482 1540 raise error.WdirUnsupported
1483 1541 raise
1484 1542
1485 1543 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1486 1544 return entry[6], entry[5]
1487 1545 else:
1488 1546 return entry[5], entry[6]
1489 1547
1490 1548 # fast parentrevs(rev) where rev isn't filtered
1491 1549 _uncheckedparentrevs = parentrevs
1492 1550
1493 1551 def node(self, rev):
1494 1552 try:
1495 1553 return self.index[rev][7]
1496 1554 except IndexError:
1497 1555 if rev == wdirrev:
1498 1556 raise error.WdirUnsupported
1499 1557 raise
1500 1558
1501 1559 # Derived from index values.
1502 1560
1503 1561 def end(self, rev):
1504 1562 return self.start(rev) + self.length(rev)
1505 1563
1506 1564 def parents(self, node):
1507 1565 i = self.index
1508 1566 d = i[self.rev(node)]
1509 1567 # inline node() to avoid function call overhead
1510 1568 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1511 1569 return i[d[6]][7], i[d[5]][7]
1512 1570 else:
1513 1571 return i[d[5]][7], i[d[6]][7]
1514 1572
1515 1573 def chainlen(self, rev):
1516 1574 return self._chaininfo(rev)[0]
1517 1575
1518 1576 def _chaininfo(self, rev):
1519 1577 chaininfocache = self._chaininfocache
1520 1578 if rev in chaininfocache:
1521 1579 return chaininfocache[rev]
1522 1580 index = self.index
1523 1581 generaldelta = self.delta_config.general_delta
1524 1582 iterrev = rev
1525 1583 e = index[iterrev]
1526 1584 clen = 0
1527 1585 compresseddeltalen = 0
1528 1586 while iterrev != e[3]:
1529 1587 clen += 1
1530 1588 compresseddeltalen += e[1]
1531 1589 if generaldelta:
1532 1590 iterrev = e[3]
1533 1591 else:
1534 1592 iterrev -= 1
1535 1593 if iterrev in chaininfocache:
1536 1594 t = chaininfocache[iterrev]
1537 1595 clen += t[0]
1538 1596 compresseddeltalen += t[1]
1539 1597 break
1540 1598 e = index[iterrev]
1541 1599 else:
1542 1600 # Add text length of base since decompressing that also takes
1543 1601 # work. For cache hits the length is already included.
1544 1602 compresseddeltalen += e[1]
1545 1603 r = (clen, compresseddeltalen)
1546 1604 chaininfocache[rev] = r
1547 1605 return r
1548 1606
1549 1607 def _deltachain(self, rev, stoprev=None):
1550 1608 """Obtain the delta chain for a revision.
1551 1609
1552 1610 ``stoprev`` specifies a revision to stop at. If not specified, we
1553 1611 stop at the base of the chain.
1554 1612
1555 1613 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1556 1614 revs in ascending order and ``stopped`` is a bool indicating whether
1557 1615 ``stoprev`` was hit.
1558 1616 """
1559 1617 generaldelta = self.delta_config.general_delta
1560 1618 # Try C implementation.
1561 1619 try:
1562 1620 return self.index.deltachain(rev, stoprev, generaldelta)
1563 1621 except AttributeError:
1564 1622 pass
1565 1623
1566 1624 chain = []
1567 1625
1568 1626 # Alias to prevent attribute lookup in tight loop.
1569 1627 index = self.index
1570 1628
1571 1629 iterrev = rev
1572 1630 e = index[iterrev]
1573 1631 while iterrev != e[3] and iterrev != stoprev:
1574 1632 chain.append(iterrev)
1575 1633 if generaldelta:
1576 1634 iterrev = e[3]
1577 1635 else:
1578 1636 iterrev -= 1
1579 1637 e = index[iterrev]
1580 1638
1581 1639 if iterrev == stoprev:
1582 1640 stopped = True
1583 1641 else:
1584 1642 chain.append(iterrev)
1585 1643 stopped = False
1586 1644
1587 1645 chain.reverse()
1588 1646 return chain, stopped
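# Illustrative sketch: with general delta, entry[3] holds the delta
# base, so for bases 5 -> 3 -> 0 a hypothetical ``rl._deltachain(5)``
# follows the pointers and returns ([0, 3, 5], False), while
# ``rl._deltachain(5, stoprev=3)`` returns ([5], True) because the walk
# stopped before the chain's base. Without general delta the chain is
# just the contiguous run of revs from the base up to ``rev``.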
1589 1647
1590 1648 def ancestors(self, revs, stoprev=0, inclusive=False):
1591 1649 """Generate the ancestors of 'revs' in reverse revision order.
1592 1650 Does not generate revs lower than stoprev.
1593 1651
1594 1652 See the documentation for ancestor.lazyancestors for more details."""
1595 1653
1596 1654 # first, make sure start revisions aren't filtered
1597 1655 revs = list(revs)
1598 1656 checkrev = self.node
1599 1657 for r in revs:
1600 1658 checkrev(r)
1601 1659 # and we're sure ancestors aren't filtered as well
1602 1660
1603 1661 if rustancestor is not None and self.index.rust_ext_compat:
1604 1662 lazyancestors = rustancestor.LazyAncestors
1605 1663 arg = self.index
1606 1664 else:
1607 1665 lazyancestors = ancestor.lazyancestors
1608 1666 arg = self._uncheckedparentrevs
1609 1667 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1610 1668
1611 1669 def descendants(self, revs):
1612 1670 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1613 1671
1614 1672 def findcommonmissing(self, common=None, heads=None):
1615 1673 """Return a tuple of the ancestors of common and the ancestors of heads
1616 1674 that are not ancestors of common. In revset terminology, we return the
1617 1675 tuple:
1618 1676
1619 1677 ::common, (::heads) - (::common)
1620 1678
1621 1679 The list is sorted by revision number, meaning it is
1622 1680 topologically sorted.
1623 1681
1624 1682 'heads' and 'common' are both lists of node IDs. If heads is
1625 1683 not supplied, uses all of the revlog's heads. If common is not
1626 1684 supplied, uses nullid."""
1627 1685 if common is None:
1628 1686 common = [self.nullid]
1629 1687 if heads is None:
1630 1688 heads = self.heads()
1631 1689
1632 1690 common = [self.rev(n) for n in common]
1633 1691 heads = [self.rev(n) for n in heads]
1634 1692
1635 1693 # we want the ancestors, but inclusive
1636 1694 class lazyset:
1637 1695 def __init__(self, lazyvalues):
1638 1696 self.addedvalues = set()
1639 1697 self.lazyvalues = lazyvalues
1640 1698
1641 1699 def __contains__(self, value):
1642 1700 return value in self.addedvalues or value in self.lazyvalues
1643 1701
1644 1702 def __iter__(self):
1645 1703 added = self.addedvalues
1646 1704 for r in added:
1647 1705 yield r
1648 1706 for r in self.lazyvalues:
1649 1707 if r not in added:
1650 1708 yield r
1651 1709
1652 1710 def add(self, value):
1653 1711 self.addedvalues.add(value)
1654 1712
1655 1713 def update(self, values):
1656 1714 self.addedvalues.update(values)
1657 1715
1658 1716 has = lazyset(self.ancestors(common))
1659 1717 has.add(nullrev)
1660 1718 has.update(common)
1661 1719
1662 1720 # take all ancestors from heads that aren't in has
1663 1721 missing = set()
1664 1722 visit = collections.deque(r for r in heads if r not in has)
1665 1723 while visit:
1666 1724 r = visit.popleft()
1667 1725 if r in missing:
1668 1726 continue
1669 1727 else:
1670 1728 missing.add(r)
1671 1729 for p in self.parentrevs(r):
1672 1730 if p not in has:
1673 1731 visit.append(p)
1674 1732 missing = list(missing)
1675 1733 missing.sort()
1676 1734 return has, [self.node(miss) for miss in missing]
1677 1735
1678 1736 def incrementalmissingrevs(self, common=None):
1679 1737 """Return an object that can be used to incrementally compute the
1680 1738 revision numbers of the ancestors of arbitrary sets that are not
1681 1739 ancestors of common. This is an ancestor.incrementalmissingancestors
1682 1740 object.
1683 1741
1684 1742 'common' is a list of revision numbers. If common is not supplied, uses
1685 1743 nullrev.
1686 1744 """
1687 1745 if common is None:
1688 1746 common = [nullrev]
1689 1747
1690 1748 if rustancestor is not None and self.index.rust_ext_compat:
1691 1749 return rustancestor.MissingAncestors(self.index, common)
1692 1750 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1693 1751
1694 1752 def findmissingrevs(self, common=None, heads=None):
1695 1753 """Return the revision numbers of the ancestors of heads that
1696 1754 are not ancestors of common.
1697 1755
1698 1756 More specifically, return a list of revision numbers corresponding to
1699 1757 nodes N such that every N satisfies the following constraints:
1700 1758
1701 1759 1. N is an ancestor of some node in 'heads'
1702 1760 2. N is not an ancestor of any node in 'common'
1703 1761
1704 1762 The list is sorted by revision number, meaning it is
1705 1763 topologically sorted.
1706 1764
1707 1765 'heads' and 'common' are both lists of revision numbers. If heads is
1708 1766 not supplied, uses all of the revlog's heads. If common is not
1709 1767 supplied, uses nullid."""
1710 1768 if common is None:
1711 1769 common = [nullrev]
1712 1770 if heads is None:
1713 1771 heads = self.headrevs()
1714 1772
1715 1773 inc = self.incrementalmissingrevs(common=common)
1716 1774 return inc.missingancestors(heads)
1717 1775
1718 1776 def findmissing(self, common=None, heads=None):
1719 1777 """Return the ancestors of heads that are not ancestors of common.
1720 1778
1721 1779 More specifically, return a list of nodes N such that every N
1722 1780 satisfies the following constraints:
1723 1781
1724 1782 1. N is an ancestor of some node in 'heads'
1725 1783 2. N is not an ancestor of any node in 'common'
1726 1784
1727 1785 The list is sorted by revision number, meaning it is
1728 1786 topologically sorted.
1729 1787
1730 1788 'heads' and 'common' are both lists of node IDs. If heads is
1731 1789 not supplied, uses all of the revlog's heads. If common is not
1732 1790 supplied, uses nullid."""
1733 1791 if common is None:
1734 1792 common = [self.nullid]
1735 1793 if heads is None:
1736 1794 heads = self.heads()
1737 1795
1738 1796 common = [self.rev(n) for n in common]
1739 1797 heads = [self.rev(n) for n in heads]
1740 1798
1741 1799 inc = self.incrementalmissingrevs(common=common)
1742 1800 return [self.node(r) for r in inc.missingancestors(heads)]
1743 1801
1744 1802 def nodesbetween(self, roots=None, heads=None):
1745 1803 """Return a topological path from 'roots' to 'heads'.
1746 1804
1747 1805 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1748 1806 topologically sorted list of all nodes N that satisfy both of
1749 1807 these constraints:
1750 1808
1751 1809 1. N is a descendant of some node in 'roots'
1752 1810 2. N is an ancestor of some node in 'heads'
1753 1811
1754 1812 Every node is considered to be both a descendant and an ancestor
1755 1813 of itself, so every reachable node in 'roots' and 'heads' will be
1756 1814 included in 'nodes'.
1757 1815
1758 1816 'outroots' is the list of reachable nodes in 'roots', i.e., the
1759 1817 subset of 'roots' that is returned in 'nodes'. Likewise,
1760 1818 'outheads' is the subset of 'heads' that is also in 'nodes'.
1761 1819
1762 1820 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1763 1821 unspecified, uses nullid as the only root. If 'heads' is
1764 1822 unspecified, uses list of all of the revlog's heads."""
1765 1823 nonodes = ([], [], [])
1766 1824 if roots is not None:
1767 1825 roots = list(roots)
1768 1826 if not roots:
1769 1827 return nonodes
1770 1828 lowestrev = min([self.rev(n) for n in roots])
1771 1829 else:
1772 1830 roots = [self.nullid] # Everybody's a descendant of nullid
1773 1831 lowestrev = nullrev
1774 1832 if (lowestrev == nullrev) and (heads is None):
1775 1833 # We want _all_ the nodes!
1776 1834 return (
1777 1835 [self.node(r) for r in self],
1778 1836 [self.nullid],
1779 1837 list(self.heads()),
1780 1838 )
1781 1839 if heads is None:
1782 1840 # All nodes are ancestors, so the latest ancestor is the last
1783 1841 # node.
1784 1842 highestrev = len(self) - 1
1785 1843 # Set ancestors to None to signal that every node is an ancestor.
1786 1844 ancestors = None
1787 1845 # Set heads to an empty dictionary for later discovery of heads
1788 1846 heads = {}
1789 1847 else:
1790 1848 heads = list(heads)
1791 1849 if not heads:
1792 1850 return nonodes
1793 1851 ancestors = set()
1794 1852 # Turn heads into a dictionary so we can remove 'fake' heads.
1795 1853 # Also, later we will be using it to filter out the heads we can't
1796 1854 # find from roots.
1797 1855 heads = dict.fromkeys(heads, False)
1798 1856 # Start at the top and keep marking parents until we're done.
1799 1857 nodestotag = set(heads)
1800 1858 # Remember where the top was so we can use it as a limit later.
1801 1859 highestrev = max([self.rev(n) for n in nodestotag])
1802 1860 while nodestotag:
1803 1861 # grab a node to tag
1804 1862 n = nodestotag.pop()
1805 1863 # Never tag nullid
1806 1864 if n == self.nullid:
1807 1865 continue
1808 1866 # A node's revision number represents its place in a
1809 1867 # topologically sorted list of nodes.
1810 1868 r = self.rev(n)
1811 1869 if r >= lowestrev:
1812 1870 if n not in ancestors:
1813 1871 # If we are possibly a descendant of one of the roots
1814 1872 # and we haven't already been marked as an ancestor
1815 1873 ancestors.add(n) # Mark as ancestor
1816 1874 # Add non-nullid parents to list of nodes to tag.
1817 1875 nodestotag.update(
1818 1876 [p for p in self.parents(n) if p != self.nullid]
1819 1877 )
1820 1878 elif n in heads: # We've seen it before, is it a fake head?
1821 1879 # So it is, real heads should not be the ancestors of
1822 1880 # any other heads.
1823 1881 heads.pop(n)
1824 1882 if not ancestors:
1825 1883 return nonodes
1826 1884 # Now that we have our set of ancestors, we want to remove any
1827 1885 # roots that are not ancestors.
1828 1886
1829 1887 # If one of the roots was nullid, everything is included anyway.
1830 1888 if lowestrev > nullrev:
1831 1889 # But, since we weren't, let's recompute the lowest rev to not
1832 1890 # include roots that aren't ancestors.
1833 1891
1834 1892 # Filter out roots that aren't ancestors of heads
1835 1893 roots = [root for root in roots if root in ancestors]
1836 1894 # Recompute the lowest revision
1837 1895 if roots:
1838 1896 lowestrev = min([self.rev(root) for root in roots])
1839 1897 else:
1840 1898 # No more roots? Return empty list
1841 1899 return nonodes
1842 1900 else:
1843 1901 # We are descending from nullid, and don't need to care about
1844 1902 # any other roots.
1845 1903 lowestrev = nullrev
1846 1904 roots = [self.nullid]
1847 1905 # Transform our roots list into a set.
1848 1906 descendants = set(roots)
1849 1907 # Also, keep the original roots so we can filter out roots that aren't
1850 1908 # 'real' roots (i.e. are descended from other roots).
1851 1909 roots = descendants.copy()
1852 1910 # Our topologically sorted list of output nodes.
1853 1911 orderedout = []
1854 1912 # Don't start at nullid since we don't want nullid in our output list,
1855 1913 # and if nullid shows up in descendants, empty parents will look like
1856 1914 # they're descendants.
1857 1915 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1858 1916 n = self.node(r)
1859 1917 isdescendant = False
1860 1918 if lowestrev == nullrev: # Everybody is a descendant of nullid
1861 1919 isdescendant = True
1862 1920 elif n in descendants:
1863 1921 # n is already a descendant
1864 1922 isdescendant = True
1865 1923 # This check only needs to be done here because all the roots
1866 1924 # will start being marked as descendants before the loop.
1867 1925 if n in roots:
1868 1926 # If n was a root, check if it's a 'real' root.
1869 1927 p = tuple(self.parents(n))
1870 1928 # If any of its parents are descendants, it's not a root.
1871 1929 if (p[0] in descendants) or (p[1] in descendants):
1872 1930 roots.remove(n)
1873 1931 else:
1874 1932 p = tuple(self.parents(n))
1875 1933 # A node is a descendant if either of its parents are
1876 1934 # descendants. (We seeded the descendants set with the roots
1877 1935 # up there, remember?)
1878 1936 if (p[0] in descendants) or (p[1] in descendants):
1879 1937 descendants.add(n)
1880 1938 isdescendant = True
1881 1939 if isdescendant and ((ancestors is None) or (n in ancestors)):
1882 1940 # Only include nodes that are both descendants and ancestors.
1883 1941 orderedout.append(n)
1884 1942 if (ancestors is not None) and (n in heads):
1885 1943 # We're trying to figure out which heads are reachable
1886 1944 # from roots.
1887 1945 # Mark this head as having been reached
1888 1946 heads[n] = True
1889 1947 elif ancestors is None:
1890 1948 # Otherwise, we're trying to discover the heads.
1891 1949 # Assume this is a head because if it isn't, the next step
1892 1950 # will eventually remove it.
1893 1951 heads[n] = True
1894 1952 # But, obviously its parents aren't.
1895 1953 for p in self.parents(n):
1896 1954 heads.pop(p, None)
1897 1955 heads = [head for head, flag in heads.items() if flag]
1898 1956 roots = list(roots)
1899 1957 assert orderedout
1900 1958 assert roots
1901 1959 assert heads
1902 1960 return (orderedout, roots, heads)
1903 1961
1904 1962 def headrevs(self, revs=None):
1905 1963 if revs is None:
1906 1964 try:
1907 1965 return self.index.headrevs()
1908 1966 except AttributeError:
1909 1967 return self._headrevs()
1910 1968 if rustdagop is not None and self.index.rust_ext_compat:
1911 1969 return rustdagop.headrevs(self.index, revs)
1912 1970 return dagop.headrevs(revs, self._uncheckedparentrevs)
1913 1971
1914 1972 def computephases(self, roots):
1915 1973 return self.index.computephasesmapsets(roots)
1916 1974
1917 1975 def _headrevs(self):
1918 1976 count = len(self)
1919 1977 if not count:
1920 1978 return [nullrev]
1921 1979 # we won't iterate over filtered revs, so nobody is a head at start
1922 1980 ishead = [0] * (count + 1)
1923 1981 index = self.index
1924 1982 for r in self:
1925 1983 ishead[r] = 1 # I may be a head
1926 1984 e = index[r]
1927 1985 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1928 1986 return [r for r, val in enumerate(ishead) if val]
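# Illustrative sketch: the fallback above marks every unfiltered rev as
# a candidate head, then clears the slot of every rev seen as a parent;
# the extra trailing slot in ``ishead`` absorbs writes for nullrev (-1)
# parents. E.g. for a linear 0-1-2 plus a rev 3 whose parent is 1,
# slots 0 and 1 are cleared and the result is [2, 3].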
1929 1987
1930 1988 def heads(self, start=None, stop=None):
1931 1989 """return the list of all nodes that have no children
1932 1990
1933 1991 if start is specified, only heads that are descendants of
1934 1992 start will be returned
1935 1993 if stop is specified, it will consider all the revs from stop
1936 1994 as if they had no children
1937 1995 """
1938 1996 if start is None and stop is None:
1939 1997 if not len(self):
1940 1998 return [self.nullid]
1941 1999 return [self.node(r) for r in self.headrevs()]
1942 2000
1943 2001 if start is None:
1944 2002 start = nullrev
1945 2003 else:
1946 2004 start = self.rev(start)
1947 2005
1948 2006 stoprevs = {self.rev(n) for n in stop or []}
1949 2007
1950 2008 revs = dagop.headrevssubset(
1951 2009 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1952 2010 )
1953 2011
1954 2012 return [self.node(rev) for rev in revs]
1955 2013
1956 2014 def children(self, node):
1957 2015 """find the children of a given node"""
1958 2016 c = []
1959 2017 p = self.rev(node)
1960 2018 for r in self.revs(start=p + 1):
1961 2019 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1962 2020 if prevs:
1963 2021 for pr in prevs:
1964 2022 if pr == p:
1965 2023 c.append(self.node(r))
1966 2024 elif p == nullrev:
1967 2025 c.append(self.node(r))
1968 2026 return c
1969 2027
1970 2028 def commonancestorsheads(self, a, b):
1971 2029 """calculate all the heads of the common ancestors of nodes a and b"""
1972 2030 a, b = self.rev(a), self.rev(b)
1973 2031 ancs = self._commonancestorsheads(a, b)
1974 2032 return pycompat.maplist(self.node, ancs)
1975 2033
1976 2034 def _commonancestorsheads(self, *revs):
1977 2035 """calculate all the heads of the common ancestors of revs"""
1978 2036 try:
1979 2037 ancs = self.index.commonancestorsheads(*revs)
1980 2038 except (AttributeError, OverflowError): # C implementation failed
1981 2039 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1982 2040 return ancs
1983 2041
1984 2042 def isancestor(self, a, b):
1985 2043 """return True if node a is an ancestor of node b
1986 2044
1987 2045 A revision is considered an ancestor of itself."""
1988 2046 a, b = self.rev(a), self.rev(b)
1989 2047 return self.isancestorrev(a, b)
1990 2048
1991 2049 def isancestorrev(self, a, b):
1992 2050 """return True if revision a is an ancestor of revision b
1993 2051
1994 2052 A revision is considered an ancestor of itself.
1995 2053
1996 2054 The implementation of this is trivial but the use of
1997 2055 reachableroots is not."""
1998 2056 if a == nullrev:
1999 2057 return True
2000 2058 elif a == b:
2001 2059 return True
2002 2060 elif a > b:
2003 2061 return False
2004 2062 return bool(self.reachableroots(a, [b], [a], includepath=False))
2005 2063
2006 2064 def reachableroots(self, minroot, heads, roots, includepath=False):
2007 2065 """return (heads(::(<roots> and <roots>::<heads>)))
2008 2066
2009 2067 If includepath is True, return (<roots>::<heads>)."""
2010 2068 try:
2011 2069 return self.index.reachableroots2(
2012 2070 minroot, heads, roots, includepath
2013 2071 )
2014 2072 except AttributeError:
2015 2073 return dagop._reachablerootspure(
2016 2074 self.parentrevs, minroot, roots, heads, includepath
2017 2075 )
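# Illustrative note: ``isancestorrev`` above reduces ancestry to a
# reachability query. The short-circuits cover the trivial cases
# (nullrev is everybody's ancestor; a rev numbered above ``b`` cannot
# be b's ancestor because rev numbers are topologically sorted), so
# only the remaining a < b case pays for a walk, e.g.:
#   rl.isancestorrev(2, 7) -> bool(rl.reachableroots(2, [7], [2]))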
2018 2076
2019 2077 def ancestor(self, a, b):
2020 2078 """calculate the "best" common ancestor of nodes a and b"""
2021 2079
2022 2080 a, b = self.rev(a), self.rev(b)
2023 2081 try:
2024 2082 ancs = self.index.ancestors(a, b)
2025 2083 except (AttributeError, OverflowError):
2026 2084 ancs = ancestor.ancestors(self.parentrevs, a, b)
2027 2085 if ancs:
2028 2086 # choose a consistent winner when there's a tie
2029 2087 return min(map(self.node, ancs))
2030 2088 return self.nullid
2031 2089
2032 2090 def _match(self, id):
2033 2091 if isinstance(id, int):
2034 2092 # rev
2035 2093 return self.node(id)
2036 2094 if len(id) == self.nodeconstants.nodelen:
2037 2095 # possibly a binary node
2038 2096 # odds of a binary node being all hex in ASCII are 1 in 10**25
2039 2097 try:
2040 2098 node = id
2041 2099 self.rev(node) # quick search the index
2042 2100 return node
2043 2101 except error.LookupError:
2044 2102 pass # may be partial hex id
2045 2103 try:
2046 2104 # str(rev)
2047 2105 rev = int(id)
2048 2106 if b"%d" % rev != id:
2049 2107 raise ValueError
2050 2108 if rev < 0:
2051 2109 rev = len(self) + rev
2052 2110 if rev < 0 or rev >= len(self):
2053 2111 raise ValueError
2054 2112 return self.node(rev)
2055 2113 except (ValueError, OverflowError):
2056 2114 pass
2057 2115 if len(id) == 2 * self.nodeconstants.nodelen:
2058 2116 try:
2059 2117 # a full hex nodeid?
2060 2118 node = bin(id)
2061 2119 self.rev(node)
2062 2120 return node
2063 2121 except (binascii.Error, error.LookupError):
2064 2122 pass
2065 2123
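# Illustrative note on lookup resolution: ``_match`` above tries the
# cheap interpretations first. An int is a rev number; a 20-byte id is
# tried as a binary node; any id is then tried as a decimal rev string
# (negative values count from the end); a 40-byte id is parsed as a
# full hex nodeid. Whatever ``_match`` cannot resolve, ``lookup`` hands
# to the prefix logic of ``_partialmatch`` below.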
2066 2124 def _partialmatch(self, id):
2067 2125 # we don't care about wdirfilenodeids as they should always be full hashes
2068 2126 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2069 2127 ambiguous = False
2070 2128 try:
2071 2129 partial = self.index.partialmatch(id)
2072 2130 if partial and self.hasnode(partial):
2073 2131 if maybewdir:
2074 2132 # single 'ff...' match in radix tree, ambiguous with wdir
2075 2133 ambiguous = True
2076 2134 else:
2077 2135 return partial
2078 2136 elif maybewdir:
2079 2137 # no 'ff...' match in radix tree, wdir identified
2080 2138 raise error.WdirUnsupported
2081 2139 else:
2082 2140 return None
2083 2141 except error.RevlogError:
2084 2142 # parsers.c radix tree lookup gave multiple matches
2085 2143 # fast path: for unfiltered changelog, radix tree is accurate
2086 2144 if not getattr(self, 'filteredrevs', None):
2087 2145 ambiguous = True
2088 2146 # fall through to slow path that filters hidden revisions
2089 2147 except (AttributeError, ValueError):
2090 2148 # we are pure python, or key is not hex
2091 2149 pass
2092 2150 if ambiguous:
2093 2151 raise error.AmbiguousPrefixLookupError(
2094 2152 id, self.display_id, _(b'ambiguous identifier')
2095 2153 )
2096 2154
2097 2155 if id in self._pcache:
2098 2156 return self._pcache[id]
2099 2157
2100 2158 if len(id) <= 40:
2101 2159 # hex(node)[:...]
2102 2160 l = len(id) // 2 * 2 # grab an even number of digits
2103 2161 try:
2104 2162 # we're dropping the last digit, so let's check that it's hex,
2105 2163 # to avoid the expensive computation below if it's not
2106 2164 if len(id) % 2 > 0:
2107 2165 if id[-1] not in hexdigits:
2108 2166 return None
2109 2167 prefix = bin(id[:l])
2110 2168 except binascii.Error:
2111 2169 pass
2112 2170 else:
2113 2171 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2114 2172 nl = [
2115 2173 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2116 2174 ]
2117 2175 if self.nodeconstants.nullhex.startswith(id):
2118 2176 nl.append(self.nullid)
2119 2177 if len(nl) > 0:
2120 2178 if len(nl) == 1 and not maybewdir:
2121 2179 self._pcache[id] = nl[0]
2122 2180 return nl[0]
2123 2181 raise error.AmbiguousPrefixLookupError(
2124 2182 id, self.display_id, _(b'ambiguous identifier')
2125 2183 )
2126 2184 if maybewdir:
2127 2185 raise error.WdirUnsupported
2128 2186 return None
2129 2187
2130 2188 def lookup(self, id):
2131 2189 """locate a node based on:
2132 2190 - revision number or str(revision number)
2133 2191 - nodeid or subset of hex nodeid
2134 2192 """
2135 2193 n = self._match(id)
2136 2194 if n is not None:
2137 2195 return n
2138 2196 n = self._partialmatch(id)
2139 2197 if n:
2140 2198 return n
2141 2199
2142 2200 raise error.LookupError(id, self.display_id, _(b'no match found'))
2143 2201
2144 2202 def shortest(self, node, minlength=1):
2145 2203 """Find the shortest unambiguous prefix that matches node."""
2146 2204
2147 2205 def isvalid(prefix):
2148 2206 try:
2149 2207 matchednode = self._partialmatch(prefix)
2150 2208 except error.AmbiguousPrefixLookupError:
2151 2209 return False
2152 2210 except error.WdirUnsupported:
2153 2211 # single 'ff...' match
2154 2212 return True
2155 2213 if matchednode is None:
2156 2214 raise error.LookupError(node, self.display_id, _(b'no node'))
2157 2215 return True
2158 2216
2159 2217 def maybewdir(prefix):
2160 2218 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2161 2219
2162 2220 hexnode = hex(node)
2163 2221
2164 2222 def disambiguate(hexnode, minlength):
2165 2223 """Disambiguate against wdirid."""
2166 2224 for length in range(minlength, len(hexnode) + 1):
2167 2225 prefix = hexnode[:length]
2168 2226 if not maybewdir(prefix):
2169 2227 return prefix
2170 2228
2171 2229 if not getattr(self, 'filteredrevs', None):
2172 2230 try:
2173 2231 length = max(self.index.shortest(node), minlength)
2174 2232 return disambiguate(hexnode, length)
2175 2233 except error.RevlogError:
2176 2234 if node != self.nodeconstants.wdirid:
2177 2235 raise error.LookupError(
2178 2236 node, self.display_id, _(b'no node')
2179 2237 )
2180 2238 except AttributeError:
2181 2239 # Fall through to pure code
2182 2240 pass
2183 2241
2184 2242 if node == self.nodeconstants.wdirid:
2185 2243 for length in range(minlength, len(hexnode) + 1):
2186 2244 prefix = hexnode[:length]
2187 2245 if isvalid(prefix):
2188 2246 return prefix
2189 2247
2190 2248 for length in range(minlength, len(hexnode) + 1):
2191 2249 prefix = hexnode[:length]
2192 2250 if isvalid(prefix):
2193 2251 return disambiguate(hexnode, length)
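# Illustrative sketch: on the fast path the C index reports the
# shortest unambiguous length, and ``disambiguate`` only extends
# prefixes made entirely of 'f' since those could also name the
# working directory. E.g. a node b'ff3a...' that is unique from two
# hex digits is returned as b'ff3', because b'ff' still looks like a
# ``wdirhex`` prefix.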
2194 2252
2195 2253 def cmp(self, node, text):
2196 2254 """compare text with a given file revision
2197 2255
2198 2256 returns True if text is different than what is stored.
2199 2257 """
2200 2258 p1, p2 = self.parents(node)
2201 2259 return storageutil.hashrevisionsha1(text, p1, p2) != node
2202 2260
2203 2261 def _chunk(self, rev):
2204 2262 """Obtain a single decompressed chunk for a revision.
2205 2263
2206 2264 Accepts an integer revision.
2209 2267
2210 2268 Returns a str holding uncompressed data for the requested revision.
2211 2269 """
2212 2270 compression_mode = self.index[rev][10]
2213 2271 data = self._inner.get_segment_for_revs(rev, rev)[1]
2214 2272 if compression_mode == COMP_MODE_PLAIN:
2215 2273 return data
2216 2274 elif compression_mode == COMP_MODE_DEFAULT:
2217 2275 return self._decompressor(data)
2218 2276 elif compression_mode == COMP_MODE_INLINE:
2219 2277 return self.decompress(data)
2220 2278 else:
2221 2279 msg = b'unknown compression mode %d'
2222 2280 msg %= compression_mode
2223 2281 raise error.RevlogError(msg)
2224 2282
2225 2283 def _chunks(self, revs, targetsize=None):
2226 2284 """Obtain decompressed chunks for the specified revisions.
2227 2285
2228 2286 Accepts an iterable of numeric revisions that are assumed to be in
2229 2287 ascending order.
2232 2290
2233 2291 This function is similar to calling ``self._chunk()`` multiple times,
2234 2292 but is faster.
2235 2293
2236 2294 Returns a list with decompressed data for each requested revision.
2237 2295 """
2238 2296 if not revs:
2239 2297 return []
2240 2298 start = self.start
2241 2299 length = self.length
2242 2300 inline = self._inline
2243 2301 iosize = self.index.entry_size
2244 2302 buffer = util.buffer
2245 2303
2246 2304 l = []
2247 2305 ladd = l.append
2248 2306
2249 2307 if not self.data_config.with_sparse_read:
2250 2308 slicedchunks = (revs,)
2251 2309 else:
2252 2310 slicedchunks = deltautil.slicechunk(
2253 2311 self, revs, targetsize=targetsize
2254 2312 )
2255 2313
2256 2314 for revschunk in slicedchunks:
2257 2315 firstrev = revschunk[0]
2258 2316 # Skip trailing revisions with empty diff
2259 2317 for lastrev in revschunk[::-1]:
2260 2318 if length(lastrev) != 0:
2261 2319 break
2262 2320
2263 2321 try:
2264 2322 offset, data = self._inner.get_segment_for_revs(
2265 2323 firstrev,
2266 2324 lastrev,
2267 2325 )
2268 2326 except OverflowError:
2269 2327 # issue4215 - we can't cache a run of chunks greater than
2270 2328 # 2G on Windows
2271 2329 return [self._chunk(rev) for rev in revschunk]
2272 2330
2273 2331 decomp = self.decompress
2274 2332 # self._decompressor might be None, but will not be used in that case
2275 2333 def_decomp = self._decompressor
2276 2334 for rev in revschunk:
2277 2335 chunkstart = start(rev)
2278 2336 if inline:
2279 2337 chunkstart += (rev + 1) * iosize
2280 2338 chunklength = length(rev)
2281 2339 comp_mode = self.index[rev][10]
2282 2340 c = buffer(data, chunkstart - offset, chunklength)
2283 2341 if comp_mode == COMP_MODE_PLAIN:
2284 2342 ladd(c)
2285 2343 elif comp_mode == COMP_MODE_INLINE:
2286 2344 ladd(decomp(c))
2287 2345 elif comp_mode == COMP_MODE_DEFAULT:
2288 2346 ladd(def_decomp(c))
2289 2347 else:
2290 2348 msg = b'unknown compression mode %d'
2291 2349 msg %= comp_mode
2292 2350 raise error.RevlogError(msg)
2293 2351
2294 2352 return l
2295 2353
2296 2354 def deltaparent(self, rev):
2297 2355 """return deltaparent of the given revision"""
2298 2356 base = self.index[rev][3]
2299 2357 if base == rev:
2300 2358 return nullrev
2301 2359 elif self.delta_config.general_delta:
2302 2360 return base
2303 2361 else:
2304 2362 return rev - 1
2305 2363
2306 2364 def issnapshot(self, rev):
2307 2365 """tells whether rev is a snapshot"""
2308 2366 if not self.delta_config.sparse_revlog:
2309 2367 return self.deltaparent(rev) == nullrev
2310 2368 elif hasattr(self.index, 'issnapshot'):
2311 2369 # assign the method directly, caching both the attribute test and the access
2312 2370 self.issnapshot = self.index.issnapshot
2313 2371 return self.issnapshot(rev)
2314 2372 if rev == nullrev:
2315 2373 return True
2316 2374 entry = self.index[rev]
2317 2375 base = entry[3]
2318 2376 if base == rev:
2319 2377 return True
2320 2378 if base == nullrev:
2321 2379 return True
2322 2380 p1 = entry[5]
2323 2381 while self.length(p1) == 0:
2324 2382 b = self.deltaparent(p1)
2325 2383 if b == p1:
2326 2384 break
2327 2385 p1 = b
2328 2386 p2 = entry[6]
2329 2387 while self.length(p2) == 0:
2330 2388 b = self.deltaparent(p2)
2331 2389 if b == p2:
2332 2390 break
2333 2391 p2 = b
2334 2392 if base == p1 or base == p2:
2335 2393 return False
2336 2394 return self.issnapshot(base)
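# Illustrative sketch of the sparse-revlog rule above: a revision is a
# snapshot when its delta base is neither of its parents (empty
# revisions are skipped by following their delta parents) and the base
# is itself a snapshot, recursively. E.g. if rev 6 has parents 4 and 5
# but deltas against rev 3, rev 6 is a snapshot provided rev 3 is one.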
2337 2395
2338 2396 def snapshotdepth(self, rev):
2339 2397 """number of snapshot in the chain before this one"""
2340 2398 if not self.issnapshot(rev):
2341 2399 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2342 2400 return len(self._deltachain(rev)[0]) - 1
2343 2401
2344 2402 def revdiff(self, rev1, rev2):
2345 2403 """return or calculate a delta between two revisions
2346 2404
2347 2405 The delta calculated is in binary form and is intended to be written to
2348 2406 revlog data directly. So this function needs raw revision data.
2349 2407 """
2350 2408 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2351 2409 return bytes(self._chunk(rev2))
2352 2410
2353 2411 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2354 2412
2355 2413 def revision(self, nodeorrev):
2356 2414 """return an uncompressed revision of a given node or revision
2357 2415 number.
2358 2416 """
2359 2417 return self._revisiondata(nodeorrev)
2360 2418
2361 2419 def sidedata(self, nodeorrev):
2362 2420 """a map of extra data related to the changeset but not part of the hash
2363 2421
2364 2422 This function currently returns a dictionary. However, a more advanced
2365 2423 mapping object will likely be used in the future for more
2366 2424 efficient/lazy code.
2367 2425 """
2368 2426 # deal with <nodeorrev> argument type
2369 2427 if isinstance(nodeorrev, int):
2370 2428 rev = nodeorrev
2371 2429 else:
2372 2430 rev = self.rev(nodeorrev)
2373 2431 return self._sidedata(rev)
2374 2432
2375 2433 def _revisiondata(self, nodeorrev, raw=False):
2376 2434 # deal with <nodeorrev> argument type
2377 2435 if isinstance(nodeorrev, int):
2378 2436 rev = nodeorrev
2379 2437 node = self.node(rev)
2380 2438 else:
2381 2439 node = nodeorrev
2382 2440 rev = None
2383 2441
2384 2442 # fast path the special `nullid` rev
2385 2443 if node == self.nullid:
2386 2444 return b""
2387 2445
2388 2446 # ``rawtext`` is the text as stored inside the revlog. Might be the
2389 2447 # revision or might need to be processed to retrieve the revision.
2390 2448 rev, rawtext, validated = self._rawtext(node, rev)
2391 2449
2392 2450 if raw and validated:
2393 2451 # if we don't want to process the raw text and that raw
2394 2452 # text is cached, we can exit early.
2395 2453 return rawtext
2396 2454 if rev is None:
2397 2455 rev = self.rev(node)
2398 2456 # the revlog's flags for this revision
2399 2457 # (they usually alter its state or content)
2400 2458 flags = self.flags(rev)
2401 2459
2402 2460 if validated and flags == REVIDX_DEFAULT_FLAGS:
2403 2461 # no extra flags set, no flag processor runs, text = rawtext
2404 2462 return rawtext
2405 2463
2406 2464 if raw:
2407 2465 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2408 2466 text = rawtext
2409 2467 else:
2410 2468 r = flagutil.processflagsread(self, rawtext, flags)
2411 2469 text, validatehash = r
2412 2470 if validatehash:
2413 2471 self.checkhash(text, node, rev=rev)
2414 2472 if not validated:
2415 2473 self._revisioncache = (node, rev, rawtext)
2416 2474
2417 2475 return text
2418 2476
2419 2477 def _rawtext(self, node, rev):
2420 2478 """return the possibly unvalidated rawtext for a revision
2421 2479
2422 2480 returns (rev, rawtext, validated)
2423 2481 """
2424 2482
2425 2483 # revision in the cache (could be useful to apply delta)
2426 2484 cachedrev = None
2427 2485 # An intermediate text to apply deltas to
2428 2486 basetext = None
2429 2487
2430 2488 # Check if we have the entry in cache
2431 2489 # The cache entry looks like (node, rev, rawtext)
2432 2490 if self._revisioncache:
2433 2491 if self._revisioncache[0] == node:
2434 2492 return (rev, self._revisioncache[2], True)
2435 2493 cachedrev = self._revisioncache[1]
2436 2494
2437 2495 if rev is None:
2438 2496 rev = self.rev(node)
2439 2497
2440 2498 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2441 2499 if stopped:
2442 2500 basetext = self._revisioncache[2]
2443 2501
2444 2502 # drop cache to save memory, the caller is expected to
2445 2503 # update self._revisioncache after validating the text
2446 2504 self._revisioncache = None
2447 2505
2448 2506 targetsize = None
2449 2507 rawsize = self.index[rev][2]
2450 2508 if 0 <= rawsize:
2451 2509 targetsize = 4 * rawsize
2452 2510
2453 2511 bins = self._chunks(chain, targetsize=targetsize)
2454 2512 if basetext is None:
2455 2513 basetext = bytes(bins[0])
2456 2514 bins = bins[1:]
2457 2515
2458 2516 rawtext = mdiff.patches(basetext, bins)
2459 2517 del basetext # let us have a chance to free memory early
2460 2518 return (rev, rawtext, False)
2461 2519
2462 2520 def _sidedata(self, rev):
2463 2521 """Return the sidedata for a given revision number."""
2464 2522 index_entry = self.index[rev]
2465 2523 sidedata_offset = index_entry[8]
2466 2524 sidedata_size = index_entry[9]
2467 2525
2468 2526 if self._inline:
2469 2527 sidedata_offset += self.index.entry_size * (1 + rev)
2470 2528 if sidedata_size == 0:
2471 2529 return {}
2472 2530
2473 2531 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2474 2532 filename = self._sidedatafile
2475 2533 end = self._docket.sidedata_end
2476 2534 offset = sidedata_offset
2477 2535 length = sidedata_size
2478 2536 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2479 2537 raise error.RevlogError(m)
2480 2538
2481 2539 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2482 2540 sidedata_offset, sidedata_size
2483 2541 )
2484 2542
2485 2543 comp = self.index[rev][11]
2486 2544 if comp == COMP_MODE_PLAIN:
2487 2545 segment = comp_segment
2488 2546 elif comp == COMP_MODE_DEFAULT:
2489 2547 segment = self._decompressor(comp_segment)
2490 2548 elif comp == COMP_MODE_INLINE:
2491 2549 segment = self.decompress(comp_segment)
2492 2550 else:
2493 2551 msg = b'unknown compression mode %d'
2494 2552 msg %= comp
2495 2553 raise error.RevlogError(msg)
2496 2554
2497 2555 sidedata = sidedatautil.deserialize_sidedata(segment)
2498 2556 return sidedata
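# Illustrative note: in an inline revlog, index entries and data chunks
# are interleaved in a single file, so a sidedata offset recorded in
# the index must be shifted by ``entry_size * (rev + 1)`` bytes of
# index material, which is what the adjustment above does before the
# compressed segment is read.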
2499 2557
2500 2558 def rawdata(self, nodeorrev):
2501 2559 """return an uncompressed raw data of a given node or revision number."""
2502 2560 return self._revisiondata(nodeorrev, raw=True)
2503 2561
2504 2562 def hash(self, text, p1, p2):
2505 2563 """Compute a node hash.
2506 2564
2507 2565 Available as a function so that subclasses can replace the hash
2508 2566 as needed.
2509 2567 """
2510 2568 return storageutil.hashrevisionsha1(text, p1, p2)
2511 2569
2512 2570 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2513 2571 """Check node hash integrity.
2514 2572
2515 2573 Available as a function so that subclasses can extend hash mismatch
2516 2574 behaviors as needed.
2517 2575 """
2518 2576 try:
2519 2577 if p1 is None and p2 is None:
2520 2578 p1, p2 = self.parents(node)
2521 2579 if node != self.hash(text, p1, p2):
2522 2580 # Clear the revision cache on hash failure. The revision cache
2523 2581 # only stores the raw revision and clearing the cache does have
2524 2582 # the side-effect that we won't have a cache hit when the raw
2525 2583 # revision data is accessed. But this case should be rare and
2526 2584 # it is extra work to teach the cache about the hash
2527 2585 # verification state.
2528 2586 if self._revisioncache and self._revisioncache[0] == node:
2529 2587 self._revisioncache = None
2530 2588
2531 2589 revornode = rev
2532 2590 if revornode is None:
2533 2591 revornode = templatefilters.short(hex(node))
2534 2592 raise error.RevlogError(
2535 2593 _(b"integrity check failed on %s:%s")
2536 2594 % (self.display_id, pycompat.bytestr(revornode))
2537 2595 )
2538 2596 except error.RevlogError:
2539 2597 if self.feature_config.censorable and storageutil.iscensoredtext(
2540 2598 text
2541 2599 ):
2542 2600 raise error.CensoredNodeError(self.display_id, node, text)
2543 2601 raise
2544 2602
2545 2603 @property
2546 2604 def _split_index_file(self):
2547 2605 """the path where to expect the index of an ongoing splitting operation
2548 2606
2549 2607 The file will only exist if a splitting operation is in progress, but
2550 2608 it is always expected at the same location."""
2551 2609 parts = self.radix.split(b'/')
2552 2610 if len(parts) > 1:
2553 2611 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2554 2612 head = parts[0] + b'-s'
2555 2613 mids = parts[1:-1]
2556 2614 tail = parts[-1] + b'.i'
2557 2615 pieces = [head] + mids + [tail]
2558 2616 return b'/'.join(pieces)
2559 2617 else:
2560 2618 # the revlog is stored at the root of the store (changelog or
2561 2619 # manifest), no risk of collision.
2562 2620 return self.radix + b'.i.s'
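# Illustrative sketch: a radix such as b'data/some/file' maps to
# b'data-s/some/file.i', keeping the temporary split index out of the
# regular b'data/' namespace, while a root-level radix like
# b'00manifest' simply becomes b'00manifest.i.s'.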
2563 2621
2564 2622 def _enforceinlinesize(self, tr, side_write=True):
2565 2623 """Check if the revlog is too big for inline and convert if so.
2566 2624
2567 2625 This should be called after revisions are added to the revlog. If the
2568 2626 revlog has grown too large to be an inline revlog, it will convert it
2569 2627 to use multiple index and data files.
2570 2628 """
2571 2629 tiprev = len(self) - 1
2572 2630 total_size = self.start(tiprev) + self.length(tiprev)
2573 2631 if not self._inline or total_size < _maxinline:
2574 2632 return
2575 2633
2576 2634 if self._docket is not None:
2577 2635 msg = b"inline revlog should not have a docket"
2578 2636 raise error.ProgrammingError(msg)
2579 2637
2580 2638 troffset = tr.findoffset(self._indexfile)
2581 2639 if troffset is None:
2582 2640 raise error.RevlogError(
2583 2641 _(b"%s not found in the transaction") % self._indexfile
2584 2642 )
2585 2643 if troffset:
2586 2644 tr.addbackup(self._indexfile, for_offset=True)
2587 2645 tr.add(self._datafile, 0)
2588 2646
2589 existing_handles = False
2590 if self._inner._writinghandles is not None:
2591 existing_handles = True
2592 fp = self._inner._writinghandles[0]
2593 fp.flush()
2594 fp.close()
2595 # We can't use the cached file handle after close(). So prevent
2596 # its usage.
2597 self._inner._writinghandles = None
2598 self._inner._segmentfile.writing_handle = None
2599 # No need to deal with sidedata writing handle as it is only
2600 # relevant with revlog-v2 which is never inline, not reaching
2601 # this code
2647 new_index_file_path = None
2602 2648 if side_write:
2603 2649 old_index_file_path = self._indexfile
2604 2650 new_index_file_path = self._split_index_file
2605 2651 opener = self.opener
2606 2652 weak_self = weakref.ref(self)
2607 2653
2608 # the "split" index replace the real index when the transaction is finalized
2654 # the "split" index replaces the real index when the transaction is
2655 # finalized
2609 2656 def finalize_callback(tr):
2610 2657 opener.rename(
2611 2658 new_index_file_path,
2612 2659 old_index_file_path,
2613 2660 checkambig=True,
2614 2661 )
2615 2662 maybe_self = weak_self()
2616 2663 if maybe_self is not None:
2617 2664 maybe_self._indexfile = old_index_file_path
2618 2665 maybe_self._inner.index_file = maybe_self._indexfile
2619 2666
2620 2667 def abort_callback(tr):
2621 2668 maybe_self = weak_self()
2622 2669 if maybe_self is not None:
2623 2670 maybe_self._indexfile = old_index_file_path
2671 maybe_self._inner.inline = True
2624 2672 maybe_self._inner.index_file = old_index_file_path
2625 2673
2626 2674 tr.registertmp(new_index_file_path)
2627 2675 if self.target[1] is not None:
2628 2676 callback_id = b'000-revlog-split-%d-%s' % self.target
2629 2677 else:
2630 2678 callback_id = b'000-revlog-split-%d' % self.target[0]
2631 2679 tr.addfinalize(callback_id, finalize_callback)
2632 2680 tr.addabort(callback_id, abort_callback)
2633 2681
2634 new_dfh = self._datafp(b'w+')
2635 new_dfh.truncate(0) # drop any potentially existing data
2636 try:
2637 with self.reading():
2638 for r in self:
2639 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
2640 new_dfh.flush()
2641
2642 if side_write:
2643 self._indexfile = new_index_file_path
2644 self._inner.index_file = self._indexfile
2645 with self._inner._InnerRevlog__index_new_fp() as fp:
2646 self._format_flags &= ~FLAG_INLINE_DATA
2647 self._inline = False
2648 self._inner.inline = False
2649 for i in self:
2650 e = self.index.entry_binary(i)
2651 if i == 0:
2652 header = self._format_flags | self._format_version
2653 header = self.index.pack_header(header)
2654 e = header + e
2655 fp.write(e)
2656
2657 # If we don't use side-write, the temp file replace the real
2658 # index when we exit the context manager
2659
2660 nodemaputil.setup_persistent_nodemap(tr, self)
2661 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2662 self.opener,
2663 self._datafile,
2664 self.data_config.chunk_cache_size,
2665 )
2666
2667 if existing_handles:
2668 # switched from inline to conventional reopen the index
2669 index_end = None
2670 ifh = self._inner._InnerRevlog__index_write_fp(
2671 index_end=index_end
2672 )
2673 self._inner._writinghandles = (ifh, new_dfh, None)
2674 self._inner._segmentfile.writing_handle = new_dfh
2675 new_dfh = None
2676 # No need to deal with sidedata writing handle as it is only
2677 # relevant with revlog-v2 which is never inline, not reaching
2678 # this code
2679 finally:
2680 if new_dfh is not None:
2681 new_dfh.close()
2682 self._format_flags &= ~FLAG_INLINE_DATA
2683 self._inner.split_inline(
2684 tr,
2685 self._format_flags | self._format_version,
2686 new_index_file_path=new_index_file_path,
2687 )
2688
2689 self._inline = False
2690 if new_index_file_path is not None:
2691 self._indexfile = new_index_file_path
2692
2693 nodemaputil.setup_persistent_nodemap(tr, self)
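# Note: the heavy lifting of the conversion (copying every data chunk
# into the new .d file and rewriting the index without
# FLAG_INLINE_DATA) happens inside ``self._inner.split_inline``; the
# wrapper above only arranges the transaction backups and the
# finalize/abort callbacks that swap the side-written index in or roll
# it back.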
2682 2694
2683 2695 def _nodeduplicatecallback(self, transaction, node):
2684 2696 """called when trying to add a node already stored."""
2685 2697
2686 2698 @contextlib.contextmanager
2687 2699 def reading(self):
2688 2700 with self._inner.reading():
2689 2701 yield
2690 2702
2691 2703 @contextlib.contextmanager
2692 2704 def _writing(self, transaction):
2693 2705 if self._trypending:
2694 2706 msg = b'try to write in a `trypending` revlog: %s'
2695 2707 msg %= self.display_id
2696 2708 raise error.ProgrammingError(msg)
2697 2709 if self._inner.is_writing:
2698 2710 yield
2699 2711 else:
2700 2712 data_end = None
2701 2713 sidedata_end = None
2702 2714 if self._docket is not None:
2703 2715 data_end = self._docket.data_end
2704 2716 sidedata_end = self._docket.sidedata_end
2705 2717 with self._inner.writing(
2706 2718 transaction,
2707 2719 data_end=data_end,
2708 2720 sidedata_end=sidedata_end,
2709 2721 ):
2710 2722 yield
2711 2723 if self._docket is not None:
2712 2724 self._write_docket(transaction)
2713 2725
2714 2726 def _write_docket(self, transaction):
2715 2727 """write the current docket on disk
2716 2728
2717 2729 Exists as a method to help the changelog implement transaction logic
2718 2730 
2719 2731 We could also imagine using the same transaction logic for all revlogs
2720 2732 since dockets are cheap."""
2721 2733 self._docket.write(transaction)
2722 2734
2723 2735 def addrevision(
2724 2736 self,
2725 2737 text,
2726 2738 transaction,
2727 2739 link,
2728 2740 p1,
2729 2741 p2,
2730 2742 cachedelta=None,
2731 2743 node=None,
2732 2744 flags=REVIDX_DEFAULT_FLAGS,
2733 2745 deltacomputer=None,
2734 2746 sidedata=None,
2735 2747 ):
2736 2748 """add a revision to the log
2737 2749
2738 2750 text - the revision data to add
2739 2751 transaction - the transaction object used for rollback
2740 2752 link - the linkrev data to add
2741 2753 p1, p2 - the parent nodeids of the revision
2742 2754 cachedelta - an optional precomputed delta
2743 2755 node - nodeid of revision; typically node is not specified, and it is
2744 2756 computed by default as hash(text, p1, p2), however subclasses might
2745 2757 use a different hashing method (and override checkhash() in that case)
2746 2758 flags - the known flags to set on the revision
2747 2759 deltacomputer - an optional deltacomputer instance shared between
2748 2760 multiple calls
2749 2761 """
2750 2762 if link == nullrev:
2751 2763 raise error.RevlogError(
2752 2764 _(b"attempted to add linkrev -1 to %s") % self.display_id
2753 2765 )
2754 2766
2755 2767 if sidedata is None:
2756 2768 sidedata = {}
2757 2769 elif sidedata and not self.feature_config.has_side_data:
2758 2770 raise error.ProgrammingError(
2759 2771 _(b"trying to add sidedata to a revlog which doesn't support them")
2760 2772 )
2761 2773
2762 2774 if flags:
2763 2775 node = node or self.hash(text, p1, p2)
2764 2776
2765 2777 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2766 2778
2767 2779 # If the flag processor modifies the revision data, ignore any provided
2768 2780 # cachedelta.
2769 2781 if rawtext != text:
2770 2782 cachedelta = None
2771 2783
2772 2784 if len(rawtext) > _maxentrysize:
2773 2785 raise error.RevlogError(
2774 2786 _(
2775 2787 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2776 2788 )
2777 2789 % (self.display_id, len(rawtext))
2778 2790 )
2779 2791
2780 2792 node = node or self.hash(rawtext, p1, p2)
2781 2793 rev = self.index.get_rev(node)
2782 2794 if rev is not None:
2783 2795 return rev
2784 2796
2785 2797 if validatehash:
2786 2798 self.checkhash(rawtext, node, p1=p1, p2=p2)
2787 2799
2788 2800 return self.addrawrevision(
2789 2801 rawtext,
2790 2802 transaction,
2791 2803 link,
2792 2804 p1,
2793 2805 p2,
2794 2806 node,
2795 2807 flags,
2796 2808 cachedelta=cachedelta,
2797 2809 deltacomputer=deltacomputer,
2798 2810 sidedata=sidedata,
2799 2811 )
2800 2812
2801 2813 def addrawrevision(
2802 2814 self,
2803 2815 rawtext,
2804 2816 transaction,
2805 2817 link,
2806 2818 p1,
2807 2819 p2,
2808 2820 node,
2809 2821 flags,
2810 2822 cachedelta=None,
2811 2823 deltacomputer=None,
2812 2824 sidedata=None,
2813 2825 ):
2814 2826 """add a raw revision with known flags, node and parents
2815 2827 useful when reusing a revision not stored in this revlog (e.g. received
2816 2828 over the wire, or read from an external bundle).
2817 2829 """
2818 2830 with self._writing(transaction):
2819 2831 return self._addrevision(
2820 2832 node,
2821 2833 rawtext,
2822 2834 transaction,
2823 2835 link,
2824 2836 p1,
2825 2837 p2,
2826 2838 flags,
2827 2839 cachedelta,
2828 2840 deltacomputer=deltacomputer,
2829 2841 sidedata=sidedata,
2830 2842 )
2831 2843
2832 2844 def compress(self, data):
2833 2845 """Generate a possibly-compressed representation of data."""
2834 2846 if not data:
2835 2847 return b'', data
2836 2848
2837 2849 compressed = self._compressor.compress(data)
2838 2850
2839 2851 if compressed:
2840 2852 # The revlog compressor added the header in the returned data.
2841 2853 return b'', compressed
2842 2854
2843 2855 if data[0:1] == b'\0':
2844 2856 return b'', data
2845 2857 return b'u', data
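# Illustrative note on the return convention: callers persist
# ``header + data``. With the default zlib engine, compressible input
# comes back as (b'', b'x...') with the engine header embedded in the
# payload, incompressible input as (b'u', data) behind a one-byte
# marker, and data that already starts with b'\0' as (b'', data) since
# the NUL byte marks it as raw.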
2846 2858
2847 2859 def decompress(self, data):
2848 2860 """Decompress a revlog chunk.
2849 2861
2850 2862 The chunk is expected to begin with a header identifying the
2851 2863 format type so it can be routed to an appropriate decompressor.
2852 2864 """
2853 2865 if not data:
2854 2866 return data
2855 2867
2856 2868 # Revlogs are read much more frequently than they are written and many
2857 2869 # chunks only take microseconds to decompress, so performance is
2858 2870 # important here.
2859 2871 #
2860 2872 # We can make a few assumptions about revlogs:
2861 2873 #
2862 2874 # 1) the majority of chunks will be compressed (as opposed to inline
2863 2875 # raw data).
2864 2876 # 2) decompressing *any* data will likely be at least 10x slower than
2865 2877 # returning raw inline data.
2866 2878 # 3) we want to prioritize common and officially supported compression
2867 2879 # engines
2868 2880 #
2869 2881 # It follows that we want to optimize for "decompress compressed data
2870 2882 # when encoded with common and officially supported compression engines"
2871 2883 # case over "raw data" and "data encoded by less common or non-official
2872 2884 # compression engines." That is why we have the inline lookup first
2873 2885 # followed by the compengines lookup.
2874 2886 #
2875 2887 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2876 2888 # compressed chunks. And this matters for changelog and manifest reads.
2877 2889 t = data[0:1]
2878 2890
2879 2891 if t == b'x':
2880 2892 try:
2881 2893 return _zlibdecompress(data)
2882 2894 except zlib.error as e:
2883 2895 raise error.RevlogError(
2884 2896 _(b'revlog decompress error: %s')
2885 2897 % stringutil.forcebytestr(e)
2886 2898 )
2887 2899 # '\0' is more common than 'u' so it goes first.
2888 2900 elif t == b'\0':
2889 2901 return data
2890 2902 elif t == b'u':
2891 2903 return util.buffer(data, 1)
2892 2904
2893 2905 compressor = self._get_decompressor(t)
2894 2906
2895 2907 return compressor.decompress(data)
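# Illustrative note on the dispatch above: the first byte routes the
# chunk. b'x' is inflated through the module-level zlib alias, b'\0'
# means the payload is stored raw and returned as-is, and b'u' means
# uncompressed data behind a one-byte marker that a zero-copy
# ``util.buffer(data, 1)`` view skips; any other byte is resolved via
# ``_get_decompressor``.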
2896 2908
2897 2909 def _addrevision(
2898 2910 self,
2899 2911 node,
2900 2912 rawtext,
2901 2913 transaction,
2902 2914 link,
2903 2915 p1,
2904 2916 p2,
2905 2917 flags,
2906 2918 cachedelta,
2907 2919 alwayscache=False,
2908 2920 deltacomputer=None,
2909 2921 sidedata=None,
2910 2922 ):
2911 2923 """internal function to add revisions to the log
2912 2924
2913 2925 see addrevision for argument descriptions.
2914 2926
2915 2927 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2916 2928
2917 2929 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2918 2930 be used.
2919 2931
2920 2932 invariants:
2921 2933 - rawtext is optional (can be None); if not set, cachedelta must be set.
2922 2934 if both are set, they must correspond to each other.
2923 2935 """
2924 2936 if node == self.nullid:
2925 2937 raise error.RevlogError(
2926 2938 _(b"%s: attempt to add null revision") % self.display_id
2927 2939 )
2928 2940 if (
2929 2941 node == self.nodeconstants.wdirid
2930 2942 or node in self.nodeconstants.wdirfilenodeids
2931 2943 ):
2932 2944 raise error.RevlogError(
2933 2945 _(b"%s: attempt to add wdir revision") % self.display_id
2934 2946 )
2935 2947 if self._inner._writinghandles is None:
2936 2948 msg = b'adding revision outside `revlog._writing` context'
2937 2949 raise error.ProgrammingError(msg)
2938 2950
2939 2951 btext = [rawtext]
2940 2952
2941 2953 curr = len(self)
2942 2954 prev = curr - 1
2943 2955
2944 2956 offset = self._get_data_offset(prev)
2945 2957
2946 2958 if self._concurrencychecker:
2947 2959 ifh, dfh, sdfh = self._inner._writinghandles
2948 2960 # XXX no checking for the sidedata file
2949 2961 if self._inline:
2950 2962 # offset is "as if" it were in the .d file, so we need to add on
2951 2963 # the size of the entry metadata.
2952 2964 self._concurrencychecker(
2953 2965 ifh, self._indexfile, offset + curr * self.index.entry_size
2954 2966 )
2955 2967 else:
2956 2968 # Entries in the .i are a consistent size.
2957 2969 self._concurrencychecker(
2958 2970 ifh, self._indexfile, curr * self.index.entry_size
2959 2971 )
2960 2972 self._concurrencychecker(dfh, self._datafile, offset)
2961 2973
2962 2974 p1r, p2r = self.rev(p1), self.rev(p2)
2963 2975
2964 2976 # full versions are inserted when the needed deltas
2965 2977 # become comparable to the uncompressed text
2966 2978 if rawtext is None:
2967 2979 # we need the rawtext size before it is changed by flag processors,
2968 2980 # which is the non-raw size. use revlog explicitly to avoid filelog's
2969 2981 # extra logic that might remove metadata size.
2970 2982 textlen = mdiff.patchedsize(
2971 2983 revlog.size(self, cachedelta[0]), cachedelta[1]
2972 2984 )
2973 2985 else:
2974 2986 textlen = len(rawtext)
2975 2987
2976 2988 if deltacomputer is None:
2977 2989 write_debug = None
2978 2990 if self.delta_config.debug_delta:
2979 2991 write_debug = transaction._report
2980 2992 deltacomputer = deltautil.deltacomputer(
2981 2993 self, write_debug=write_debug
2982 2994 )
2983 2995
2984 2996 if cachedelta is not None and len(cachedelta) == 2:
2985 2997 # If the cached delta has no information about how it should be
2986 2998 # reused, add the default reuse instruction according to the
2987 2999 # revlog's configuration.
2988 3000 if (
2989 3001 self.delta_config.general_delta
2990 3002 and self.delta_config.lazy_delta_base
2991 3003 ):
2992 3004 delta_base_reuse = DELTA_BASE_REUSE_TRY
2993 3005 else:
2994 3006 delta_base_reuse = DELTA_BASE_REUSE_NO
2995 3007 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2996 3008
2997 3009 revinfo = revlogutils.revisioninfo(
2998 3010 node,
2999 3011 p1,
3000 3012 p2,
3001 3013 btext,
3002 3014 textlen,
3003 3015 cachedelta,
3004 3016 flags,
3005 3017 )
3006 3018
3007 3019 deltainfo = deltacomputer.finddeltainfo(revinfo)
3008 3020
3009 3021 compression_mode = COMP_MODE_INLINE
3010 3022 if self._docket is not None:
3011 3023 default_comp = self._docket.default_compression_header
3012 3024 r = deltautil.delta_compression(default_comp, deltainfo)
3013 3025 compression_mode, deltainfo = r
3014 3026
3015 3027 sidedata_compression_mode = COMP_MODE_INLINE
3016 3028 if sidedata and self.feature_config.has_side_data:
3017 3029 sidedata_compression_mode = COMP_MODE_PLAIN
3018 3030 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3019 3031 sidedata_offset = self._docket.sidedata_end
3020 3032 h, comp_sidedata = self.compress(serialized_sidedata)
3021 3033 if (
3022 3034 h != b'u'
3023 3035 and comp_sidedata[0:1] != b'\0'
3024 3036 and len(comp_sidedata) < len(serialized_sidedata)
3025 3037 ):
3026 3038 assert not h
3027 3039 if (
3028 3040 comp_sidedata[0:1]
3029 3041 == self._docket.default_compression_header
3030 3042 ):
3031 3043 sidedata_compression_mode = COMP_MODE_DEFAULT
3032 3044 serialized_sidedata = comp_sidedata
3033 3045 else:
3034 3046 sidedata_compression_mode = COMP_MODE_INLINE
3035 3047 serialized_sidedata = comp_sidedata
3036 3048 else:
3037 3049 serialized_sidedata = b""
3038 3050 # Don't store the offset if the sidedata is empty; that way we can
3039 3051 # easily detect empty sidedata, and it will be no different from
3040 3052 # sidedata we add manually.
3041 3053 sidedata_offset = 0
3042 3054
3043 3055 rank = RANK_UNKNOWN
3044 3056 if self.feature_config.compute_rank:
3045 3057 if (p1r, p2r) == (nullrev, nullrev):
3046 3058 rank = 1
3047 3059 elif p1r != nullrev and p2r == nullrev:
3048 3060 rank = 1 + self.fast_rank(p1r)
3049 3061 elif p1r == nullrev and p2r != nullrev:
3050 3062 rank = 1 + self.fast_rank(p2r)
3051 3063 else: # merge node
3052 3064 if rustdagop is not None and self.index.rust_ext_compat:
3053 3065 rank = rustdagop.rank(self.index, p1r, p2r)
3054 3066 else:
3055 3067 pmin, pmax = sorted((p1r, p2r))
3056 3068 rank = 1 + self.fast_rank(pmax)
3057 3069 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
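# Illustrative note: rank(r) is the number of ancestors of r, itself
# included, so a root gets rank 1 and a linear node adds 1 to its
# parent's rank. For a merge, the code above starts from the larger
# parent and adds whatever the smaller parent contributes:
#   rank = 1 + fast_rank(pmax) + |ancestors(pmin) - ancestors(pmax)|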
3058 3070
3059 3071 e = revlogutils.entry(
3060 3072 flags=flags,
3061 3073 data_offset=offset,
3062 3074 data_compressed_length=deltainfo.deltalen,
3063 3075 data_uncompressed_length=textlen,
3064 3076 data_compression_mode=compression_mode,
3065 3077 data_delta_base=deltainfo.base,
3066 3078 link_rev=link,
3067 3079 parent_rev_1=p1r,
3068 3080 parent_rev_2=p2r,
3069 3081 node_id=node,
3070 3082 sidedata_offset=sidedata_offset,
3071 3083 sidedata_compressed_length=len(serialized_sidedata),
3072 3084 sidedata_compression_mode=sidedata_compression_mode,
3073 3085 rank=rank,
3074 3086 )
3075 3087
3076 3088 self.index.append(e)
3077 3089 entry = self.index.entry_binary(curr)
3078 3090 if curr == 0 and self._docket is None:
3079 3091 header = self._format_flags | self._format_version
3080 3092 header = self.index.pack_header(header)
3081 3093 entry = header + entry
3082 3094 self._writeentry(
3083 3095 transaction,
3084 3096 entry,
3085 3097 deltainfo.data,
3086 3098 link,
3087 3099 offset,
3088 3100 serialized_sidedata,
3089 3101 sidedata_offset,
3090 3102 )
3091 3103
3092 3104 rawtext = btext[0]
3093 3105
3094 3106 if alwayscache and rawtext is None:
3095 3107 rawtext = deltacomputer.buildtext(revinfo)
3096 3108
3097 3109 if type(rawtext) == bytes: # only accept immutable objects
3098 3110 self._revisioncache = (node, curr, rawtext)
3099 3111 self._chainbasecache[curr] = deltainfo.chainbase
3100 3112 return curr
3101 3113
3102 3114 def _get_data_offset(self, prev):
3103 3115 """Returns the current offset in the (in-transaction) data file.
3104 3116 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
3105 3117 file to store that information: since sidedata can be rewritten to the
3106 3118 end of the data file within a transaction, you can have cases where, for
3107 3119 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3108 3120 to `n - 1`'s sidedata being written after `n`'s data.
3109 3121
3110 3122 TODO cache this in a docket file before getting out of experimental."""
3111 3123 if self._docket is None:
3112 3124 return self.end(prev)
3113 3125 else:
3114 3126 return self._docket.data_end
3115 3127
3116 3128 def _writeentry(
3117 3129 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3118 3130 ):
3119 3131 # Files opened in a+ mode have inconsistent behavior on various
3120 3132 # platforms. Windows requires that a file positioning call be made
3121 3133 # when the file handle transitions between reads and writes. See
3122 3134 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3123 3135 # platforms, Python or the platform itself can be buggy. Some versions
3124 3136 # of Solaris have been observed to not append at the end of the file
3125 3137 # if the file was seeked to before the end. See issue4943 for more.
3126 3138 #
3127 3139 # We work around this issue by inserting a seek() before writing.
3128 3140 # Note: This is likely not necessary on Python 3. However, because
3129 3141 # the file handle is reused for reads and may be seeked there, we need
3130 3142 # to be careful before changing this.
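        # A minimal standalone sketch of the pattern used below (illustration
        # only; the file name and payload are hypothetical):
        #
        #     with open('some.log', 'a+b') as fh:
        #         fh.read()                # position is now somewhere mid-file
        #         fh.seek(0, os.SEEK_END)  # re-anchor before appending
        #         fh.write(b'new record\n')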
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
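            # An inline revlog keeps index and data interleaved in a single
            # file; once it grows past the inline threshold,
            # _enforceinlinesize() splits it into separate index and data
            # files (descriptive note).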
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )
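                    # The header checked above is the one produced by
                    # mdiff.replacediffheader(oldlen, newlen), conceptually
                    # struct.pack(b">lll", 0, oldlen, newlen): replace bytes
                    # [0, oldlen) with the newlen bytes that follow
                    # (descriptive note).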

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
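
    # Usage sketch (illustrative; ``rl`` and ``minlink`` are hypothetical):
    #
    #     rev, broken = rl.getstrippoint(minlink)
    #
    # ``rev`` is the truncation point passed to strip() below, and ``broken``
    # holds the revisions whose linkrevs that strip would invalidate.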

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If ``None``, the destination revlog's existing
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
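            # Index entry fields used here (descriptive note): entry[0] packs
            # offset and flags (the low 16 bits are the flags masked above),
            # entry[4] is the linkrev, entry[5] and entry[6] are the parent
            # revision numbers, and entry[7] is the node id.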

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
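                    # Precedence note (descriptive): this reads as
                    # flags | (new_flags[0] & ~new_flags[1]); the bits to add
                    # are masked by the bits to remove before being ORed in,
                    # and bits already set in ``flags`` are left untouched.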
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
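        # Shape of the returned dict (a sketch; only the keys that were
        # requested are present):
        #
        #     {
        #         b'exclusivefiles': [(opener, path), ...],
        #         b'sharedfiles': [],
        #         b'revisionscount': int,
        #         b'trackedsize': int,   # sum of raw revision sizes
        #         b'storedsize': int,    # on-disk bytes across files()
        #     }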
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)