revlog: document the `clearcaches` method...
marmoute
r51899:6c3798b4 default
@@ -1,3550 +1,3551 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanket usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation "fast" enough because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class revlog:
245 245 """
246 246 the underlying revision storage object
247 247
248 248 A revlog consists of two parts, an index and the revision data.
249 249
250 250 The index is a file with a fixed record size containing
251 251 information on each revision, including its nodeid (hash), the
252 252 nodeids of its parents, the position and offset of its data within
253 253 the data file, and the revision it's based on. Finally, each entry
254 254 contains a linkrev entry that can serve as a pointer to external
255 255 data.
256 256
257 257 The revision data itself is a linear collection of data chunks.
258 258 Each chunk represents a revision and is usually represented as a
259 259 delta against the previous chunk. To bound lookup time, runs of
260 260 deltas are limited to about 2 times the length of the original
261 261 version data. This makes retrieval of a version proportional to
262 262 its size, or O(1) relative to the number of revisions.
263 263
264 264 Both pieces of the revlog are written to in an append-only
265 265 fashion, which means we never need to rewrite a file to insert or
266 266 remove data, and can use some simple techniques to avoid the need
267 267 for locking while reading.
268 268
269 269 If checkambig, indexfile is opened with checkambig=True at
270 270 writing, to avoid file stat ambiguity.
271 271
272 272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 273 index will be mmapped rather than read if it is larger than the
274 274 configured threshold.
275 275
276 276 If censorable is True, the revlog can have censored revisions.
277 277
278 278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 279 compression for the data content.
280 280
281 281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 282 file handle, a filename, and an expected position. It should check whether
283 283 the current position in the file handle is valid, and log/warn/fail (by
284 284 raising).
285 285
286 286 See mercurial/revlogutils/constants.py for details about the content of an
287 287 index entry.
288 288 """
289 289
290 290 _flagserrorclass = error.RevlogError
291 291
292 292 @staticmethod
293 293 def is_inline_index(header_bytes):
294 294 """Determine if a revlog is inline from the initial bytes of the index"""
295 295 header = INDEX_HEADER.unpack(header_bytes)[0]
296 296
297 297 _format_flags = header & ~0xFFFF
298 298 _format_version = header & 0xFFFF
299 299
300 300 features = FEATURES_BY_VERSION[_format_version]
301 301 return features[b'inline'](_format_flags)
302 302
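# A minimal sketch of the header split performed above, assuming a
# hypothetical 4-byte index prefix of b'\x00\x01\x00\x01' (which, per the
# constants imported above, corresponds to FLAG_INLINE_DATA | REVLOGV1):
#
#   >>> import struct
#   >>> header = struct.unpack(b'>I', b'\x00\x01\x00\x01')[0]
#   >>> header & 0xFFFF            # format version
#   1
#   >>> (header & ~0xFFFF) >> 16   # feature flags (inline data here)
#   1
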
303 303 def __init__(
304 304 self,
305 305 opener,
306 306 target,
307 307 radix,
308 308 postfix=None, # only exist for `tmpcensored` now
309 309 checkambig=False,
310 310 mmaplargeindex=False,
311 311 censorable=False,
312 312 upperboundcomp=None,
313 313 persistentnodemap=False,
314 314 concurrencychecker=None,
315 315 trypending=False,
316 316 try_split=False,
317 317 canonical_parent_order=True,
318 318 ):
319 319 """
320 320 create a revlog object
321 321
322 322 opener is a function that abstracts the file opening operation
323 323 and can be used to implement COW semantics or the like.
324 324
325 325 `target`: a (KIND, ID) tuple that identifies the content stored in
326 326 this revlog. It helps the rest of the code to understand what the revlog
327 327 is about without having to resort to heuristics and index filename
328 328 analysis. Note that this must reliably be set by normal code, but
329 329 that test, debug, or performance measurement code might not set this to
330 330 an accurate value.
331 331 """
332 332 self.upperboundcomp = upperboundcomp
333 333
334 334 self.radix = radix
335 335
336 336 self._docket_file = None
337 337 self._indexfile = None
338 338 self._datafile = None
339 339 self._sidedatafile = None
340 340 self._nodemap_file = None
341 341 self.postfix = postfix
342 342 self._trypending = trypending
343 343 self._try_split = try_split
344 344 self.opener = opener
345 345 if persistentnodemap:
346 346 self._nodemap_file = nodemaputil.get_nodemap_file(self)
347 347
348 348 assert target[0] in ALL_KINDS
349 349 assert len(target) == 2
350 350 self.target = target
351 351 # When True, indexfile is opened with checkambig=True at writing, to
352 352 # avoid file stat ambiguity.
353 353 self._checkambig = checkambig
354 354 self._mmaplargeindex = mmaplargeindex
355 355 self._censorable = censorable
356 356 # 3-tuple of (node, rev, text) for a raw revision.
357 357 self._revisioncache = None
358 358 # Maps rev to chain base rev.
359 359 self._chainbasecache = util.lrucachedict(100)
360 360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 361 self._chunkcache = (0, b'')
362 362 # How much data to read and cache into the raw revlog data cache.
363 363 self._chunkcachesize = 65536
364 364 self._maxchainlen = None
365 365 self._deltabothparents = True
366 366 self._candidate_group_chunk_size = 0
367 367 self._debug_delta = False
368 368 self.index = None
369 369 self._docket = None
370 370 self._nodemap_docket = None
371 371 # Mapping of partial identifiers to full nodes.
372 372 self._pcache = {}
373 373 # Mapping of revision integer to full node.
374 374 self._compengine = b'zlib'
375 375 self._compengineopts = {}
376 376 self._maxdeltachainspan = -1
377 377 self._withsparseread = False
378 378 self._sparserevlog = False
379 379 self.hassidedata = False
380 380 self._srdensitythreshold = 0.50
381 381 self._srmingapsize = 262144
382 382
383 383 # other optional features
384 384
385 385 # might remove rank configuration once the computation has no impact
386 386 self._compute_rank = False
387 387
388 388 # Make copy of flag processors so each revlog instance can support
389 389 # custom flags.
390 390 self._flagprocessors = dict(flagutil.flagprocessors)
391 391
392 392 # 3-tuple of file handles being used for active writing.
393 393 self._writinghandles = None
394 394 # prevent nesting of addgroup
395 395 self._adding_group = None
396 396
397 397 self._loadindex()
398 398
399 399 self._concurrencychecker = concurrencychecker
400 400
401 401 # parent order is supposed to be semantically irrelevant, so we
402 402 # normally resort parents to ensure that the first parent is non-null,
403 403 # if there is a non-null parent at all.
404 404 # filelog abuses the parent order as flag to mark some instances of
405 405 # meta-encoded files, so allow it to disable this behavior.
406 406 self.canonical_parent_order = canonical_parent_order
407 407
408 408 def _init_opts(self):
409 409 """process options (from above/config) to setup associated default revlog mode
410 410
411 411 These values might be affected when actually reading on disk information.
412 412
413 413 The relevant values are returned for use in _loadindex().
414 414
415 415 * newversionflags:
416 416 version header to use if we need to create a new revlog
417 417
418 418 * mmapindexthreshold:
419 419 minimal index size at which to start using mmap
420 420
421 421 * force_nodemap:
422 422 force the usage of a "development" version of the nodemap code
423 423 """
424 424 mmapindexthreshold = None
425 425 opts = self.opener.options
426 426
427 427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
428 428 new_header = CHANGELOGV2
429 429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
430 430 elif b'revlogv2' in opts:
431 431 new_header = REVLOGV2
432 432 elif b'revlogv1' in opts:
433 433 new_header = REVLOGV1 | FLAG_INLINE_DATA
434 434 if b'generaldelta' in opts:
435 435 new_header |= FLAG_GENERALDELTA
436 436 elif b'revlogv0' in self.opener.options:
437 437 new_header = REVLOGV0
438 438 else:
439 439 new_header = REVLOG_DEFAULT_VERSION
440 440
441 441 if b'chunkcachesize' in opts:
442 442 self._chunkcachesize = opts[b'chunkcachesize']
443 443 if b'maxchainlen' in opts:
444 444 self._maxchainlen = opts[b'maxchainlen']
445 445 if b'deltabothparents' in opts:
446 446 self._deltabothparents = opts[b'deltabothparents']
447 447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
448 448 if dps_cgds:
449 449 self._candidate_group_chunk_size = dps_cgds
450 450 self._lazydelta = bool(opts.get(b'lazydelta', True))
451 451 self._lazydeltabase = False
452 452 if self._lazydelta:
453 453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
454 454 if b'debug-delta' in opts:
455 455 self._debug_delta = opts[b'debug-delta']
456 456 if b'compengine' in opts:
457 457 self._compengine = opts[b'compengine']
458 458 if b'zlib.level' in opts:
459 459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
460 460 if b'zstd.level' in opts:
461 461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
462 462 if b'maxdeltachainspan' in opts:
463 463 self._maxdeltachainspan = opts[b'maxdeltachainspan']
464 464 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
465 465 mmapindexthreshold = opts[b'mmapindexthreshold']
466 466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
467 467 withsparseread = bool(opts.get(b'with-sparse-read', False))
468 468 # sparse-revlog forces sparse-read
469 469 self._withsparseread = self._sparserevlog or withsparseread
470 470 if b'sparse-read-density-threshold' in opts:
471 471 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
472 472 if b'sparse-read-min-gap-size' in opts:
473 473 self._srmingapsize = opts[b'sparse-read-min-gap-size']
474 474 if opts.get(b'enableellipsis'):
475 475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
476 476
477 477 # revlog v0 doesn't have flag processors
478 478 for flag, processor in opts.get(b'flagprocessors', {}).items():
479 479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
480 480
481 481 if self._chunkcachesize <= 0:
482 482 raise error.RevlogError(
483 483 _(b'revlog chunk cache size %r is not greater than 0')
484 484 % self._chunkcachesize
485 485 )
486 486 elif self._chunkcachesize & (self._chunkcachesize - 1):
487 487 raise error.RevlogError(
488 488 _(b'revlog chunk cache size %r is not a power of 2')
489 489 % self._chunkcachesize
490 490 )
491 491 force_nodemap = opts.get(b'devel-force-nodemap', False)
492 492 return new_header, mmapindexthreshold, force_nodemap
493 493
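# The `size & (size - 1)` test used above relies on powers of two having
# exactly one bit set, so the bitwise AND with the predecessor is zero
# only for powers of two, e.g.:
#
#   >>> [s for s in (1, 2, 3, 4, 6, 8) if s & (s - 1) == 0]
#   [1, 2, 4, 8]
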
494 494 def _get_data(self, filepath, mmap_threshold, size=None):
495 495 """return a file content with or without mmap
496 496
497 497 If the file is missing return the empty string"""
498 498 try:
499 499 with self.opener(filepath) as fp:
500 500 if mmap_threshold is not None:
501 501 file_size = self.opener.fstat(fp).st_size
502 502 if file_size >= mmap_threshold:
503 503 if size is not None:
504 504 # avoid potential mmap crash
505 505 size = min(file_size, size)
506 506 # TODO: should .close() to release resources without
507 507 # relying on Python GC
508 508 if size is None:
509 509 return util.buffer(util.mmapread(fp))
510 510 else:
511 511 return util.buffer(util.mmapread(fp, size))
512 512 if size is None:
513 513 return fp.read()
514 514 else:
515 515 return fp.read(size)
516 516 except FileNotFoundError:
517 517 return b''
518 518
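# A standalone sketch of the same "mmap large files, read small ones"
# policy using only the standard library; `path` and `threshold` are
# illustrative names, not part of this module:
#
#   import mmap
#   import os
#
#   def read_maybe_mmap(path, threshold):
#       with open(path, 'rb') as fp:
#           if os.fstat(fp.fileno()).st_size >= threshold:
#               return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
#           return fp.read()
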
519 519 def get_streams(self, max_linkrev, force_inline=False):
520 520 """return a list of streams that represent this revlog
521 521
522 522 This is used by stream-clone to do bytes-to-bytes copies of a repository.
523 523
524 524 This streams data for all revisions that refer to a changelog revision up
525 525 to `max_linkrev`.
526 526
527 527 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
528 528
529 529 It returns a list of three-tuples:
530 530
531 531 [
532 532 (filename, bytes_stream, stream_size),
533 533 …
534 534 ]
535 535 """
536 536 n = len(self)
537 537 index = self.index
538 538 while n > 0:
539 539 linkrev = index[n - 1][4]
540 540 if linkrev < max_linkrev:
541 541 break
542 542 # note: this loop will rarely go through multiple iterations, since
543 543 # it only traverses commits created during the current streaming
544 544 # pull operation.
545 545 #
546 546 # If this becomes a problem, using a binary search should cap the
547 547 # runtime of this.
548 548 n = n - 1
549 549 if n == 0:
550 550 # no data to send
551 551 return []
552 552 index_size = n * index.entry_size
553 553 data_size = self.end(n - 1)
554 554
555 555 # XXX we might have been split (or stripped) since the object
556 556 # initialization. We need to close this race too, e.g. by having a way
557 557 # to pre-open the files we feed to the revlog and never closing them
558 558 # before we are done streaming.
559 559
560 560 if self._inline:
561 561
562 562 def get_stream():
563 563 with self._indexfp() as fp:
564 564 yield None
565 565 size = index_size + data_size
566 566 if size <= 65536:
567 567 yield fp.read(size)
568 568 else:
569 569 yield from util.filechunkiter(fp, limit=size)
570 570
571 571 inline_stream = get_stream()
572 572 next(inline_stream)
573 573 return [
574 574 (self._indexfile, inline_stream, index_size + data_size),
575 575 ]
576 576 elif force_inline:
577 577
578 578 def get_stream():
579 579 with self._datafp() as fp_d:
580 580 yield None
581 581
582 582 for rev in range(n):
583 583 idx = self.index.entry_binary(rev)
584 584 if rev == 0 and self._docket is None:
585 585 # re-inject the inline flag
586 586 header = self._format_flags
587 587 header |= self._format_version
588 588 header |= FLAG_INLINE_DATA
589 589 header = self.index.pack_header(header)
590 590 idx = header + idx
591 591 yield idx
592 592 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
593 593
594 594 inline_stream = get_stream()
595 595 next(inline_stream)
596 596 return [
597 597 (self._indexfile, inline_stream, index_size + data_size),
598 598 ]
599 599 else:
600 600
601 601 def get_index_stream():
602 602 with self._indexfp() as fp:
603 603 yield None
604 604 if index_size <= 65536:
605 605 yield fp.read(index_size)
606 606 else:
607 607 yield from util.filechunkiter(fp, limit=index_size)
608 608
609 609 def get_data_stream():
610 610 with self._datafp() as fp:
611 611 yield None
612 612 if data_size <= 65536:
613 613 yield fp.read(data_size)
614 614 else:
615 615 yield from util.filechunkiter(fp, limit=data_size)
616 616
617 617 index_stream = get_index_stream()
618 618 next(index_stream)
619 619 data_stream = get_data_stream()
620 620 next(data_stream)
621 621 return [
622 622 (self._datafile, data_stream, data_size),
623 623 (self._indexfile, index_stream, index_size),
624 624 ]
625 625
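# A sketch of how a consumer might drain the value returned above (`rl`
# and `max_linkrev` are stand-ins for a revlog instance and a changelog
# boundary, and the destination paths are illustrative):
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       with open(name, 'wb') as out:
#           for chunk in stream:
#               out.write(chunk)
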
626 626 def _loadindex(self, docket=None):
627 627
628 628 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
629 629
630 630 if self.postfix is not None:
631 631 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
632 632 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
633 633 entry_point = b'%s.i.a' % self.radix
634 634 elif self._try_split and self.opener.exists(self._split_index_file):
635 635 entry_point = self._split_index_file
636 636 else:
637 637 entry_point = b'%s.i' % self.radix
638 638
639 639 if docket is not None:
640 640 self._docket = docket
641 641 self._docket_file = entry_point
642 642 else:
643 643 self._initempty = True
644 644 entry_data = self._get_data(entry_point, mmapindexthreshold)
645 645 if len(entry_data) > 0:
646 646 header = INDEX_HEADER.unpack(entry_data[:4])[0]
647 647 self._initempty = False
648 648 else:
649 649 header = new_header
650 650
651 651 self._format_flags = header & ~0xFFFF
652 652 self._format_version = header & 0xFFFF
653 653
654 654 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
655 655 if supported_flags is None:
656 656 msg = _(b'unknown version (%d) in revlog %s')
657 657 msg %= (self._format_version, self.display_id)
658 658 raise error.RevlogError(msg)
659 659 elif self._format_flags & ~supported_flags:
660 660 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
661 661 display_flag = self._format_flags >> 16
662 662 msg %= (display_flag, self._format_version, self.display_id)
663 663 raise error.RevlogError(msg)
664 664
665 665 features = FEATURES_BY_VERSION[self._format_version]
666 666 self._inline = features[b'inline'](self._format_flags)
667 667 self._generaldelta = features[b'generaldelta'](self._format_flags)
668 668 self.hassidedata = features[b'sidedata']
669 669
670 670 if not features[b'docket']:
671 671 self._indexfile = entry_point
672 672 index_data = entry_data
673 673 else:
674 674 self._docket_file = entry_point
675 675 if self._initempty:
676 676 self._docket = docketutil.default_docket(self, header)
677 677 else:
678 678 self._docket = docketutil.parse_docket(
679 679 self, entry_data, use_pending=self._trypending
680 680 )
681 681
682 682 if self._docket is not None:
683 683 self._indexfile = self._docket.index_filepath()
684 684 index_data = b''
685 685 index_size = self._docket.index_end
686 686 if index_size > 0:
687 687 index_data = self._get_data(
688 688 self._indexfile, mmapindexthreshold, size=index_size
689 689 )
690 690 if len(index_data) < index_size:
691 691 msg = _(b'too few index data for %s: got %d, expected %d')
692 692 msg %= (self.display_id, len(index_data), index_size)
693 693 raise error.RevlogError(msg)
694 694
695 695 self._inline = False
696 696 # generaldelta implied by version 2 revlogs.
697 697 self._generaldelta = True
698 698 # the logic for persistent nodemap will be dealt with within the
699 699 # main docket, so disable it for now.
700 700 self._nodemap_file = None
701 701
702 702 if self._docket is not None:
703 703 self._datafile = self._docket.data_filepath()
704 704 self._sidedatafile = self._docket.sidedata_filepath()
705 705 elif self.postfix is None:
706 706 self._datafile = b'%s.d' % self.radix
707 707 else:
708 708 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
709 709
710 710 self.nodeconstants = sha1nodeconstants
711 711 self.nullid = self.nodeconstants.nullid
712 712
713 713 # sparse-revlog can't be on without general-delta (issue6056)
714 714 if not self._generaldelta:
715 715 self._sparserevlog = False
716 716
717 717 self._storedeltachains = True
718 718
719 719 devel_nodemap = (
720 720 self._nodemap_file
721 721 and force_nodemap
722 722 and parse_index_v1_nodemap is not None
723 723 )
724 724
725 725 use_rust_index = False
726 726 if rustrevlog is not None:
727 727 if self._nodemap_file is not None:
728 728 use_rust_index = True
729 729 else:
730 730 use_rust_index = self.opener.options.get(b'rust.index')
731 731
732 732 self._parse_index = parse_index_v1
733 733 if self._format_version == REVLOGV0:
734 734 self._parse_index = revlogv0.parse_index_v0
735 735 elif self._format_version == REVLOGV2:
736 736 self._parse_index = parse_index_v2
737 737 elif self._format_version == CHANGELOGV2:
738 738 self._parse_index = parse_index_cl_v2
739 739 elif devel_nodemap:
740 740 self._parse_index = parse_index_v1_nodemap
741 741 elif use_rust_index:
742 742 self._parse_index = parse_index_v1_mixed
743 743 try:
744 744 d = self._parse_index(index_data, self._inline)
745 745 index, chunkcache = d
746 746 use_nodemap = (
747 747 not self._inline
748 748 and self._nodemap_file is not None
749 749 and hasattr(index, 'update_nodemap_data')
750 750 )
751 751 if use_nodemap:
752 752 nodemap_data = nodemaputil.persisted_data(self)
753 753 if nodemap_data is not None:
754 754 docket = nodemap_data[0]
755 755 if (
756 756 len(d[0]) > docket.tip_rev
757 757 and d[0][docket.tip_rev][7] == docket.tip_node
758 758 ):
759 759 # no changelog tampering
760 760 self._nodemap_docket = docket
761 761 index.update_nodemap_data(*nodemap_data)
762 762 except (ValueError, IndexError):
763 763 raise error.RevlogError(
764 764 _(b"index %s is corrupted") % self.display_id
765 765 )
766 766 self.index = index
767 767 self._segmentfile = randomaccessfile.randomaccessfile(
768 768 self.opener,
769 769 (self._indexfile if self._inline else self._datafile),
770 770 self._chunkcachesize,
771 771 chunkcache,
772 772 )
773 773 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
774 774 self.opener,
775 775 self._sidedatafile,
776 776 self._chunkcachesize,
777 777 )
778 778 # revnum -> (chain-length, sum-delta-length)
779 779 self._chaininfocache = util.lrucachedict(500)
780 780 # revlog header -> revlog compressor
781 781 self._decompressors = {}
782 782
783 783 def get_revlog(self):
784 784 """simple function to mirror API of other not-really-revlog API"""
785 785 return self
786 786
787 787 @util.propertycache
788 788 def revlog_kind(self):
789 789 return self.target[0]
790 790
791 791 @util.propertycache
792 792 def display_id(self):
793 793 """The public facing "ID" of the revlog that we use in message"""
794 794 if self.revlog_kind == KIND_FILELOG:
795 795 # Reference the file without the "data/" prefix, so it is familiar
796 796 # to the user.
797 797 return self.target[1]
798 798 else:
799 799 return self.radix
800 800
801 801 def _get_decompressor(self, t):
802 802 try:
803 803 compressor = self._decompressors[t]
804 804 except KeyError:
805 805 try:
806 806 engine = util.compengines.forrevlogheader(t)
807 807 compressor = engine.revlogcompressor(self._compengineopts)
808 808 self._decompressors[t] = compressor
809 809 except KeyError:
810 810 raise error.RevlogError(
811 811 _(b'unknown compression type %s') % binascii.hexlify(t)
812 812 )
813 813 return compressor
814 814
815 815 @util.propertycache
816 816 def _compressor(self):
817 817 engine = util.compengines[self._compengine]
818 818 return engine.revlogcompressor(self._compengineopts)
819 819
820 820 @util.propertycache
821 821 def _decompressor(self):
822 822 """the default decompressor"""
823 823 if self._docket is None:
824 824 return None
825 825 t = self._docket.default_compression_header
826 826 c = self._get_decompressor(t)
827 827 return c.decompress
828 828
829 829 def _indexfp(self):
830 830 """file object for the revlog's index file"""
831 831 return self.opener(self._indexfile, mode=b"r")
832 832
833 833 def __index_write_fp(self):
834 834 # You should not use this directly; use `_writing` instead
835 835 try:
836 836 f = self.opener(
837 837 self._indexfile, mode=b"r+", checkambig=self._checkambig
838 838 )
839 839 if self._docket is None:
840 840 f.seek(0, os.SEEK_END)
841 841 else:
842 842 f.seek(self._docket.index_end, os.SEEK_SET)
843 843 return f
844 844 except FileNotFoundError:
845 845 return self.opener(
846 846 self._indexfile, mode=b"w+", checkambig=self._checkambig
847 847 )
848 848
849 849 def __index_new_fp(self):
850 850 # You should not use this unless you are upgrading from inline revlog
851 851 return self.opener(
852 852 self._indexfile,
853 853 mode=b"w",
854 854 checkambig=self._checkambig,
855 855 atomictemp=True,
856 856 )
857 857
858 858 def _datafp(self, mode=b'r'):
859 859 """file object for the revlog's data file"""
860 860 return self.opener(self._datafile, mode=mode)
861 861
862 862 @contextlib.contextmanager
863 863 def _sidedatareadfp(self):
864 864 """file object suitable to read sidedata"""
865 865 if self._writinghandles:
866 866 yield self._writinghandles[2]
867 867 else:
868 868 with self.opener(self._sidedatafile) as fp:
869 869 yield fp
870 870
871 871 def tiprev(self):
872 872 return len(self.index) - 1
873 873
874 874 def tip(self):
875 875 return self.node(self.tiprev())
876 876
877 877 def __contains__(self, rev):
878 878 return 0 <= rev < len(self)
879 879
880 880 def __len__(self):
881 881 return len(self.index)
882 882
883 883 def __iter__(self):
884 884 return iter(range(len(self)))
885 885
886 886 def revs(self, start=0, stop=None):
887 887 """iterate over all rev in this revlog (from start to stop)"""
888 888 return storageutil.iterrevs(len(self), start=start, stop=stop)
889 889
890 890 def hasnode(self, node):
891 891 try:
892 892 self.rev(node)
893 893 return True
894 894 except KeyError:
895 895 return False
896 896
897 897 def _candelta(self, baserev, rev):
898 898 """whether two revisions (baserev, rev) can be delta-ed or not"""
899 899 # Disable delta if either rev requires a content-changing flag
900 900 # processor (ex. LFS). This is because such a flag processor can alter
901 901 # the rawtext content that the delta will be based on, and two clients
902 902 # could have the same revlog node with different flags (i.e. different
903 903 # rawtext contents) and the delta could be incompatible.
904 904 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
905 905 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
906 906 ):
907 907 return False
908 908 return True
909 909
910 910 def update_caches(self, transaction):
911 911 """update on disk cache
912 912
913 913 If a transaction is passed, the update may be delayed to transaction
914 914 commit."""
915 915 if self._nodemap_file is not None:
916 916 if transaction is None:
917 917 nodemaputil.update_persistent_nodemap(self)
918 918 else:
919 919 nodemaputil.setup_persistent_nodemap(transaction, self)
920 920
921 921 def clearcaches(self):
922 """Clear in-memory caches"""
922 923 self._revisioncache = None
923 924 self._chainbasecache.clear()
924 925 self._segmentfile.clear_cache()
925 926 self._segmentfile_sidedata.clear_cache()
926 927 self._pcache = {}
927 928 self._nodemap_docket = None
928 929 self.index.clearcaches()
929 930 # The python code is the one responsible for validating the docket, so we
930 931 # end up having to refresh it here.
931 932 use_nodemap = (
932 933 not self._inline
933 934 and self._nodemap_file is not None
934 935 and hasattr(self.index, 'update_nodemap_data')
935 936 )
936 937 if use_nodemap:
937 938 nodemap_data = nodemaputil.persisted_data(self)
938 939 if nodemap_data is not None:
939 940 self._nodemap_docket = nodemap_data[0]
940 941 self.index.update_nodemap_data(*nodemap_data)
941 942
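# A hypothetical usage sketch, e.g. for benchmarking code that wants
# cold-cache timings (`rl` and `node` are stand-ins for a revlog
# instance and a known node):
#
#   rl.clearcaches()
#   text = rl.revision(node)  # first read after clearing goes to disk
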
942 943 def rev(self, node):
943 944 try:
944 945 return self.index.rev(node)
945 946 except TypeError:
946 947 raise
947 948 except error.RevlogError:
948 949 # parsers.c radix tree lookup failed
949 950 if (
950 951 node == self.nodeconstants.wdirid
951 952 or node in self.nodeconstants.wdirfilenodeids
952 953 ):
953 954 raise error.WdirUnsupported
954 955 raise error.LookupError(node, self.display_id, _(b'no node'))
955 956
956 957 # Accessors for index entries.
957 958
958 959 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
959 960 # are flags.
960 961 def start(self, rev):
961 962 return int(self.index[rev][0] >> 16)
962 963
963 964 def sidedata_cut_off(self, rev):
964 965 sd_cut_off = self.index[rev][8]
965 966 if sd_cut_off != 0:
966 967 return sd_cut_off
967 968 # This is some annoying dance, because entries without sidedata
968 969 # currently use 0 as their offset (instead of previous-offset +
969 970 # previous-size).
970 971 #
971 972 # We should reconsider this sidedata → 0 sidedata_offset policy.
972 973 # In the meantime, we need this.
973 974 while 0 <= rev:
974 975 e = self.index[rev]
975 976 if e[9] != 0:
976 977 return e[8] + e[9]
977 978 rev -= 1
978 979 return 0
979 980
980 981 def flags(self, rev):
981 982 return self.index[rev][0] & 0xFFFF
982 983
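# A sketch of the packing that `start` and `flags` undo: the first
# index-entry field stores the data offset shifted left by 16 bits,
# combined with the 16-bit flags (values below are made up):
#
#   >>> offset_flags = (1024 << 16) | 0x0002
#   >>> offset_flags >> 16, offset_flags & 0xFFFF
#   (1024, 2)
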
983 984 def length(self, rev):
984 985 return self.index[rev][1]
985 986
986 987 def sidedata_length(self, rev):
987 988 if not self.hassidedata:
988 989 return 0
989 990 return self.index[rev][9]
990 991
991 992 def rawsize(self, rev):
992 993 """return the length of the uncompressed text for a given revision"""
993 994 l = self.index[rev][2]
994 995 if l >= 0:
995 996 return l
996 997
997 998 t = self.rawdata(rev)
998 999 return len(t)
999 1000
1000 1001 def size(self, rev):
1001 1002 """length of non-raw text (processed by a "read" flag processor)"""
1002 1003 # fast path: if no "read" flag processor could change the content,
1003 1004 # size is rawsize. note: ELLIPSIS is known to not change the content.
1004 1005 flags = self.flags(rev)
1005 1006 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1006 1007 return self.rawsize(rev)
1007 1008
1008 1009 return len(self.revision(rev))
1009 1010
1010 1011 def fast_rank(self, rev):
1011 1012 """Return the rank of a revision if already known, or None otherwise.
1012 1013
1013 1014 The rank of a revision is the size of the sub-graph it defines as a
1014 1015 head. Equivalently, the rank of a revision `r` is the size of the set
1015 1016 `ancestors(r)`, `r` included.
1016 1017
1017 1018 This method returns the rank retrieved from the revlog in constant
1018 1019 time. It makes no attempt at computing unknown values for versions of
1019 1020 the revlog which do not persist the rank.
1020 1021 """
1021 1022 rank = self.index[rev][ENTRY_RANK]
1022 1023 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1023 1024 return None
1024 1025 if rev == nullrev:
1025 1026 return 0 # convention
1026 1027 return rank
1027 1028
1028 1029 def chainbase(self, rev):
1029 1030 base = self._chainbasecache.get(rev)
1030 1031 if base is not None:
1031 1032 return base
1032 1033
1033 1034 index = self.index
1034 1035 iterrev = rev
1035 1036 base = index[iterrev][3]
1036 1037 while base != iterrev:
1037 1038 iterrev = base
1038 1039 base = index[iterrev][3]
1039 1040
1040 1041 self._chainbasecache[rev] = base
1041 1042 return base
1042 1043
1043 1044 def linkrev(self, rev):
1044 1045 return self.index[rev][4]
1045 1046
1046 1047 def parentrevs(self, rev):
1047 1048 try:
1048 1049 entry = self.index[rev]
1049 1050 except IndexError:
1050 1051 if rev == wdirrev:
1051 1052 raise error.WdirUnsupported
1052 1053 raise
1053 1054
1054 1055 if self.canonical_parent_order and entry[5] == nullrev:
1055 1056 return entry[6], entry[5]
1056 1057 else:
1057 1058 return entry[5], entry[6]
1058 1059
1059 1060 # fast parentrevs(rev) where rev isn't filtered
1060 1061 _uncheckedparentrevs = parentrevs
1061 1062
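# A sketch of the normalization above: if the first parent is null but
# the second is not, the pair is swapped so callers can rely on a
# non-null first parent (revs are illustrative):
#
#   >>> p1, p2 = -1, 7   # -1 stands in for nullrev
#   >>> (p2, p1) if p1 == -1 else (p1, p2)
#   (7, -1)
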
1062 1063 def node(self, rev):
1063 1064 try:
1064 1065 return self.index[rev][7]
1065 1066 except IndexError:
1066 1067 if rev == wdirrev:
1067 1068 raise error.WdirUnsupported
1068 1069 raise
1069 1070
1070 1071 # Derived from index values.
1071 1072
1072 1073 def end(self, rev):
1073 1074 return self.start(rev) + self.length(rev)
1074 1075
1075 1076 def parents(self, node):
1076 1077 i = self.index
1077 1078 d = i[self.rev(node)]
1078 1079 # inline node() to avoid function call overhead
1079 1080 if self.canonical_parent_order and d[5] == self.nullid:
1080 1081 return i[d[6]][7], i[d[5]][7]
1081 1082 else:
1082 1083 return i[d[5]][7], i[d[6]][7]
1083 1084
1084 1085 def chainlen(self, rev):
1085 1086 return self._chaininfo(rev)[0]
1086 1087
1087 1088 def _chaininfo(self, rev):
1088 1089 chaininfocache = self._chaininfocache
1089 1090 if rev in chaininfocache:
1090 1091 return chaininfocache[rev]
1091 1092 index = self.index
1092 1093 generaldelta = self._generaldelta
1093 1094 iterrev = rev
1094 1095 e = index[iterrev]
1095 1096 clen = 0
1096 1097 compresseddeltalen = 0
1097 1098 while iterrev != e[3]:
1098 1099 clen += 1
1099 1100 compresseddeltalen += e[1]
1100 1101 if generaldelta:
1101 1102 iterrev = e[3]
1102 1103 else:
1103 1104 iterrev -= 1
1104 1105 if iterrev in chaininfocache:
1105 1106 t = chaininfocache[iterrev]
1106 1107 clen += t[0]
1107 1108 compresseddeltalen += t[1]
1108 1109 break
1109 1110 e = index[iterrev]
1110 1111 else:
1111 1112 # Add text length of base since decompressing that also takes
1112 1113 # work. For cache hits the length is already included.
1113 1114 compresseddeltalen += e[1]
1114 1115 r = (clen, compresseddeltalen)
1115 1116 chaininfocache[rev] = r
1116 1117 return r
1117 1118
1118 1119 def _deltachain(self, rev, stoprev=None):
1119 1120 """Obtain the delta chain for a revision.
1120 1121
1121 1122 ``stoprev`` specifies a revision to stop at. If not specified, we
1122 1123 stop at the base of the chain.
1123 1124
1124 1125 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1125 1126 revs in ascending order and ``stopped`` is a bool indicating whether
1126 1127 ``stoprev`` was hit.
1127 1128 """
1128 1129 # Try C implementation.
1129 1130 try:
1130 1131 return self.index.deltachain(rev, stoprev, self._generaldelta)
1131 1132 except AttributeError:
1132 1133 pass
1133 1134
1134 1135 chain = []
1135 1136
1136 1137 # Alias to prevent attribute lookup in tight loop.
1137 1138 index = self.index
1138 1139 generaldelta = self._generaldelta
1139 1140
1140 1141 iterrev = rev
1141 1142 e = index[iterrev]
1142 1143 while iterrev != e[3] and iterrev != stoprev:
1143 1144 chain.append(iterrev)
1144 1145 if generaldelta:
1145 1146 iterrev = e[3]
1146 1147 else:
1147 1148 iterrev -= 1
1148 1149 e = index[iterrev]
1149 1150
1150 1151 if iterrev == stoprev:
1151 1152 stopped = True
1152 1153 else:
1153 1154 chain.append(iterrev)
1154 1155 stopped = False
1155 1156
1156 1157 chain.reverse()
1157 1158 return chain, stopped
1158 1159
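# A toy illustration of the pure-python fallback above, assuming a
# made-up generaldelta index where `base[rev]` plays the role of
# entry[3] (rev 0 is a full snapshot, so it is its own base):
#
#   >>> base = {0: 0, 1: 0, 2: 1, 3: 2}
#   >>> rev, chain = 3, []
#   >>> while rev != base[rev]:
#   ...     chain.append(rev)
#   ...     rev = base[rev]
#   >>> chain.append(rev)
#   >>> chain[::-1]
#   [0, 1, 2, 3]
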
1159 1160 def ancestors(self, revs, stoprev=0, inclusive=False):
1160 1161 """Generate the ancestors of 'revs' in reverse revision order.
1161 1162 Does not generate revs lower than stoprev.
1162 1163
1163 1164 See the documentation for ancestor.lazyancestors for more details."""
1164 1165
1165 1166 # first, make sure start revisions aren't filtered
1166 1167 revs = list(revs)
1167 1168 checkrev = self.node
1168 1169 for r in revs:
1169 1170 checkrev(r)
1170 1171 # and we're sure ancestors aren't filtered as well
1171 1172
1172 1173 if rustancestor is not None and self.index.rust_ext_compat:
1173 1174 lazyancestors = rustancestor.LazyAncestors
1174 1175 arg = self.index
1175 1176 else:
1176 1177 lazyancestors = ancestor.lazyancestors
1177 1178 arg = self._uncheckedparentrevs
1178 1179 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1179 1180
1180 1181 def descendants(self, revs):
1181 1182 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1182 1183
1183 1184 def findcommonmissing(self, common=None, heads=None):
1184 1185 """Return a tuple of the ancestors of common and the ancestors of heads
1185 1186 that are not ancestors of common. In revset terminology, we return the
1186 1187 tuple:
1187 1188
1188 1189 ::common, (::heads) - (::common)
1189 1190
1190 1191 The list is sorted by revision number, meaning it is
1191 1192 topologically sorted.
1192 1193
1193 1194 'heads' and 'common' are both lists of node IDs. If heads is
1194 1195 not supplied, uses all of the revlog's heads. If common is not
1195 1196 supplied, uses nullid."""
1196 1197 if common is None:
1197 1198 common = [self.nullid]
1198 1199 if heads is None:
1199 1200 heads = self.heads()
1200 1201
1201 1202 common = [self.rev(n) for n in common]
1202 1203 heads = [self.rev(n) for n in heads]
1203 1204
1204 1205 # we want the ancestors, but inclusive
1205 1206 class lazyset:
1206 1207 def __init__(self, lazyvalues):
1207 1208 self.addedvalues = set()
1208 1209 self.lazyvalues = lazyvalues
1209 1210
1210 1211 def __contains__(self, value):
1211 1212 return value in self.addedvalues or value in self.lazyvalues
1212 1213
1213 1214 def __iter__(self):
1214 1215 added = self.addedvalues
1215 1216 for r in added:
1216 1217 yield r
1217 1218 for r in self.lazyvalues:
1218 1219 if r not in added:
1219 1220 yield r
1220 1221
1221 1222 def add(self, value):
1222 1223 self.addedvalues.add(value)
1223 1224
1224 1225 def update(self, values):
1225 1226 self.addedvalues.update(values)
1226 1227
1227 1228 has = lazyset(self.ancestors(common))
1228 1229 has.add(nullrev)
1229 1230 has.update(common)
1230 1231
1231 1232 # take all ancestors from heads that aren't in has
1232 1233 missing = set()
1233 1234 visit = collections.deque(r for r in heads if r not in has)
1234 1235 while visit:
1235 1236 r = visit.popleft()
1236 1237 if r in missing:
1237 1238 continue
1238 1239 else:
1239 1240 missing.add(r)
1240 1241 for p in self.parentrevs(r):
1241 1242 if p not in has:
1242 1243 visit.append(p)
1243 1244 missing = list(missing)
1244 1245 missing.sort()
1245 1246 return has, [self.node(miss) for miss in missing]
1246 1247
1247 1248 def incrementalmissingrevs(self, common=None):
1248 1249 """Return an object that can be used to incrementally compute the
1249 1250 revision numbers of the ancestors of arbitrary sets that are not
1250 1251 ancestors of common. This is an ancestor.incrementalmissingancestors
1251 1252 object.
1252 1253
1253 1254 'common' is a list of revision numbers. If common is not supplied, uses
1254 1255 nullrev.
1255 1256 """
1256 1257 if common is None:
1257 1258 common = [nullrev]
1258 1259
1259 1260 if rustancestor is not None and self.index.rust_ext_compat:
1260 1261 return rustancestor.MissingAncestors(self.index, common)
1261 1262 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1262 1263
1263 1264 def findmissingrevs(self, common=None, heads=None):
1264 1265 """Return the revision numbers of the ancestors of heads that
1265 1266 are not ancestors of common.
1266 1267
1267 1268 More specifically, return a list of revision numbers corresponding to
1268 1269 nodes N such that every N satisfies the following constraints:
1269 1270
1270 1271 1. N is an ancestor of some node in 'heads'
1271 1272 2. N is not an ancestor of any node in 'common'
1272 1273
1273 1274 The list is sorted by revision number, meaning it is
1274 1275 topologically sorted.
1275 1276
1276 1277 'heads' and 'common' are both lists of revision numbers. If heads is
1277 1278 not supplied, uses all of the revlog's heads. If common is not
1278 1279 supplied, uses nullid."""
1279 1280 if common is None:
1280 1281 common = [nullrev]
1281 1282 if heads is None:
1282 1283 heads = self.headrevs()
1283 1284
1284 1285 inc = self.incrementalmissingrevs(common=common)
1285 1286 return inc.missingancestors(heads)
1286 1287
1287 1288 def findmissing(self, common=None, heads=None):
1288 1289 """Return the ancestors of heads that are not ancestors of common.
1289 1290
1290 1291 More specifically, return a list of nodes N such that every N
1291 1292 satisfies the following constraints:
1292 1293
1293 1294 1. N is an ancestor of some node in 'heads'
1294 1295 2. N is not an ancestor of any node in 'common'
1295 1296
1296 1297 The list is sorted by revision number, meaning it is
1297 1298 topologically sorted.
1298 1299
1299 1300 'heads' and 'common' are both lists of node IDs. If heads is
1300 1301 not supplied, uses all of the revlog's heads. If common is not
1301 1302 supplied, uses nullid."""
1302 1303 if common is None:
1303 1304 common = [self.nullid]
1304 1305 if heads is None:
1305 1306 heads = self.heads()
1306 1307
1307 1308 common = [self.rev(n) for n in common]
1308 1309 heads = [self.rev(n) for n in heads]
1309 1310
1310 1311 inc = self.incrementalmissingrevs(common=common)
1311 1312 return [self.node(r) for r in inc.missingancestors(heads)]
1312 1313
1313 1314 def nodesbetween(self, roots=None, heads=None):
1314 1315 """Return a topological path from 'roots' to 'heads'.
1315 1316
1316 1317 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1317 1318 topologically sorted list of all nodes N that satisfy both of
1318 1319 these constraints:
1319 1320
1320 1321 1. N is a descendant of some node in 'roots'
1321 1322 2. N is an ancestor of some node in 'heads'
1322 1323
1323 1324 Every node is considered to be both a descendant and an ancestor
1324 1325 of itself, so every reachable node in 'roots' and 'heads' will be
1325 1326 included in 'nodes'.
1326 1327
1327 1328 'outroots' is the list of reachable nodes in 'roots', i.e., the
1328 1329 subset of 'roots' that is returned in 'nodes'. Likewise,
1329 1330 'outheads' is the subset of 'heads' that is also in 'nodes'.
1330 1331
1331 1332 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1332 1333 unspecified, uses nullid as the only root. If 'heads' is
1333 1334 unspecified, uses list of all of the revlog's heads."""
1334 1335 nonodes = ([], [], [])
1335 1336 if roots is not None:
1336 1337 roots = list(roots)
1337 1338 if not roots:
1338 1339 return nonodes
1339 1340 lowestrev = min([self.rev(n) for n in roots])
1340 1341 else:
1341 1342 roots = [self.nullid] # Everybody's a descendant of nullid
1342 1343 lowestrev = nullrev
1343 1344 if (lowestrev == nullrev) and (heads is None):
1344 1345 # We want _all_ the nodes!
1345 1346 return (
1346 1347 [self.node(r) for r in self],
1347 1348 [self.nullid],
1348 1349 list(self.heads()),
1349 1350 )
1350 1351 if heads is None:
1351 1352 # All nodes are ancestors, so the latest ancestor is the last
1352 1353 # node.
1353 1354 highestrev = len(self) - 1
1354 1355 # Set ancestors to None to signal that every node is an ancestor.
1355 1356 ancestors = None
1356 1357 # Set heads to an empty dictionary for later discovery of heads
1357 1358 heads = {}
1358 1359 else:
1359 1360 heads = list(heads)
1360 1361 if not heads:
1361 1362 return nonodes
1362 1363 ancestors = set()
1363 1364 # Turn heads into a dictionary so we can remove 'fake' heads.
1364 1365 # Also, later we will be using it to filter out the heads we can't
1365 1366 # find from roots.
1366 1367 heads = dict.fromkeys(heads, False)
1367 1368 # Start at the top and keep marking parents until we're done.
1368 1369 nodestotag = set(heads)
1369 1370 # Remember where the top was so we can use it as a limit later.
1370 1371 highestrev = max([self.rev(n) for n in nodestotag])
1371 1372 while nodestotag:
1372 1373 # grab a node to tag
1373 1374 n = nodestotag.pop()
1374 1375 # Never tag nullid
1375 1376 if n == self.nullid:
1376 1377 continue
1377 1378 # A node's revision number represents its place in a
1378 1379 # topologically sorted list of nodes.
1379 1380 r = self.rev(n)
1380 1381 if r >= lowestrev:
1381 1382 if n not in ancestors:
1382 1383 # If we are possibly a descendant of one of the roots
1383 1384 # and we haven't already been marked as an ancestor
1384 1385 ancestors.add(n) # Mark as ancestor
1385 1386 # Add non-nullid parents to list of nodes to tag.
1386 1387 nodestotag.update(
1387 1388 [p for p in self.parents(n) if p != self.nullid]
1388 1389 )
1389 1390 elif n in heads: # We've seen it before, is it a fake head?
1390 1391 # So it is, real heads should not be the ancestors of
1391 1392 # any other heads.
1392 1393 heads.pop(n)
1393 1394 if not ancestors:
1394 1395 return nonodes
1395 1396 # Now that we have our set of ancestors, we want to remove any
1396 1397 # roots that are not ancestors.
1397 1398
1398 1399 # If one of the roots was nullid, everything is included anyway.
1399 1400 if lowestrev > nullrev:
1400 1401 # But, since we weren't, let's recompute the lowest rev to not
1401 1402 # include roots that aren't ancestors.
1402 1403
1403 1404 # Filter out roots that aren't ancestors of heads
1404 1405 roots = [root for root in roots if root in ancestors]
1405 1406 # Recompute the lowest revision
1406 1407 if roots:
1407 1408 lowestrev = min([self.rev(root) for root in roots])
1408 1409 else:
1409 1410 # No more roots? Return empty list
1410 1411 return nonodes
1411 1412 else:
1412 1413 # We are descending from nullid, and don't need to care about
1413 1414 # any other roots.
1414 1415 lowestrev = nullrev
1415 1416 roots = [self.nullid]
1416 1417 # Transform our roots list into a set.
1417 1418 descendants = set(roots)
1418 1419 # Also, keep the original roots so we can filter out roots that aren't
1419 1420 # 'real' roots (i.e. are descended from other roots).
1420 1421 roots = descendants.copy()
1421 1422 # Our topologically sorted list of output nodes.
1422 1423 orderedout = []
1423 1424 # Don't start at nullid since we don't want nullid in our output list,
1424 1425 # and if nullid shows up in descendants, empty parents will look like
1425 1426 # they're descendants.
1426 1427 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1427 1428 n = self.node(r)
1428 1429 isdescendant = False
1429 1430 if lowestrev == nullrev: # Everybody is a descendant of nullid
1430 1431 isdescendant = True
1431 1432 elif n in descendants:
1432 1433 # n is already a descendant
1433 1434 isdescendant = True
1434 1435 # This check only needs to be done here because all the roots
1435 1436 # will start being marked as descendants before the loop.
1436 1437 if n in roots:
1437 1438 # If n was a root, check if it's a 'real' root.
1438 1439 p = tuple(self.parents(n))
1439 1440 # If any of its parents are descendants, it's not a root.
1440 1441 if (p[0] in descendants) or (p[1] in descendants):
1441 1442 roots.remove(n)
1442 1443 else:
1443 1444 p = tuple(self.parents(n))
1444 1445 # A node is a descendant if either of its parents are
1445 1446 # descendants. (We seeded the descendants set with the roots
1446 1447 # up there, remember?)
1447 1448 if (p[0] in descendants) or (p[1] in descendants):
1448 1449 descendants.add(n)
1449 1450 isdescendant = True
1450 1451 if isdescendant and ((ancestors is None) or (n in ancestors)):
1451 1452 # Only include nodes that are both descendants and ancestors.
1452 1453 orderedout.append(n)
1453 1454 if (ancestors is not None) and (n in heads):
1454 1455 # We're trying to figure out which heads are reachable
1455 1456 # from roots.
1456 1457 # Mark this head as having been reached
1457 1458 heads[n] = True
1458 1459 elif ancestors is None:
1459 1460 # Otherwise, we're trying to discover the heads.
1460 1461 # Assume this is a head because if it isn't, the next step
1461 1462 # will eventually remove it.
1462 1463 heads[n] = True
1463 1464 # But, obviously its parents aren't.
1464 1465 for p in self.parents(n):
1465 1466 heads.pop(p, None)
1466 1467 heads = [head for head, flag in heads.items() if flag]
1467 1468 roots = list(roots)
1468 1469 assert orderedout
1469 1470 assert roots
1470 1471 assert heads
1471 1472 return (orderedout, roots, heads)
1472 1473
1473 1474 def headrevs(self, revs=None):
1474 1475 if revs is None:
1475 1476 try:
1476 1477 return self.index.headrevs()
1477 1478 except AttributeError:
1478 1479 return self._headrevs()
1479 1480 if rustdagop is not None and self.index.rust_ext_compat:
1480 1481 return rustdagop.headrevs(self.index, revs)
1481 1482 return dagop.headrevs(revs, self._uncheckedparentrevs)
1482 1483
1483 1484 def computephases(self, roots):
1484 1485 return self.index.computephasesmapsets(roots)
1485 1486
1486 1487 def _headrevs(self):
1487 1488 count = len(self)
1488 1489 if not count:
1489 1490 return [nullrev]
1490 1491 # we won't iterate over filtered revs so nobody is a head at start
1491 1492 ishead = [0] * (count + 1)
1492 1493 index = self.index
1493 1494 for r in self:
1494 1495 ishead[r] = 1 # I may be a head
1495 1496 e = index[r]
1496 1497 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1497 1498 return [r for r, val in enumerate(ishead) if val]
1498 1499
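# The same "everyone is a head until named as a parent" idea on a toy
# DAG (parent pairs are illustrative; -1 stands in for nullrev):
#
#   >>> parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
#   >>> ishead = dict.fromkeys(parents, True)
#   >>> for p1, p2 in parents.values():
#   ...     _ = ishead.pop(p1, None)
#   ...     _ = ishead.pop(p2, None)
#   >>> sorted(ishead)
#   [3]
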
1499 1500 def heads(self, start=None, stop=None):
1500 1501 """return the list of all nodes that have no children
1501 1502
1502 1503 if start is specified, only heads that are descendants of
1503 1504 start will be returned
1504 1505 if stop is specified, it will consider all the revs from stop
1505 1506 as if they had no children
1506 1507 """
1507 1508 if start is None and stop is None:
1508 1509 if not len(self):
1509 1510 return [self.nullid]
1510 1511 return [self.node(r) for r in self.headrevs()]
1511 1512
1512 1513 if start is None:
1513 1514 start = nullrev
1514 1515 else:
1515 1516 start = self.rev(start)
1516 1517
1517 1518 stoprevs = {self.rev(n) for n in stop or []}
1518 1519
1519 1520 revs = dagop.headrevssubset(
1520 1521 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1521 1522 )
1522 1523
1523 1524 return [self.node(rev) for rev in revs]
1524 1525
1525 1526 def children(self, node):
1526 1527 """find the children of a given node"""
1527 1528 c = []
1528 1529 p = self.rev(node)
1529 1530 for r in self.revs(start=p + 1):
1530 1531 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1531 1532 if prevs:
1532 1533 for pr in prevs:
1533 1534 if pr == p:
1534 1535 c.append(self.node(r))
1535 1536 elif p == nullrev:
1536 1537 c.append(self.node(r))
1537 1538 return c
1538 1539
1539 1540 def commonancestorsheads(self, a, b):
1540 1541 """calculate all the heads of the common ancestors of nodes a and b"""
1541 1542 a, b = self.rev(a), self.rev(b)
1542 1543 ancs = self._commonancestorsheads(a, b)
1543 1544 return pycompat.maplist(self.node, ancs)
1544 1545
1545 1546 def _commonancestorsheads(self, *revs):
1546 1547 """calculate all the heads of the common ancestors of revs"""
1547 1548 try:
1548 1549 ancs = self.index.commonancestorsheads(*revs)
1549 1550 except (AttributeError, OverflowError): # C implementation failed
1550 1551 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1551 1552 return ancs
1552 1553
1553 1554 def isancestor(self, a, b):
1554 1555 """return True if node a is an ancestor of node b
1555 1556
1556 1557 A revision is considered an ancestor of itself."""
1557 1558 a, b = self.rev(a), self.rev(b)
1558 1559 return self.isancestorrev(a, b)
1559 1560
1560 1561 def isancestorrev(self, a, b):
1561 1562 """return True if revision a is an ancestor of revision b
1562 1563
1563 1564 A revision is considered an ancestor of itself.
1564 1565
1565 1566 The implementation of this is trivial but the use of
1566 1567 reachableroots is not."""
1567 1568 if a == nullrev:
1568 1569 return True
1569 1570 elif a == b:
1570 1571 return True
1571 1572 elif a > b:
1572 1573 return False
1573 1574 return bool(self.reachableroots(a, [b], [a], includepath=False))
1574 1575
1575 1576 def reachableroots(self, minroot, heads, roots, includepath=False):
1576 1577 """return (heads(::(<roots> and <roots>::<heads>)))
1577 1578
1578 1579 If includepath is True, return (<roots>::<heads>)."""
1579 1580 try:
1580 1581 return self.index.reachableroots2(
1581 1582 minroot, heads, roots, includepath
1582 1583 )
1583 1584 except AttributeError:
1584 1585 return dagop._reachablerootspure(
1585 1586 self.parentrevs, minroot, roots, heads, includepath
1586 1587 )
1587 1588
1588 1589 def ancestor(self, a, b):
1589 1590 """calculate the "best" common ancestor of nodes a and b"""
1590 1591
1591 1592 a, b = self.rev(a), self.rev(b)
1592 1593 try:
1593 1594 ancs = self.index.ancestors(a, b)
1594 1595 except (AttributeError, OverflowError):
1595 1596 ancs = ancestor.ancestors(self.parentrevs, a, b)
1596 1597 if ancs:
1597 1598 # choose a consistent winner when there's a tie
1598 1599 return min(map(self.node, ancs))
1599 1600 return self.nullid
1600 1601
1601 1602 def _match(self, id):
1602 1603 if isinstance(id, int):
1603 1604 # rev
1604 1605 return self.node(id)
1605 1606 if len(id) == self.nodeconstants.nodelen:
1606 1607 # possibly a binary node
1607 1608 # odds of a binary node being all hex in ASCII are 1 in 10**25
1608 1609 try:
1609 1610 node = id
1610 1611 self.rev(node) # quick search the index
1611 1612 return node
1612 1613 except error.LookupError:
1613 1614 pass # may be partial hex id
1614 1615 try:
1615 1616 # str(rev)
1616 1617 rev = int(id)
1617 1618 if b"%d" % rev != id:
1618 1619 raise ValueError
1619 1620 if rev < 0:
1620 1621 rev = len(self) + rev
1621 1622 if rev < 0 or rev >= len(self):
1622 1623 raise ValueError
1623 1624 return self.node(rev)
1624 1625 except (ValueError, OverflowError):
1625 1626 pass
1626 1627 if len(id) == 2 * self.nodeconstants.nodelen:
1627 1628 try:
1628 1629 # a full hex nodeid?
1629 1630 node = bin(id)
1630 1631 self.rev(node)
1631 1632 return node
1632 1633 except (binascii.Error, error.LookupError):
1633 1634 pass
1634 1635
1635 1636 def _partialmatch(self, id):
1636 1637 # we don't care about wdirfilenodeids as they should always be full hashes
1637 1638 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1638 1639 ambiguous = False
1639 1640 try:
1640 1641 partial = self.index.partialmatch(id)
1641 1642 if partial and self.hasnode(partial):
1642 1643 if maybewdir:
1643 1644 # single 'ff...' match in radix tree, ambiguous with wdir
1644 1645 ambiguous = True
1645 1646 else:
1646 1647 return partial
1647 1648 elif maybewdir:
1648 1649 # no 'ff...' match in radix tree, wdir identified
1649 1650 raise error.WdirUnsupported
1650 1651 else:
1651 1652 return None
1652 1653 except error.RevlogError:
1653 1654 # parsers.c radix tree lookup gave multiple matches
1654 1655 # fast path: for unfiltered changelog, radix tree is accurate
1655 1656 if not getattr(self, 'filteredrevs', None):
1656 1657 ambiguous = True
1657 1658 # fall through to slow path that filters hidden revisions
1658 1659 except (AttributeError, ValueError):
1659 1660 # we are pure python, or key is not hex
1660 1661 pass
1661 1662 if ambiguous:
1662 1663 raise error.AmbiguousPrefixLookupError(
1663 1664 id, self.display_id, _(b'ambiguous identifier')
1664 1665 )
1665 1666
1666 1667 if id in self._pcache:
1667 1668 return self._pcache[id]
1668 1669
1669 1670 if len(id) <= 40:
1670 1671 # hex(node)[:...]
1671 1672 l = len(id) // 2 * 2 # grab an even number of digits
1672 1673 try:
1673 1674 # we're dropping the last digit, so let's check that it's hex,
1674 1675 # to avoid the expensive computation below if it's not
1675 1676 if len(id) % 2 > 0:
1676 1677 if not (id[-1] in hexdigits):
1677 1678 return None
1678 1679 prefix = bin(id[:l])
1679 1680 except binascii.Error:
1680 1681 pass
1681 1682 else:
1682 1683 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1683 1684 nl = [
1684 1685 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1685 1686 ]
1686 1687 if self.nodeconstants.nullhex.startswith(id):
1687 1688 nl.append(self.nullid)
1688 1689 if len(nl) > 0:
1689 1690 if len(nl) == 1 and not maybewdir:
1690 1691 self._pcache[id] = nl[0]
1691 1692 return nl[0]
1692 1693 raise error.AmbiguousPrefixLookupError(
1693 1694 id, self.display_id, _(b'ambiguous identifier')
1694 1695 )
1695 1696 if maybewdir:
1696 1697 raise error.WdirUnsupported
1697 1698 return None
1698 1699
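# Illustrative sketch (editor addition): resolving an odd-length hex
# prefix the way the slow path above does -- narrow on an even number of
# digits in binary form first, then compare the full hex strings.
# ``nodes`` is a toy list of 20-byte ids, ``hexid`` a bytes hex prefix.
import binascii

def toy_prefix_candidates(nodes, hexid):
    l = len(hexid) // 2 * 2  # grab an even number of digits
    prefix = binascii.unhexlify(hexid[:l])
    return [
        n
        for n in nodes
        if n.startswith(prefix) and binascii.hexlify(n).startswith(hexid)
    ]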
1699 1700 def lookup(self, id):
1700 1701 """locate a node based on:
1701 1702 - revision number or str(revision number)
1702 1703 - nodeid or subset of hex nodeid
1703 1704 """
1704 1705 n = self._match(id)
1705 1706 if n is not None:
1706 1707 return n
1707 1708 n = self._partialmatch(id)
1708 1709 if n:
1709 1710 return n
1710 1711
1711 1712 raise error.LookupError(id, self.display_id, _(b'no match found'))
1712 1713
1713 1714 def shortest(self, node, minlength=1):
1714 1715 """Find the shortest unambiguous prefix that matches node."""
1715 1716
1716 1717 def isvalid(prefix):
1717 1718 try:
1718 1719 matchednode = self._partialmatch(prefix)
1719 1720 except error.AmbiguousPrefixLookupError:
1720 1721 return False
1721 1722 except error.WdirUnsupported:
1722 1723 # single 'ff...' match
1723 1724 return True
1724 1725 if matchednode is None:
1725 1726 raise error.LookupError(node, self.display_id, _(b'no node'))
1726 1727 return True
1727 1728
1728 1729 def maybewdir(prefix):
1729 1730 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1730 1731
1731 1732 hexnode = hex(node)
1732 1733
1733 1734 def disambiguate(hexnode, minlength):
1734 1735 """Disambiguate against wdirid."""
1735 1736 for length in range(minlength, len(hexnode) + 1):
1736 1737 prefix = hexnode[:length]
1737 1738 if not maybewdir(prefix):
1738 1739 return prefix
1739 1740
1740 1741 if not getattr(self, 'filteredrevs', None):
1741 1742 try:
1742 1743 length = max(self.index.shortest(node), minlength)
1743 1744 return disambiguate(hexnode, length)
1744 1745 except error.RevlogError:
1745 1746 if node != self.nodeconstants.wdirid:
1746 1747 raise error.LookupError(
1747 1748 node, self.display_id, _(b'no node')
1748 1749 )
1749 1750 except AttributeError:
1750 1751 # Fall through to pure code
1751 1752 pass
1752 1753
1753 1754 if node == self.nodeconstants.wdirid:
1754 1755 for length in range(minlength, len(hexnode) + 1):
1755 1756 prefix = hexnode[:length]
1756 1757 if isvalid(prefix):
1757 1758 return prefix
1758 1759
1759 1760 for length in range(minlength, len(hexnode) + 1):
1760 1761 prefix = hexnode[:length]
1761 1762 if isvalid(prefix):
1762 1763 return disambiguate(hexnode, length)
1763 1764
1764 1765 def cmp(self, node, text):
1765 1766 """compare text with a given file revision
1766 1767
1767 1768 returns True if text is different than what is stored.
1768 1769 """
1769 1770 p1, p2 = self.parents(node)
1770 1771 return storageutil.hashrevisionsha1(text, p1, p2) != node
1771 1772
1772 1773 def _getsegmentforrevs(self, startrev, endrev, df=None):
1773 1774 """Obtain a segment of raw data corresponding to a range of revisions.
1774 1775
1775 1776 Accepts the start and end revisions and an optional already-open
1776 1777 file handle to be used for reading. If the file handle is read, its
1777 1778 seek position will not be preserved.
1778 1779
1779 1780 Requests for data may be satisfied by a cache.
1780 1781
1781 1782 Returns a 2-tuple of (offset, data) for the requested range of
1782 1783 revisions. Offset is the integer offset from the beginning of the
1783 1784 revlog and data is a str or buffer of the raw byte data.
1784 1785
1785 1786 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1786 1787 to determine where each revision's data begins and ends.
1787 1788 """
1788 1789 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1789 1790 # (functions are expensive).
1790 1791 index = self.index
1791 1792 istart = index[startrev]
1792 1793 start = int(istart[0] >> 16)
1793 1794 if startrev == endrev:
1794 1795 end = start + istart[1]
1795 1796 else:
1796 1797 iend = index[endrev]
1797 1798 end = int(iend[0] >> 16) + iend[1]
1798 1799
1799 1800 if self._inline:
1800 1801 start += (startrev + 1) * self.index.entry_size
1801 1802 end += (endrev + 1) * self.index.entry_size
1802 1803 length = end - start
1803 1804
1804 1805 return start, self._segmentfile.read_chunk(start, length, df)
1805 1806
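# Illustrative sketch (editor addition) of the inline adjustment above:
# in an inline revlog, index entries and data chunks are interleaved in
# one file, so rev r's data physically starts after r + 1 index entries
# (the entries for revisions 0..r).
def toy_inline_offset(logical_start, rev, entry_size):
    return logical_start + (rev + 1) * entry_size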
1806 1807 def _chunk(self, rev, df=None):
1807 1808 """Obtain a single decompressed chunk for a revision.
1808 1809
1809 1810 Accepts an integer revision and an optional already-open file handle
1810 1811 to be used for reading. If used, the seek position of the file will not
1811 1812 be preserved.
1812 1813
1813 1814 Returns a str holding uncompressed data for the requested revision.
1814 1815 """
1815 1816 compression_mode = self.index[rev][10]
1816 1817 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1817 1818 if compression_mode == COMP_MODE_PLAIN:
1818 1819 return data
1819 1820 elif compression_mode == COMP_MODE_DEFAULT:
1820 1821 return self._decompressor(data)
1821 1822 elif compression_mode == COMP_MODE_INLINE:
1822 1823 return self.decompress(data)
1823 1824 else:
1824 1825 msg = b'unknown compression mode %d'
1825 1826 msg %= compression_mode
1826 1827 raise error.RevlogError(msg)
1827 1828
1828 1829 def _chunks(self, revs, df=None, targetsize=None):
1829 1830 """Obtain decompressed chunks for the specified revisions.
1830 1831
1831 1832 Accepts an iterable of numeric revisions that are assumed to be in
1832 1833 ascending order. Also accepts an optional already-open file handle
1833 1834 to be used for reading. If used, the seek position of the file will
1834 1835 not be preserved.
1835 1836
1836 1837 This function is similar to calling ``self._chunk()`` multiple times,
1837 1838 but is faster.
1838 1839
1839 1840 Returns a list with decompressed data for each requested revision.
1840 1841 """
1841 1842 if not revs:
1842 1843 return []
1843 1844 start = self.start
1844 1845 length = self.length
1845 1846 inline = self._inline
1846 1847 iosize = self.index.entry_size
1847 1848 buffer = util.buffer
1848 1849
1849 1850 l = []
1850 1851 ladd = l.append
1851 1852
1852 1853 if not self._withsparseread:
1853 1854 slicedchunks = (revs,)
1854 1855 else:
1855 1856 slicedchunks = deltautil.slicechunk(
1856 1857 self, revs, targetsize=targetsize
1857 1858 )
1858 1859
1859 1860 for revschunk in slicedchunks:
1860 1861 firstrev = revschunk[0]
1861 1862 # Skip trailing revisions with empty diff
1862 1863 for lastrev in revschunk[::-1]:
1863 1864 if length(lastrev) != 0:
1864 1865 break
1865 1866
1866 1867 try:
1867 1868 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1868 1869 except OverflowError:
1869 1870 # issue4215 - we can't cache a run of chunks greater than
1870 1871 # 2G on Windows
1871 1872 return [self._chunk(rev, df=df) for rev in revschunk]
1872 1873
1873 1874 decomp = self.decompress
1874 1875 # self._decompressor might be None, but will not be used in that case
1875 1876 def_decomp = self._decompressor
1876 1877 for rev in revschunk:
1877 1878 chunkstart = start(rev)
1878 1879 if inline:
1879 1880 chunkstart += (rev + 1) * iosize
1880 1881 chunklength = length(rev)
1881 1882 comp_mode = self.index[rev][10]
1882 1883 c = buffer(data, chunkstart - offset, chunklength)
1883 1884 if comp_mode == COMP_MODE_PLAIN:
1884 1885 ladd(c)
1885 1886 elif comp_mode == COMP_MODE_INLINE:
1886 1887 ladd(decomp(c))
1887 1888 elif comp_mode == COMP_MODE_DEFAULT:
1888 1889 ladd(def_decomp(c))
1889 1890 else:
1890 1891 msg = b'unknown compression mode %d'
1891 1892 msg %= comp_mode
1892 1893 raise error.RevlogError(msg)
1893 1894
1894 1895 return l
1895 1896
1896 1897 def deltaparent(self, rev):
1897 1898 """return deltaparent of the given revision"""
1898 1899 base = self.index[rev][3]
1899 1900 if base == rev:
1900 1901 return nullrev
1901 1902 elif self._generaldelta:
1902 1903 return base
1903 1904 else:
1904 1905 return rev - 1
1905 1906
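# Illustrative sketch (editor addition): how the stored base field
# (entry[3]) is interpreted with and without general delta.
def toy_deltaparent(stored_base, rev, generaldelta, nullrev=-1):
    if stored_base == rev:
        return nullrev       # base == rev means a full snapshot
    if generaldelta:
        return stored_base   # the delta base is stored explicitly
    return rev - 1           # legacy layout: always the previous rev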
1906 1907 def issnapshot(self, rev):
1907 1908 """tells whether rev is a snapshot"""
1908 1909 if not self._sparserevlog:
1909 1910 return self.deltaparent(rev) == nullrev
1910 1911 elif hasattr(self.index, 'issnapshot'):
1911 1912 # directly assign the method to cache the testing and access
1912 1913 self.issnapshot = self.index.issnapshot
1913 1914 return self.issnapshot(rev)
1914 1915 if rev == nullrev:
1915 1916 return True
1916 1917 entry = self.index[rev]
1917 1918 base = entry[3]
1918 1919 if base == rev:
1919 1920 return True
1920 1921 if base == nullrev:
1921 1922 return True
1922 1923 p1 = entry[5]
1923 1924 while self.length(p1) == 0:
1924 1925 b = self.deltaparent(p1)
1925 1926 if b == p1:
1926 1927 break
1927 1928 p1 = b
1928 1929 p2 = entry[6]
1929 1930 while self.length(p2) == 0:
1930 1931 b = self.deltaparent(p2)
1931 1932 if b == p2:
1932 1933 break
1933 1934 p2 = b
1934 1935 if base == p1 or base == p2:
1935 1936 return False
1936 1937 return self.issnapshot(base)
1937 1938
1938 1939 def snapshotdepth(self, rev):
1939 1940 """number of snapshots in the chain before this one"""
1940 1941 if not self.issnapshot(rev):
1941 1942 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1942 1943 return len(self._deltachain(rev)[0]) - 1
1943 1944
1944 1945 def revdiff(self, rev1, rev2):
1945 1946 """return or calculate a delta between two revisions
1946 1947
1947 1948 The delta calculated is in binary form and is intended to be written to
1948 1949 revlog data directly. So this function needs raw revision data.
1949 1950 """
1950 1951 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1951 1952 return bytes(self._chunk(rev2))
1952 1953
1953 1954 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1954 1955
1955 1956 def revision(self, nodeorrev, _df=None):
1956 1957 """return an uncompressed revision of a given node or revision
1957 1958 number.
1958 1959
1959 1960 _df - an existing file handle to read from. (internal-only)
1960 1961 """
1961 1962 return self._revisiondata(nodeorrev, _df)
1962 1963
1963 1964 def sidedata(self, nodeorrev, _df=None):
1964 1965 """a map of extra data related to the changeset but not part of the hash
1965 1966
1966 1967 This function currently returns a dictionary. However, a more advanced
1967 1968 mapping object will likely be used in the future for more
1968 1969 efficient/lazy code.
1969 1970 """
1970 1971 # deal with <nodeorrev> argument type
1971 1972 if isinstance(nodeorrev, int):
1972 1973 rev = nodeorrev
1973 1974 else:
1974 1975 rev = self.rev(nodeorrev)
1975 1976 return self._sidedata(rev)
1976 1977
1977 1978 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1978 1979 # deal with <nodeorrev> argument type
1979 1980 if isinstance(nodeorrev, int):
1980 1981 rev = nodeorrev
1981 1982 node = self.node(rev)
1982 1983 else:
1983 1984 node = nodeorrev
1984 1985 rev = None
1985 1986
1986 1987 # fast path the special `nullid` rev
1987 1988 if node == self.nullid:
1988 1989 return b""
1989 1990
1990 1991 # ``rawtext`` is the text as stored inside the revlog. Might be the
1991 1992 # revision or might need to be processed to retrieve the revision.
1992 1993 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1993 1994
1994 1995 if raw and validated:
1995 1996 # if we don't want to process the raw text and the raw
1996 1997 # text is cached, we can exit early.
1997 1998 return rawtext
1998 1999 if rev is None:
1999 2000 rev = self.rev(node)
2000 2001 # the revlog's flag for this revision
2001 2002 # (usually alter its state or content)
2002 2003 flags = self.flags(rev)
2003 2004
2004 2005 if validated and flags == REVIDX_DEFAULT_FLAGS:
2005 2006 # no extra flags set, no flag processor runs, text = rawtext
2006 2007 return rawtext
2007 2008
2008 2009 if raw:
2009 2010 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2010 2011 text = rawtext
2011 2012 else:
2012 2013 r = flagutil.processflagsread(self, rawtext, flags)
2013 2014 text, validatehash = r
2014 2015 if validatehash:
2015 2016 self.checkhash(text, node, rev=rev)
2016 2017 if not validated:
2017 2018 self._revisioncache = (node, rev, rawtext)
2018 2019
2019 2020 return text
2020 2021
2021 2022 def _rawtext(self, node, rev, _df=None):
2022 2023 """return the possibly unvalidated rawtext for a revision
2023 2024
2024 2025 returns (rev, rawtext, validated)
2025 2026 """
2026 2027
2027 2028 # revision in the cache (could be useful to apply delta)
2028 2029 cachedrev = None
2029 2030 # An intermediate text to apply deltas to
2030 2031 basetext = None
2031 2032
2032 2033 # Check if we have the entry in cache
2033 2034 # The cache entry looks like (node, rev, rawtext)
2034 2035 if self._revisioncache:
2035 2036 if self._revisioncache[0] == node:
2036 2037 return (rev, self._revisioncache[2], True)
2037 2038 cachedrev = self._revisioncache[1]
2038 2039
2039 2040 if rev is None:
2040 2041 rev = self.rev(node)
2041 2042
2042 2043 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2043 2044 if stopped:
2044 2045 basetext = self._revisioncache[2]
2045 2046
2046 2047 # drop cache to save memory, the caller is expected to
2047 2048 # update self._revisioncache after validating the text
2048 2049 self._revisioncache = None
2049 2050
2050 2051 targetsize = None
2051 2052 rawsize = self.index[rev][2]
2052 2053 if 0 <= rawsize:
2053 2054 targetsize = 4 * rawsize
2054 2055
2055 2056 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2056 2057 if basetext is None:
2057 2058 basetext = bytes(bins[0])
2058 2059 bins = bins[1:]
2059 2060
2060 2061 rawtext = mdiff.patches(basetext, bins)
2061 2062 del basetext # let us have a chance to free memory early
2062 2063 return (rev, rawtext, False)
2063 2064
2064 2065 def _sidedata(self, rev):
2065 2066 """Return the sidedata for a given revision number."""
2066 2067 index_entry = self.index[rev]
2067 2068 sidedata_offset = index_entry[8]
2068 2069 sidedata_size = index_entry[9]
2069 2070
2070 2071 if self._inline:
2071 2072 sidedata_offset += self.index.entry_size * (1 + rev)
2072 2073 if sidedata_size == 0:
2073 2074 return {}
2074 2075
2075 2076 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2076 2077 filename = self._sidedatafile
2077 2078 end = self._docket.sidedata_end
2078 2079 offset = sidedata_offset
2079 2080 length = sidedata_size
2080 2081 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2081 2082 raise error.RevlogError(m)
2082 2083
2083 2084 comp_segment = self._segmentfile_sidedata.read_chunk(
2084 2085 sidedata_offset, sidedata_size
2085 2086 )
2086 2087
2087 2088 comp = self.index[rev][11]
2088 2089 if comp == COMP_MODE_PLAIN:
2089 2090 segment = comp_segment
2090 2091 elif comp == COMP_MODE_DEFAULT:
2091 2092 segment = self._decompressor(comp_segment)
2092 2093 elif comp == COMP_MODE_INLINE:
2093 2094 segment = self.decompress(comp_segment)
2094 2095 else:
2095 2096 msg = b'unknown compression mode %d'
2096 2097 msg %= comp
2097 2098 raise error.RevlogError(msg)
2098 2099
2099 2100 sidedata = sidedatautil.deserialize_sidedata(segment)
2100 2101 return sidedata
2101 2102
2102 2103 def rawdata(self, nodeorrev, _df=None):
2103 2104 """return an uncompressed raw data of a given node or revision number.
2104 2105
2105 2106 _df - an existing file handle to read from. (internal-only)
2106 2107 """
2107 2108 return self._revisiondata(nodeorrev, _df, raw=True)
2108 2109
2109 2110 def hash(self, text, p1, p2):
2110 2111 """Compute a node hash.
2111 2112
2112 2113 Available as a function so that subclasses can replace the hash
2113 2114 as needed.
2114 2115 """
2115 2116 return storageutil.hashrevisionsha1(text, p1, p2)
2116 2117
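# Illustrative sketch (editor addition), assuming the usual SHA-1 node
# definition: hash the two parent nodes in sorted order, then the text.
# This is what storageutil.hashrevisionsha1 computes, up to internal
# fast paths.
import hashlib

def toy_hashrevision(text, p1, p2):
    lo, hi = sorted([p1, p2])
    s = hashlib.sha1(lo)
    s.update(hi)
    s.update(text)
    return s.digest()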
2117 2118 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2118 2119 """Check node hash integrity.
2119 2120
2120 2121 Available as a function so that subclasses can extend hash mismatch
2121 2122 behaviors as needed.
2122 2123 """
2123 2124 try:
2124 2125 if p1 is None and p2 is None:
2125 2126 p1, p2 = self.parents(node)
2126 2127 if node != self.hash(text, p1, p2):
2127 2128 # Clear the revision cache on hash failure. The revision cache
2128 2129 # only stores the raw revision and clearing the cache does have
2129 2130 # the side-effect that we won't have a cache hit when the raw
2130 2131 # revision data is accessed. But this case should be rare and
2131 2132 # it is extra work to teach the cache about the hash
2132 2133 # verification state.
2133 2134 if self._revisioncache and self._revisioncache[0] == node:
2134 2135 self._revisioncache = None
2135 2136
2136 2137 revornode = rev
2137 2138 if revornode is None:
2138 2139 revornode = templatefilters.short(hex(node))
2139 2140 raise error.RevlogError(
2140 2141 _(b"integrity check failed on %s:%s")
2141 2142 % (self.display_id, pycompat.bytestr(revornode))
2142 2143 )
2143 2144 except error.RevlogError:
2144 2145 if self._censorable and storageutil.iscensoredtext(text):
2145 2146 raise error.CensoredNodeError(self.display_id, node, text)
2146 2147 raise
2147 2148
2148 2149 @property
2149 2150 def _split_index_file(self):
2150 2151 """the path where the index of an ongoing splitting operation is expected
2151 2152
2152 2153 The file will only exist if a splitting operation is in progress, but
2153 2154 it is always expected at the same location."""
2154 2155 parts = self.radix.split(b'/')
2155 2156 if len(parts) > 1:
2156 2157 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2157 2158 head = parts[0] + b'-s'
2158 2159 mids = parts[1:-1]
2159 2160 tail = parts[-1] + b'.i'
2160 2161 pieces = [head] + mids + [tail]
2161 2162 return b'/'.join(pieces)
2162 2163 else:
2163 2164 # the revlog is stored at the root of the store (changelog or
2164 2165 # manifest), no risk of collision.
2165 2166 return self.radix + b'.i.s'
2166 2167
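# Illustrative sketch (editor addition): the split-index path rule
# implemented by the property above, on byte paths.
def toy_split_index_path(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'

assert toy_split_index_path(b'data/foo') == b'data-s/foo.i'
assert toy_split_index_path(b'00changelog') == b'00changelog.i.s'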
2167 2168 def _enforceinlinesize(self, tr, side_write=True):
2168 2169 """Check if the revlog is too big for inline and convert if so.
2169 2170
2170 2171 This should be called after revisions are added to the revlog. If the
2171 2172 revlog has grown too large to be an inline revlog, it will convert it
2172 2173 to use multiple index and data files.
2173 2174 """
2174 2175 tiprev = len(self) - 1
2175 2176 total_size = self.start(tiprev) + self.length(tiprev)
2176 2177 if not self._inline or total_size < _maxinline:
2177 2178 return
2178 2179
2179 2180 troffset = tr.findoffset(self._indexfile)
2180 2181 if troffset is None:
2181 2182 raise error.RevlogError(
2182 2183 _(b"%s not found in the transaction") % self._indexfile
2183 2184 )
2184 2185 if troffset:
2185 2186 tr.addbackup(self._indexfile, for_offset=True)
2186 2187 tr.add(self._datafile, 0)
2187 2188
2188 2189 existing_handles = False
2189 2190 if self._writinghandles is not None:
2190 2191 existing_handles = True
2191 2192 fp = self._writinghandles[0]
2192 2193 fp.flush()
2193 2194 fp.close()
2194 2195 # We can't use the cached file handle after close(). So prevent
2195 2196 # its usage.
2196 2197 self._writinghandles = None
2197 2198 self._segmentfile.writing_handle = None
2198 2199 # No need to deal with the sidedata writing handle as it is only
2199 2200 # relevant for revlog-v2, which is never inline and thus never
2200 2201 # reaches this code
2201 2202 if side_write:
2202 2203 old_index_file_path = self._indexfile
2203 2204 new_index_file_path = self._split_index_file
2204 2205 opener = self.opener
2205 2206 weak_self = weakref.ref(self)
2206 2207
2207 2208 # the "split" index replaces the real index when the transaction is finalized
2208 2209 def finalize_callback(tr):
2209 2210 opener.rename(
2210 2211 new_index_file_path,
2211 2212 old_index_file_path,
2212 2213 checkambig=True,
2213 2214 )
2214 2215 maybe_self = weak_self()
2215 2216 if maybe_self is not None:
2216 2217 maybe_self._indexfile = old_index_file_path
2217 2218
2218 2219 def abort_callback(tr):
2219 2220 maybe_self = weak_self()
2220 2221 if maybe_self is not None:
2221 2222 maybe_self._indexfile = old_index_file_path
2222 2223
2223 2224 tr.registertmp(new_index_file_path)
2224 2225 if self.target[1] is not None:
2225 2226 callback_id = b'000-revlog-split-%d-%s' % self.target
2226 2227 else:
2227 2228 callback_id = b'000-revlog-split-%d' % self.target[0]
2228 2229 tr.addfinalize(callback_id, finalize_callback)
2229 2230 tr.addabort(callback_id, abort_callback)
2230 2231
2231 2232 new_dfh = self._datafp(b'w+')
2232 2233 new_dfh.truncate(0) # drop any potentially existing data
2233 2234 try:
2234 2235 with self._indexfp() as read_ifh:
2235 2236 for r in self:
2236 2237 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2237 2238 new_dfh.flush()
2238 2239
2239 2240 if side_write:
2240 2241 self._indexfile = new_index_file_path
2241 2242 with self.__index_new_fp() as fp:
2242 2243 self._format_flags &= ~FLAG_INLINE_DATA
2243 2244 self._inline = False
2244 2245 for i in self:
2245 2246 e = self.index.entry_binary(i)
2246 2247 if i == 0 and self._docket is None:
2247 2248 header = self._format_flags | self._format_version
2248 2249 header = self.index.pack_header(header)
2249 2250 e = header + e
2250 2251 fp.write(e)
2251 2252 if self._docket is not None:
2252 2253 self._docket.index_end = fp.tell()
2253 2254
2254 2255 # If we don't use side-write, the temp file replaces the real
2255 2256 # index when we exit the context manager
2256 2257
2257 2258 nodemaputil.setup_persistent_nodemap(tr, self)
2258 2259 self._segmentfile = randomaccessfile.randomaccessfile(
2259 2260 self.opener,
2260 2261 self._datafile,
2261 2262 self._chunkcachesize,
2262 2263 )
2263 2264
2264 2265 if existing_handles:
2265 2266 # switched from inline to conventional; reopen the index
2266 2267 ifh = self.__index_write_fp()
2267 2268 self._writinghandles = (ifh, new_dfh, None)
2268 2269 self._segmentfile.writing_handle = new_dfh
2269 2270 new_dfh = None
2270 2271 # No need to deal with the sidedata writing handle as it is only
2271 2272 # relevant for revlog-v2, which is never inline and thus never
2272 2273 # reaches this code
2273 2274 finally:
2274 2275 if new_dfh is not None:
2275 2276 new_dfh.close()
2276 2277
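# Illustrative sketch (editor addition): the trigger condition checked at
# the top of _enforceinlinesize, with _maxinline = 131072 bytes (see the
# module-level constant).
def toy_must_split(inline, tip_start, tip_length, maxinline=131072):
    total_size = tip_start + tip_length
    return inline and total_size >= maxinline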
2277 2278 def _nodeduplicatecallback(self, transaction, node):
2278 2279 """called when trying to add a node already stored."""
2279 2280
2280 2281 @contextlib.contextmanager
2281 2282 def reading(self):
2282 2283 """Context manager that keeps data and sidedata files open for reading"""
2283 2284 with self._segmentfile.reading():
2284 2285 with self._segmentfile_sidedata.reading():
2285 2286 yield
2286 2287
2287 2288 @contextlib.contextmanager
2288 2289 def _writing(self, transaction):
2289 2290 if self._trypending:
2290 2291 msg = b'try to write in a `trypending` revlog: %s'
2291 2292 msg %= self.display_id
2292 2293 raise error.ProgrammingError(msg)
2293 2294 if self._writinghandles is not None:
2294 2295 yield
2295 2296 else:
2296 2297 ifh = dfh = sdfh = None
2297 2298 try:
2298 2299 r = len(self)
2299 2300 # opening the data file.
2300 2301 dsize = 0
2301 2302 if r:
2302 2303 dsize = self.end(r - 1)
2303 2304 dfh = None
2304 2305 if not self._inline:
2305 2306 try:
2306 2307 dfh = self._datafp(b"r+")
2307 2308 if self._docket is None:
2308 2309 dfh.seek(0, os.SEEK_END)
2309 2310 else:
2310 2311 dfh.seek(self._docket.data_end, os.SEEK_SET)
2311 2312 except FileNotFoundError:
2312 2313 dfh = self._datafp(b"w+")
2313 2314 transaction.add(self._datafile, dsize)
2314 2315 if self._sidedatafile is not None:
2315 2316 # revlog-v2 does not inline, help Pytype
2316 2317 assert dfh is not None
2317 2318 try:
2318 2319 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2319 2320 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2320 2321 except FileNotFoundError:
2321 2322 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2322 2323 transaction.add(
2323 2324 self._sidedatafile, self._docket.sidedata_end
2324 2325 )
2325 2326
2326 2327 # opening the index file.
2327 2328 isize = r * self.index.entry_size
2328 2329 ifh = self.__index_write_fp()
2329 2330 if self._inline:
2330 2331 transaction.add(self._indexfile, dsize + isize)
2331 2332 else:
2332 2333 transaction.add(self._indexfile, isize)
2333 2334 # exposing all file handle for writing.
2334 2335 self._writinghandles = (ifh, dfh, sdfh)
2335 2336 self._segmentfile.writing_handle = ifh if self._inline else dfh
2336 2337 self._segmentfile_sidedata.writing_handle = sdfh
2337 2338 yield
2338 2339 if self._docket is not None:
2339 2340 self._write_docket(transaction)
2340 2341 finally:
2341 2342 self._writinghandles = None
2342 2343 self._segmentfile.writing_handle = None
2343 2344 self._segmentfile_sidedata.writing_handle = None
2344 2345 if dfh is not None:
2345 2346 dfh.close()
2346 2347 if sdfh is not None:
2347 2348 sdfh.close()
2348 2349 # closing the index file last to avoid exposing references to
2349 2350 # potentially unflushed data content.
2350 2351 if ifh is not None:
2351 2352 ifh.close()
2352 2353
2353 2354 def _write_docket(self, transaction):
2354 2355 """write the current docket on disk
2355 2356
2356 2357 Exists as a method to help the changelog implement transaction logic
2357 2358
2358 2359 We could also imagine using the same transaction logic for all revlogs
2359 2360 since dockets are cheap."""
2360 2361 self._docket.write(transaction)
2361 2362
2362 2363 def addrevision(
2363 2364 self,
2364 2365 text,
2365 2366 transaction,
2366 2367 link,
2367 2368 p1,
2368 2369 p2,
2369 2370 cachedelta=None,
2370 2371 node=None,
2371 2372 flags=REVIDX_DEFAULT_FLAGS,
2372 2373 deltacomputer=None,
2373 2374 sidedata=None,
2374 2375 ):
2375 2376 """add a revision to the log
2376 2377
2377 2378 text - the revision data to add
2378 2379 transaction - the transaction object used for rollback
2379 2380 link - the linkrev data to add
2380 2381 p1, p2 - the parent nodeids of the revision
2381 2382 cachedelta - an optional precomputed delta
2382 2383 node - nodeid of revision; typically node is not specified, and it is
2383 2384 computed by default as hash(text, p1, p2); however, subclasses might
2384 2385 use a different hashing method (and override checkhash() in that case)
2385 2386 flags - the known flags to set on the revision
2386 2387 deltacomputer - an optional deltacomputer instance shared between
2387 2388 multiple calls
2388 2389 """
2389 2390 if link == nullrev:
2390 2391 raise error.RevlogError(
2391 2392 _(b"attempted to add linkrev -1 to %s") % self.display_id
2392 2393 )
2393 2394
2394 2395 if sidedata is None:
2395 2396 sidedata = {}
2396 2397 elif sidedata and not self.hassidedata:
2397 2398 raise error.ProgrammingError(
2398 2399 _(b"trying to add sidedata to a revlog that does not support them")
2399 2400 )
2400 2401
2401 2402 if flags:
2402 2403 node = node or self.hash(text, p1, p2)
2403 2404
2404 2405 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2405 2406
2406 2407 # If the flag processor modifies the revision data, ignore any provided
2407 2408 # cachedelta.
2408 2409 if rawtext != text:
2409 2410 cachedelta = None
2410 2411
2411 2412 if len(rawtext) > _maxentrysize:
2412 2413 raise error.RevlogError(
2413 2414 _(
2414 2415 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2415 2416 )
2416 2417 % (self.display_id, len(rawtext))
2417 2418 )
2418 2419
2419 2420 node = node or self.hash(rawtext, p1, p2)
2420 2421 rev = self.index.get_rev(node)
2421 2422 if rev is not None:
2422 2423 return rev
2423 2424
2424 2425 if validatehash:
2425 2426 self.checkhash(rawtext, node, p1=p1, p2=p2)
2426 2427
2427 2428 return self.addrawrevision(
2428 2429 rawtext,
2429 2430 transaction,
2430 2431 link,
2431 2432 p1,
2432 2433 p2,
2433 2434 node,
2434 2435 flags,
2435 2436 cachedelta=cachedelta,
2436 2437 deltacomputer=deltacomputer,
2437 2438 sidedata=sidedata,
2438 2439 )
2439 2440
2440 2441 def addrawrevision(
2441 2442 self,
2442 2443 rawtext,
2443 2444 transaction,
2444 2445 link,
2445 2446 p1,
2446 2447 p2,
2447 2448 node,
2448 2449 flags,
2449 2450 cachedelta=None,
2450 2451 deltacomputer=None,
2451 2452 sidedata=None,
2452 2453 ):
2453 2454 """add a raw revision with known flags, node and parents
2454 2455 useful when reusing a revision not stored in this revlog (ex: received
2455 2456 over the wire, or read from an external bundle).
2456 2457 """
2457 2458 with self._writing(transaction):
2458 2459 return self._addrevision(
2459 2460 node,
2460 2461 rawtext,
2461 2462 transaction,
2462 2463 link,
2463 2464 p1,
2464 2465 p2,
2465 2466 flags,
2466 2467 cachedelta,
2467 2468 deltacomputer=deltacomputer,
2468 2469 sidedata=sidedata,
2469 2470 )
2470 2471
2471 2472 def compress(self, data):
2472 2473 """Generate a possibly-compressed representation of data."""
2473 2474 if not data:
2474 2475 return b'', data
2475 2476
2476 2477 compressed = self._compressor.compress(data)
2477 2478
2478 2479 if compressed:
2479 2480 # The revlog compressor added the header in the returned data.
2480 2481 return b'', compressed
2481 2482
2482 2483 if data[0:1] == b'\0':
2483 2484 return b'', data
2484 2485 return b'u', data
2485 2486
2486 2487 def decompress(self, data):
2487 2488 """Decompress a revlog chunk.
2488 2489
2489 2490 The chunk is expected to begin with a header identifying the
2490 2491 format type so it can be routed to an appropriate decompressor.
2491 2492 """
2492 2493 if not data:
2493 2494 return data
2494 2495
2495 2496 # Revlogs are read much more frequently than they are written and many
2496 2497 # chunks only take microseconds to decompress, so performance is
2497 2498 # important here.
2498 2499 #
2499 2500 # We can make a few assumptions about revlogs:
2500 2501 #
2501 2502 # 1) the majority of chunks will be compressed (as opposed to inline
2502 2503 # raw data).
2503 2504 # 2) decompressing *any* data will likely be at least 10x slower than
2504 2505 # returning raw inline data.
2505 2506 # 3) we want to prioritize common and officially supported compression
2506 2507 # engines
2507 2508 #
2508 2509 # It follows that we want to optimize for "decompress compressed data
2509 2510 # when encoded with common and officially supported compression engines"
2510 2511 # case over "raw data" and "data encoded by less common or non-official
2511 2512 # compression engines." That is why we have the inline lookup first
2512 2513 # followed by the compengines lookup.
2513 2514 #
2514 2515 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2515 2516 # compressed chunks. And this matters for changelog and manifest reads.
2516 2517 t = data[0:1]
2517 2518
2518 2519 if t == b'x':
2519 2520 try:
2520 2521 return _zlibdecompress(data)
2521 2522 except zlib.error as e:
2522 2523 raise error.RevlogError(
2523 2524 _(b'revlog decompress error: %s')
2524 2525 % stringutil.forcebytestr(e)
2525 2526 )
2526 2527 # '\0' is more common than 'u' so it goes first.
2527 2528 elif t == b'\0':
2528 2529 return data
2529 2530 elif t == b'u':
2530 2531 return util.buffer(data, 1)
2531 2532
2532 2533 compressor = self._get_decompressor(t)
2533 2534
2534 2535 return compressor.decompress(data)
2535 2536
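# Illustrative round-trip sketch (editor addition) of the chunk header
# convention used by compress()/decompress() above, assuming plain zlib
# as the inline compressor (zlib output starts with 'x', which is why
# 'x' is dispatched first).
import zlib

def toy_compress(data):
    if not data:
        return b'', data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return b'', comp   # zlib's own leading 'x' byte is the header
    if data[:1] == b'\0':
        return b'', data   # NUL-prefixed data can be stored as-is
    return b'u', data      # explicit "uncompressed" marker

def toy_decompress(data):
    t = data[:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0' or not data:
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown chunk header %r' % t)

for blob in (b'', b'\0raw', b'short', b'x' * 4096):
    header, packed = toy_compress(blob)
    assert toy_decompress(header + packed) == blob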
2536 2537 def _addrevision(
2537 2538 self,
2538 2539 node,
2539 2540 rawtext,
2540 2541 transaction,
2541 2542 link,
2542 2543 p1,
2543 2544 p2,
2544 2545 flags,
2545 2546 cachedelta,
2546 2547 alwayscache=False,
2547 2548 deltacomputer=None,
2548 2549 sidedata=None,
2549 2550 ):
2550 2551 """internal function to add revisions to the log
2551 2552
2552 2553 see addrevision for argument descriptions.
2553 2554
2554 2555 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2555 2556
2556 2557 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2557 2558 be used.
2558 2559
2559 2560 invariants:
2560 2561 - rawtext is optional (can be None); if not set, cachedelta must be set.
2561 2562 if both are set, they must correspond to each other.
2562 2563 """
2563 2564 if node == self.nullid:
2564 2565 raise error.RevlogError(
2565 2566 _(b"%s: attempt to add null revision") % self.display_id
2566 2567 )
2567 2568 if (
2568 2569 node == self.nodeconstants.wdirid
2569 2570 or node in self.nodeconstants.wdirfilenodeids
2570 2571 ):
2571 2572 raise error.RevlogError(
2572 2573 _(b"%s: attempt to add wdir revision") % self.display_id
2573 2574 )
2574 2575 if self._writinghandles is None:
2575 2576 msg = b'adding revision outside `revlog._writing` context'
2576 2577 raise error.ProgrammingError(msg)
2577 2578
2578 2579 if self._inline:
2579 2580 fh = self._writinghandles[0]
2580 2581 else:
2581 2582 fh = self._writinghandles[1]
2582 2583
2583 2584 btext = [rawtext]
2584 2585
2585 2586 curr = len(self)
2586 2587 prev = curr - 1
2587 2588
2588 2589 offset = self._get_data_offset(prev)
2589 2590
2590 2591 if self._concurrencychecker:
2591 2592 ifh, dfh, sdfh = self._writinghandles
2592 2593 # XXX no checking for the sidedata file
2593 2594 if self._inline:
2594 2595 # offset is "as if" it were in the .d file, so we need to add on
2595 2596 # the size of the entry metadata.
2596 2597 self._concurrencychecker(
2597 2598 ifh, self._indexfile, offset + curr * self.index.entry_size
2598 2599 )
2599 2600 else:
2600 2601 # Entries in the .i are a consistent size.
2601 2602 self._concurrencychecker(
2602 2603 ifh, self._indexfile, curr * self.index.entry_size
2603 2604 )
2604 2605 self._concurrencychecker(dfh, self._datafile, offset)
2605 2606
2606 2607 p1r, p2r = self.rev(p1), self.rev(p2)
2607 2608
2608 2609 # full versions are inserted when the needed deltas
2609 2610 # become comparable to the uncompressed text
2610 2611 if rawtext is None:
2611 2612 # need rawtext size before it is changed by flag processors, which is
2612 2613 # the non-raw size. use revlog explicitly to avoid filelog's extra
2613 2614 # logic that might remove metadata size.
2614 2615 textlen = mdiff.patchedsize(
2615 2616 revlog.size(self, cachedelta[0]), cachedelta[1]
2616 2617 )
2617 2618 else:
2618 2619 textlen = len(rawtext)
2619 2620
2620 2621 if deltacomputer is None:
2621 2622 write_debug = None
2622 2623 if self._debug_delta:
2623 2624 write_debug = transaction._report
2624 2625 deltacomputer = deltautil.deltacomputer(
2625 2626 self, write_debug=write_debug
2626 2627 )
2627 2628
2628 2629 if cachedelta is not None and len(cachedelta) == 2:
2629 2630 # If the cached delta has no information about how it should be
2630 2631 # reused, add the default reuse instruction according to the
2631 2632 # revlog's configuration.
2632 2633 if self._generaldelta and self._lazydeltabase:
2633 2634 delta_base_reuse = DELTA_BASE_REUSE_TRY
2634 2635 else:
2635 2636 delta_base_reuse = DELTA_BASE_REUSE_NO
2636 2637 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2637 2638
2638 2639 revinfo = revlogutils.revisioninfo(
2639 2640 node,
2640 2641 p1,
2641 2642 p2,
2642 2643 btext,
2643 2644 textlen,
2644 2645 cachedelta,
2645 2646 flags,
2646 2647 )
2647 2648
2648 2649 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2649 2650
2650 2651 compression_mode = COMP_MODE_INLINE
2651 2652 if self._docket is not None:
2652 2653 default_comp = self._docket.default_compression_header
2653 2654 r = deltautil.delta_compression(default_comp, deltainfo)
2654 2655 compression_mode, deltainfo = r
2655 2656
2656 2657 sidedata_compression_mode = COMP_MODE_INLINE
2657 2658 if sidedata and self.hassidedata:
2658 2659 sidedata_compression_mode = COMP_MODE_PLAIN
2659 2660 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2660 2661 sidedata_offset = self._docket.sidedata_end
2661 2662 h, comp_sidedata = self.compress(serialized_sidedata)
2662 2663 if (
2663 2664 h != b'u'
2664 2665 and comp_sidedata[0:1] != b'\0'
2665 2666 and len(comp_sidedata) < len(serialized_sidedata)
2666 2667 ):
2667 2668 assert not h
2668 2669 if (
2669 2670 comp_sidedata[0:1]
2670 2671 == self._docket.default_compression_header
2671 2672 ):
2672 2673 sidedata_compression_mode = COMP_MODE_DEFAULT
2673 2674 serialized_sidedata = comp_sidedata
2674 2675 else:
2675 2676 sidedata_compression_mode = COMP_MODE_INLINE
2676 2677 serialized_sidedata = comp_sidedata
2677 2678 else:
2678 2679 serialized_sidedata = b""
2679 2680 # Don't store the offset if the sidedata is empty; that way
2680 2681 # we can easily detect empty sidedata, and it will be no different
2681 2682 # from sidedata we add manually.
2682 2683 sidedata_offset = 0
2683 2684
2684 2685 rank = RANK_UNKNOWN
2685 2686 if self._compute_rank:
2686 2687 if (p1r, p2r) == (nullrev, nullrev):
2687 2688 rank = 1
2688 2689 elif p1r != nullrev and p2r == nullrev:
2689 2690 rank = 1 + self.fast_rank(p1r)
2690 2691 elif p1r == nullrev and p2r != nullrev:
2691 2692 rank = 1 + self.fast_rank(p2r)
2692 2693 else: # merge node
2693 2694 if rustdagop is not None and self.index.rust_ext_compat:
2694 2695 rank = rustdagop.rank(self.index, p1r, p2r)
2695 2696 else:
2696 2697 pmin, pmax = sorted((p1r, p2r))
2697 2698 rank = 1 + self.fast_rank(pmax)
2698 2699 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2699 2700
2700 2701 e = revlogutils.entry(
2701 2702 flags=flags,
2702 2703 data_offset=offset,
2703 2704 data_compressed_length=deltainfo.deltalen,
2704 2705 data_uncompressed_length=textlen,
2705 2706 data_compression_mode=compression_mode,
2706 2707 data_delta_base=deltainfo.base,
2707 2708 link_rev=link,
2708 2709 parent_rev_1=p1r,
2709 2710 parent_rev_2=p2r,
2710 2711 node_id=node,
2711 2712 sidedata_offset=sidedata_offset,
2712 2713 sidedata_compressed_length=len(serialized_sidedata),
2713 2714 sidedata_compression_mode=sidedata_compression_mode,
2714 2715 rank=rank,
2715 2716 )
2716 2717
2717 2718 self.index.append(e)
2718 2719 entry = self.index.entry_binary(curr)
2719 2720 if curr == 0 and self._docket is None:
2720 2721 header = self._format_flags | self._format_version
2721 2722 header = self.index.pack_header(header)
2722 2723 entry = header + entry
2723 2724 self._writeentry(
2724 2725 transaction,
2725 2726 entry,
2726 2727 deltainfo.data,
2727 2728 link,
2728 2729 offset,
2729 2730 serialized_sidedata,
2730 2731 sidedata_offset,
2731 2732 )
2732 2733
2733 2734 rawtext = btext[0]
2734 2735
2735 2736 if alwayscache and rawtext is None:
2736 2737 rawtext = deltacomputer.buildtext(revinfo, fh)
2737 2738
2738 2739 if type(rawtext) == bytes: # only accept immutable objects
2739 2740 self._revisioncache = (node, curr, rawtext)
2740 2741 self._chainbasecache[curr] = deltainfo.chainbase
2741 2742 return curr
2742 2743
2743 2744 def _get_data_offset(self, prev):
2744 2745 """Returns the current offset in the (in-transaction) data file.
2745 2746 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2746 2747 file to store that information: since sidedata can be rewritten to the
2747 2748 end of the data file within a transaction, you can have cases where, for
2748 2749 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2749 2750 to `n - 1`'s sidedata being written after `n`'s data.
2750 2751
2751 2752 TODO cache this in a docket file before getting out of experimental."""
2752 2753 if self._docket is None:
2753 2754 return self.end(prev)
2754 2755 else:
2755 2756 return self._docket.data_end
2756 2757
2757 2758 def _writeentry(
2758 2759 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2759 2760 ):
2760 2761 # Files opened in a+ mode have inconsistent behavior on various
2761 2762 # platforms. Windows requires that a file positioning call be made
2762 2763 # when the file handle transitions between reads and writes. See
2763 2764 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2764 2765 # platforms, Python or the platform itself can be buggy. Some versions
2765 2766 # of Solaris have been observed to not append at the end of the file
2766 2767 # if the file was seeked to before the end. See issue4943 for more.
2767 2768 #
2768 2769 # We work around this issue by inserting a seek() before writing.
2769 2770 # Note: This is likely not necessary on Python 3. However, because
2770 2771 # the file handle is reused for reads and may be seeked there, we need
2771 2772 # to be careful before changing this.
2772 2773 if self._writinghandles is None:
2773 2774 msg = b'adding revision outside `revlog._writing` context'
2774 2775 raise error.ProgrammingError(msg)
2775 2776 ifh, dfh, sdfh = self._writinghandles
2776 2777 if self._docket is None:
2777 2778 ifh.seek(0, os.SEEK_END)
2778 2779 else:
2779 2780 ifh.seek(self._docket.index_end, os.SEEK_SET)
2780 2781 if dfh:
2781 2782 if self._docket is None:
2782 2783 dfh.seek(0, os.SEEK_END)
2783 2784 else:
2784 2785 dfh.seek(self._docket.data_end, os.SEEK_SET)
2785 2786 if sdfh:
2786 2787 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2787 2788
2788 2789 curr = len(self) - 1
2789 2790 if not self._inline:
2790 2791 transaction.add(self._datafile, offset)
2791 2792 if self._sidedatafile:
2792 2793 transaction.add(self._sidedatafile, sidedata_offset)
2793 2794 transaction.add(self._indexfile, curr * len(entry))
2794 2795 if data[0]:
2795 2796 dfh.write(data[0])
2796 2797 dfh.write(data[1])
2797 2798 if sidedata:
2798 2799 sdfh.write(sidedata)
2799 2800 ifh.write(entry)
2800 2801 else:
2801 2802 offset += curr * self.index.entry_size
2802 2803 transaction.add(self._indexfile, offset)
2803 2804 ifh.write(entry)
2804 2805 ifh.write(data[0])
2805 2806 ifh.write(data[1])
2806 2807 assert not sidedata
2807 2808 self._enforceinlinesize(transaction)
2808 2809 if self._docket is not None:
2809 2810 # revlog-v2 always has 3 writing handles, help Pytype
2810 2811 wh1 = self._writinghandles[0]
2811 2812 wh2 = self._writinghandles[1]
2812 2813 wh3 = self._writinghandles[2]
2813 2814 assert wh1 is not None
2814 2815 assert wh2 is not None
2815 2816 assert wh3 is not None
2816 2817 self._docket.index_end = wh1.tell()
2817 2818 self._docket.data_end = wh2.tell()
2818 2819 self._docket.sidedata_end = wh3.tell()
2819 2820
2820 2821 nodemaputil.setup_persistent_nodemap(transaction, self)
2821 2822
2822 2823 def addgroup(
2823 2824 self,
2824 2825 deltas,
2825 2826 linkmapper,
2826 2827 transaction,
2827 2828 alwayscache=False,
2828 2829 addrevisioncb=None,
2829 2830 duplicaterevisioncb=None,
2830 2831 debug_info=None,
2831 2832 delta_base_reuse_policy=None,
2832 2833 ):
2833 2834 """
2834 2835 add a delta group
2835 2836
2836 2837 given a set of deltas, add them to the revision log. the
2837 2838 first delta is against its parent, which should be in our
2838 2839 log; the rest are against the previous delta.
2839 2840
2840 2841 If ``addrevisioncb`` is defined, it will be called with arguments of
2841 2842 this revlog and the revision that was added.
2842 2843 """
2843 2844
2844 2845 if self._adding_group:
2845 2846 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2846 2847
2847 2848 # read the default delta-base reuse policy from revlog config if the
2848 2849 # group did not specify one.
2849 2850 if delta_base_reuse_policy is None:
2850 2851 if self._generaldelta and self._lazydeltabase:
2851 2852 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2852 2853 else:
2853 2854 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2854 2855
2855 2856 self._adding_group = True
2856 2857 empty = True
2857 2858 try:
2858 2859 with self._writing(transaction):
2859 2860 write_debug = None
2860 2861 if self._debug_delta:
2861 2862 write_debug = transaction._report
2862 2863 deltacomputer = deltautil.deltacomputer(
2863 2864 self,
2864 2865 write_debug=write_debug,
2865 2866 debug_info=debug_info,
2866 2867 )
2867 2868 # loop through our set of deltas
2868 2869 for data in deltas:
2869 2870 (
2870 2871 node,
2871 2872 p1,
2872 2873 p2,
2873 2874 linknode,
2874 2875 deltabase,
2875 2876 delta,
2876 2877 flags,
2877 2878 sidedata,
2878 2879 ) = data
2879 2880 link = linkmapper(linknode)
2880 2881 flags = flags or REVIDX_DEFAULT_FLAGS
2881 2882
2882 2883 rev = self.index.get_rev(node)
2883 2884 if rev is not None:
2884 2885 # this can happen if two branches make the same change
2885 2886 self._nodeduplicatecallback(transaction, rev)
2886 2887 if duplicaterevisioncb:
2887 2888 duplicaterevisioncb(self, rev)
2888 2889 empty = False
2889 2890 continue
2890 2891
2891 2892 for p in (p1, p2):
2892 2893 if not self.index.has_node(p):
2893 2894 raise error.LookupError(
2894 2895 p, self.radix, _(b'unknown parent')
2895 2896 )
2896 2897
2897 2898 if not self.index.has_node(deltabase):
2898 2899 raise error.LookupError(
2899 2900 deltabase, self.display_id, _(b'unknown delta base')
2900 2901 )
2901 2902
2902 2903 baserev = self.rev(deltabase)
2903 2904
2904 2905 if baserev != nullrev and self.iscensored(baserev):
2905 2906 # if base is censored, delta must be full replacement in a
2906 2907 # single patch operation
2907 2908 hlen = struct.calcsize(b">lll")
2908 2909 oldlen = self.rawsize(baserev)
2909 2910 newlen = len(delta) - hlen
2910 2911 if delta[:hlen] != mdiff.replacediffheader(
2911 2912 oldlen, newlen
2912 2913 ):
2913 2914 raise error.CensoredBaseError(
2914 2915 self.display_id, self.node(baserev)
2915 2916 )
2916 2917
2917 2918 if not flags and self._peek_iscensored(baserev, delta):
2918 2919 flags |= REVIDX_ISCENSORED
2919 2920
2920 2921 # We assume consumers of addrevisioncb will want to retrieve
2921 2922 # the added revision, which will require a call to
2922 2923 # revision(). revision() will fast path if there is a cache
2923 2924 # hit. So, we tell _addrevision() to always cache in this case.
2924 2925 # We're only using addgroup() in the context of changegroup
2925 2926 # generation so the revision data can always be handled as raw
2926 2927 # by the flagprocessor.
2927 2928 rev = self._addrevision(
2928 2929 node,
2929 2930 None,
2930 2931 transaction,
2931 2932 link,
2932 2933 p1,
2933 2934 p2,
2934 2935 flags,
2935 2936 (baserev, delta, delta_base_reuse_policy),
2936 2937 alwayscache=alwayscache,
2937 2938 deltacomputer=deltacomputer,
2938 2939 sidedata=sidedata,
2939 2940 )
2940 2941
2941 2942 if addrevisioncb:
2942 2943 addrevisioncb(self, rev)
2943 2944 empty = False
2944 2945 finally:
2945 2946 self._adding_group = False
2946 2947 return not empty
2947 2948
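# Illustrative sketch (editor addition): the shape of one item of the
# ``deltas`` iterable unpacked above; all values here are made up.
toy_delta_entry = (
    b'\x11' * 20,       # node: id of the revision being added
    b'\x22' * 20,       # p1
    b'\x00' * 20,       # p2 (nullid here)
    b'\x33' * 20,       # linknode, mapped to a linkrev by ``linkmapper``
    b'\x22' * 20,       # deltabase: node the delta applies against
    b'<binary delta>',  # the delta payload itself
    0,                  # flags (falsy, so REVIDX_DEFAULT_FLAGS is used)
    {},                 # sidedata mapping
)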
2948 2949 def iscensored(self, rev):
2949 2950 """Check if a file revision is censored."""
2950 2951 if not self._censorable:
2951 2952 return False
2952 2953
2953 2954 return self.flags(rev) & REVIDX_ISCENSORED
2954 2955
2955 2956 def _peek_iscensored(self, baserev, delta):
2956 2957 """Quickly check if a delta produces a censored revision."""
2957 2958 if not self._censorable:
2958 2959 return False
2959 2960
2960 2961 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2961 2962
2962 2963 def getstrippoint(self, minlink):
2963 2964 """find the minimum rev that must be stripped to strip the linkrev
2964 2965
2965 2966 Returns a tuple containing the minimum rev and a set of all revs that
2966 2967 have linkrevs that will be broken by this strip.
2967 2968 """
2968 2969 return storageutil.resolvestripinfo(
2969 2970 minlink,
2970 2971 len(self) - 1,
2971 2972 self.headrevs(),
2972 2973 self.linkrev,
2973 2974 self.parentrevs,
2974 2975 )
2975 2976
2976 2977 def strip(self, minlink, transaction):
2977 2978 """truncate the revlog on the first revision with a linkrev >= minlink
2978 2979
2979 2980 This function is called when we're stripping revision minlink and
2980 2981 its descendants from the repository.
2981 2982
2982 2983 We have to remove all revisions with linkrev >= minlink, because
2983 2984 the equivalent changelog revisions will be renumbered after the
2984 2985 strip.
2985 2986
2986 2987 So we truncate the revlog on the first of these revisions, and
2987 2988 trust that the caller has saved the revisions that shouldn't be
2988 2989 removed and that it'll re-add them after this truncation.
2989 2990 """
2990 2991 if len(self) == 0:
2991 2992 return
2992 2993
2993 2994 rev, _ = self.getstrippoint(minlink)
2994 2995 if rev == len(self):
2995 2996 return
2996 2997
2997 2998 # first truncate the files on disk
2998 2999 data_end = self.start(rev)
2999 3000 if not self._inline:
3000 3001 transaction.add(self._datafile, data_end)
3001 3002 end = rev * self.index.entry_size
3002 3003 else:
3003 3004 end = data_end + (rev * self.index.entry_size)
3004 3005
3005 3006 if self._sidedatafile:
3006 3007 sidedata_end = self.sidedata_cut_off(rev)
3007 3008 transaction.add(self._sidedatafile, sidedata_end)
3008 3009
3009 3010 transaction.add(self._indexfile, end)
3010 3011 if self._docket is not None:
3011 3012 # XXX we could leverage the docket while stripping. However, it is
3012 3013 # not powerful enough at the time of this comment
3013 3014 self._docket.index_end = end
3014 3015 self._docket.data_end = data_end
3015 3016 self._docket.sidedata_end = sidedata_end
3016 3017 self._docket.write(transaction, stripping=True)
3017 3018
3018 3019 # then reset internal state in memory to forget those revisions
3019 3020 self._revisioncache = None
3020 3021 self._chaininfocache = util.lrucachedict(500)
3021 3022 self._segmentfile.clear_cache()
3022 3023 self._segmentfile_sidedata.clear_cache()
3023 3024
3024 3025 del self.index[rev:-1]
3025 3026
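# Illustrative sketch (editor addition): the truncation offsets computed
# by strip() above when cutting at revision ``rev``. ``data_start`` is
# self.start(rev); ``entry_size`` comes from the index format.
def toy_strip_offsets(data_start, rev, entry_size, inline):
    if inline:
        # index entries and data share one file
        return {'index_end': data_start + rev * entry_size}
    return {'index_end': rev * entry_size, 'data_end': data_start}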
3026 3027 def checksize(self):
3027 3028 """Check size of index and data files
3028 3029
3029 3030 return a (dd, di) tuple.
3030 3031 - dd: extra bytes for the "data" file
3031 3032 - di: extra bytes for the "index" file
3032 3033
3033 3034 A healthy revlog will return (0, 0).
3034 3035 """
3035 3036 expected = 0
3036 3037 if len(self):
3037 3038 expected = max(0, self.end(len(self) - 1))
3038 3039
3039 3040 try:
3040 3041 with self._datafp() as f:
3041 3042 f.seek(0, io.SEEK_END)
3042 3043 actual = f.tell()
3043 3044 dd = actual - expected
3044 3045 except FileNotFoundError:
3045 3046 dd = 0
3046 3047
3047 3048 try:
3048 3049 f = self.opener(self._indexfile)
3049 3050 f.seek(0, io.SEEK_END)
3050 3051 actual = f.tell()
3051 3052 f.close()
3052 3053 s = self.index.entry_size
3053 3054 i = max(0, actual // s)
3054 3055 di = actual - (i * s)
3055 3056 if self._inline:
3056 3057 databytes = 0
3057 3058 for r in self:
3058 3059 databytes += max(0, self.length(r))
3059 3060 dd = 0
3060 3061 di = actual - len(self) * s - databytes
3061 3062 except FileNotFoundError:
3062 3063 di = 0
3063 3064
3064 3065 return (dd, di)
3065 3066
3066 3067 def files(self):
3067 3068 res = [self._indexfile]
3068 3069 if self._docket_file is None:
3069 3070 if not self._inline:
3070 3071 res.append(self._datafile)
3071 3072 else:
3072 3073 res.append(self._docket_file)
3073 3074 res.extend(self._docket.old_index_filepaths(include_empty=False))
3074 3075 if self._docket.data_end:
3075 3076 res.append(self._datafile)
3076 3077 res.extend(self._docket.old_data_filepaths(include_empty=False))
3077 3078 if self._docket.sidedata_end:
3078 3079 res.append(self._sidedatafile)
3079 3080 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3080 3081 return res
3081 3082
3082 3083 def emitrevisions(
3083 3084 self,
3084 3085 nodes,
3085 3086 nodesorder=None,
3086 3087 revisiondata=False,
3087 3088 assumehaveparentrevisions=False,
3088 3089 deltamode=repository.CG_DELTAMODE_STD,
3089 3090 sidedata_helpers=None,
3090 3091 debug_info=None,
3091 3092 ):
3092 3093 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3093 3094 raise error.ProgrammingError(
3094 3095 b'unhandled value for nodesorder: %s' % nodesorder
3095 3096 )
3096 3097
3097 3098 if nodesorder is None and not self._generaldelta:
3098 3099 nodesorder = b'storage'
3099 3100
3100 3101 if (
3101 3102 not self._storedeltachains
3102 3103 and deltamode != repository.CG_DELTAMODE_PREV
3103 3104 ):
3104 3105 deltamode = repository.CG_DELTAMODE_FULL
3105 3106
3106 3107 return storageutil.emitrevisions(
3107 3108 self,
3108 3109 nodes,
3109 3110 nodesorder,
3110 3111 revlogrevisiondelta,
3111 3112 deltaparentfn=self.deltaparent,
3112 3113 candeltafn=self._candelta,
3113 3114 rawsizefn=self.rawsize,
3114 3115 revdifffn=self.revdiff,
3115 3116 flagsfn=self.flags,
3116 3117 deltamode=deltamode,
3117 3118 revisiondata=revisiondata,
3118 3119 assumehaveparentrevisions=assumehaveparentrevisions,
3119 3120 sidedata_helpers=sidedata_helpers,
3120 3121 debug_info=debug_info,
3121 3122 )
3122 3123
3123 3124 DELTAREUSEALWAYS = b'always'
3124 3125 DELTAREUSESAMEREVS = b'samerevs'
3125 3126 DELTAREUSENEVER = b'never'
3126 3127
3127 3128 DELTAREUSEFULLADD = b'fulladd'
3128 3129
3129 3130 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3130 3131
3131 3132 def clone(
3132 3133 self,
3133 3134 tr,
3134 3135 destrevlog,
3135 3136 addrevisioncb=None,
3136 3137 deltareuse=DELTAREUSESAMEREVS,
3137 3138 forcedeltabothparents=None,
3138 3139 sidedata_helpers=None,
3139 3140 ):
3140 3141 """Copy this revlog to another, possibly with format changes.
3141 3142
3142 3143 The destination revlog will contain the same revisions and nodes.
3143 3144 However, it may not be bit-for-bit identical due to e.g. delta encoding
3144 3145 differences.
3145 3146
3146 3147 The ``deltareuse`` argument controls how deltas from the existing revlog
3147 3148 are preserved in the destination revlog. The argument can have the
3148 3149 following values:
3149 3150
3150 3151 DELTAREUSEALWAYS
3151 3152 Deltas will always be reused (if possible), even if the destination
3152 3153 revlog would not select the same revisions for the delta. This is the
3153 3154 fastest mode of operation.
3154 3155 DELTAREUSESAMEREVS
3155 3156 Deltas will be reused if the destination revlog would pick the same
3156 3157 revisions for the delta. This mode strikes a balance between speed
3157 3158 and optimization.
3158 3159 DELTAREUSENEVER
3159 3160 Deltas will never be reused. This is the slowest mode of execution.
3160 3161 This mode can be used to recompute deltas (e.g. if the diff/delta
3161 3162 algorithm changes).
3162 3163 DELTAREUSEFULLADD
3163 3164 Revisions will be re-added as if they were new content. This is
3164 3165 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3165 3166 e.g. large file detection and handling.
3166 3167
3167 3168 Delta computation can be slow, so the choice of delta reuse policy can
3168 3169 significantly affect run time.
3169 3170
3170 3171 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3171 3172 two extremes. Deltas will be reused when they are appropriate. But if a
3172 3173 better delta base is available, it will be chosen. This means if you
3173 3174 are converting a non-generaldelta revlog to a generaldelta revlog,
3174 3175 deltas will be recomputed if the delta's parent isn't a parent of the
3175 3176 revision.
3176 3177
3177 3178 In addition to the delta policy, the ``forcedeltabothparents``
3178 3179 argument controls whether deltas are forcibly computed against both
3179 3180 parents for merges. When it is None, the destination revlog's current setting is used.
3180 3181
3181 3182 See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
3182 3183 `sidedata_helpers`.
3183 3184 """
3184 3185 if deltareuse not in self.DELTAREUSEALL:
3185 3186 raise ValueError(
3186 3187 _(b'value for deltareuse invalid: %s') % deltareuse
3187 3188 )
3188 3189
3189 3190 if len(destrevlog):
3190 3191 raise ValueError(_(b'destination revlog is not empty'))
3191 3192
3192 3193 if getattr(self, 'filteredrevs', None):
3193 3194 raise ValueError(_(b'source revlog has filtered revisions'))
3194 3195 if getattr(destrevlog, 'filteredrevs', None):
3195 3196 raise ValueError(_(b'destination revlog has filtered revisions'))
3196 3197
3197 3198 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3198 3199 # if possible.
3199 3200 oldlazydelta = destrevlog._lazydelta
3200 3201 oldlazydeltabase = destrevlog._lazydeltabase
3201 3202 oldamd = destrevlog._deltabothparents
3202 3203
3203 3204 try:
3204 3205 if deltareuse == self.DELTAREUSEALWAYS:
3205 3206 destrevlog._lazydeltabase = True
3206 3207 destrevlog._lazydelta = True
3207 3208 elif deltareuse == self.DELTAREUSESAMEREVS:
3208 3209 destrevlog._lazydeltabase = False
3209 3210 destrevlog._lazydelta = True
3210 3211 elif deltareuse == self.DELTAREUSENEVER:
3211 3212 destrevlog._lazydeltabase = False
3212 3213 destrevlog._lazydelta = False
3213 3214
3214 3215 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3215 3216
3216 3217 self._clone(
3217 3218 tr,
3218 3219 destrevlog,
3219 3220 addrevisioncb,
3220 3221 deltareuse,
3221 3222 forcedeltabothparents,
3222 3223 sidedata_helpers,
3223 3224 )
3224 3225
3225 3226 finally:
3226 3227 destrevlog._lazydelta = oldlazydelta
3227 3228 destrevlog._lazydeltabase = oldlazydeltabase
3228 3229 destrevlog._deltabothparents = oldamd
3229 3230
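# Example (illustrative sketch, not part of the original source):
# recomputing every delta while copying into an empty destination
# revlog, e.g. after a delta algorithm change; ``src``, ``dst`` and an
# open transaction ``tr`` are assumed to exist.
#
#     src.clone(
#         tr,
#         dst,
#         deltareuse=src.DELTAREUSENEVER,  # force fresh delta computation
#     )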
3230 3231 def _clone(
3231 3232 self,
3232 3233 tr,
3233 3234 destrevlog,
3234 3235 addrevisioncb,
3235 3236 deltareuse,
3236 3237 forcedeltabothparents,
3237 3238 sidedata_helpers,
3238 3239 ):
3239 3240 """perform the core duty of `revlog.clone` after parameter processing"""
3240 3241 write_debug = None
3241 3242 if self._debug_delta:
3242 3243 write_debug = tr._report
3243 3244 deltacomputer = deltautil.deltacomputer(
3244 3245 destrevlog,
3245 3246 write_debug=write_debug,
3246 3247 )
3247 3248 index = self.index
3248 3249 for rev in self:
3249 3250 entry = index[rev]
3250 3251
3251 3252 # Some classes override linkrev to take filtered revs into
3252 3253 # account. Use raw entry from index.
3253 3254 flags = entry[0] & 0xFFFF
3254 3255 linkrev = entry[4]
3255 3256 p1 = index[entry[5]][7]
3256 3257 p2 = index[entry[6]][7]
3257 3258 node = entry[7]
3258 3259
3259 3260 # (Possibly) reuse the delta from the revlog if allowed and
3260 3261 # the revlog chunk is a delta.
3261 3262 cachedelta = None
3262 3263 rawtext = None
3263 3264 if deltareuse == self.DELTAREUSEFULLADD:
3264 3265 text = self._revisiondata(rev)
3265 3266 sidedata = self.sidedata(rev)
3266 3267
3267 3268 if sidedata_helpers is not None:
3268 3269 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3269 3270 self, sidedata_helpers, sidedata, rev
3270 3271 )
3271 3272 flags = flags | new_flags[0] & ~new_flags[1]
3272 3273
3273 3274 destrevlog.addrevision(
3274 3275 text,
3275 3276 tr,
3276 3277 linkrev,
3277 3278 p1,
3278 3279 p2,
3279 3280 cachedelta=cachedelta,
3280 3281 node=node,
3281 3282 flags=flags,
3282 3283 deltacomputer=deltacomputer,
3283 3284 sidedata=sidedata,
3284 3285 )
3285 3286 else:
3286 3287 if destrevlog._lazydelta:
3287 3288 dp = self.deltaparent(rev)
3288 3289 if dp != nullrev:
3289 3290 cachedelta = (dp, bytes(self._chunk(rev)))
3290 3291
3291 3292 sidedata = None
3292 3293 if not cachedelta:
3293 3294 rawtext = self._revisiondata(rev)
3294 3295 sidedata = self.sidedata(rev)
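# (clarifying note, not in the original source) when a cached delta is
# reused above, sidedata is still None at this point and must be
# fetched before the revision is written out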
3295 3296 if sidedata is None:
3296 3297 sidedata = self.sidedata(rev)
3297 3298
3298 3299 if sidedata_helpers is not None:
3299 3300 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3300 3301 self, sidedata_helpers, sidedata, rev
3301 3302 )
3302 3303 flags = flags | new_flags[0] & ~new_flags[1]
3303 3304
3304 3305 with destrevlog._writing(tr):
3305 3306 destrevlog._addrevision(
3306 3307 node,
3307 3308 rawtext,
3308 3309 tr,
3309 3310 linkrev,
3310 3311 p1,
3311 3312 p2,
3312 3313 flags,
3313 3314 cachedelta,
3314 3315 deltacomputer=deltacomputer,
3315 3316 sidedata=sidedata,
3316 3317 )
3317 3318
3318 3319 if addrevisioncb:
3319 3320 addrevisioncb(self, rev, node)
3320 3321
3321 3322 def censorrevision(self, tr, censornode, tombstone=b''):
3322 3323 if self._format_version == REVLOGV0:
3323 3324 raise error.RevlogError(
3324 3325 _(b'cannot censor with version %d revlogs')
3325 3326 % self._format_version
3326 3327 )
3327 3328 elif self._format_version == REVLOGV1:
3328 3329 rewrite.v1_censor(self, tr, censornode, tombstone)
3329 3330 else:
3330 3331 rewrite.v2_censor(self, tr, censornode, tombstone)
3331 3332
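# Example (illustrative sketch, not part of the original source):
# replacing one revision's content with a tombstone inside an open
# transaction; ``rl``, ``tr`` and ``badnode`` are assumed to exist.
#
#     rl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')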
3332 3333 def verifyintegrity(self, state):
3333 3334 """Verifies the integrity of the revlog.
3334 3335
3335 3336 Yields ``revlogproblem`` instances describing problems that are
3336 3337 found.
3337 3338 """
3338 3339 dd, di = self.checksize()
3339 3340 if dd:
3340 3341 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3341 3342 if di:
3342 3343 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3343 3344
3344 3345 version = self._format_version
3345 3346
3346 3347 # The verifier tells us which revlog version we should expect.
3347 3348 if version != state[b'expectedversion']:
3348 3349 yield revlogproblem(
3349 3350 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3350 3351 % (self.display_id, version, state[b'expectedversion'])
3351 3352 )
3352 3353
3353 3354 state[b'skipread'] = set()
3354 3355 state[b'safe_renamed'] = set()
3355 3356
3356 3357 for rev in self:
3357 3358 node = self.node(rev)
3358 3359
3359 3360 # Verify contents. 4 cases to care about:
3360 3361 #
3361 3362 # common: the most common case
3362 3363 # rename: with a rename
3363 3364 # meta: file content starts with b'\1\n', the metadata
3364 3365 # header defined in filelog.py, but without a rename
3365 3366 # ext: content stored externally
3366 3367 #
3367 3368 # More formally, their differences are shown below:
3368 3369 #
3369 3370 # | common | rename | meta | ext
3370 3371 # -------------------------------------------------------
3371 3372 # flags() | 0 | 0 | 0 | not 0
3372 3373 # renamed() | False | True | False | ?
3373 3374 # rawtext[0:2]=='\1\n'| False | True | True | ?
3374 3375 #
3375 3376 # "rawtext" means the raw text stored in revlog data, which
3376 3377 # could be retrieved by "rawdata(rev)". "text"
3377 3378 # mentioned below is "revision(rev)".
3378 3379 #
3379 3380 # There are 3 different lengths stored physically:
3380 3381 # 1. L1: rawsize, stored in revlog index
3381 3382 # 2. L2: len(rawtext), stored in revlog data
3382 3383 # 3. L3: len(text), stored in revlog data if flags==0, or
3383 3384 # possibly somewhere else if flags!=0
3384 3385 #
3385 3386 # L1 should be equal to L2. L3 could be different from them.
3386 3387 # "text" may or may not affect commit hash depending on flag
3387 3388 # processors (see flagutil.addflagprocessor).
3388 3389 #
3389 3390 # | common | rename | meta | ext
3390 3391 # -------------------------------------------------
3391 3392 # rawsize() | L1 | L1 | L1 | L1
3392 3393 # size() | L1 | L2-LM | L1(*) | L1 (?)
3393 3394 # len(rawtext) | L2 | L2 | L2 | L2
3394 3395 # len(text) | L2 | L2 | L2 | L3
3395 3396 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3396 3397 #
3397 3398 # LM: length of metadata, depending on rawtext
3398 3399 # (*): not ideal, see comment in filelog.size
3399 3400 # (?): could be "- len(meta)" if the resolved content has
3400 3401 # rename metadata
3401 3402 #
3402 3403 # Checks needed to be done:
3403 3404 # 1. length check: L1 == L2, in all cases.
3404 3405 # 2. hash check: depending on flag processor, we may need to
3405 3406 # use either "text" (external), or "rawtext" (in revlog).
3406 3407
3407 3408 try:
3408 3409 skipflags = state.get(b'skipflags', 0)
3409 3410 if skipflags:
3410 3411 skipflags &= self.flags(rev)
3411 3412
3412 3413 _verify_revision(self, skipflags, state, node)
3413 3414
3414 3415 l1 = self.rawsize(rev)
3415 3416 l2 = len(self.rawdata(node))
3416 3417
3417 3418 if l1 != l2:
3418 3419 yield revlogproblem(
3419 3420 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3420 3421 node=node,
3421 3422 )
3422 3423
3423 3424 except error.CensoredNodeError:
3424 3425 if state[b'erroroncensored']:
3425 3426 yield revlogproblem(
3426 3427 error=_(b'censored file data'), node=node
3427 3428 )
3428 3429 state[b'skipread'].add(node)
3429 3430 except Exception as e:
3430 3431 yield revlogproblem(
3431 3432 error=_(b'unpacking %s: %s')
3432 3433 % (short(node), stringutil.forcebytestr(e)),
3433 3434 node=node,
3434 3435 )
3435 3436 state[b'skipread'].add(node)
3436 3437
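# Example (illustrative sketch, not part of the original source):
# draining the generator and reporting what it found; ``rl`` is
# assumed, ``state`` is the dict the verifier passes in (with keys
# such as b'expectedversion' and b'erroroncensored'), and ``ui`` is a
# hypothetical output object.
#
#     for problem in rl.verifyintegrity(state):
#         if problem.error:
#             ui.warn(b'error: ' + problem.error + b'\n')
#         elif problem.warning:
#             ui.warn(problem.warning + b'\n')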
3437 3438 def storageinfo(
3438 3439 self,
3439 3440 exclusivefiles=False,
3440 3441 sharedfiles=False,
3441 3442 revisionscount=False,
3442 3443 trackedsize=False,
3443 3444 storedsize=False,
3444 3445 ):
3445 3446 d = {}
3446 3447
3447 3448 if exclusivefiles:
3448 3449 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3449 3450 if not self._inline:
3450 3451 d[b'exclusivefiles'].append((self.opener, self._datafile))
3451 3452
3452 3453 if sharedfiles:
3453 3454 d[b'sharedfiles'] = []
3454 3455
3455 3456 if revisionscount:
3456 3457 d[b'revisionscount'] = len(self)
3457 3458
3458 3459 if trackedsize:
3459 3460 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3460 3461
3461 3462 if storedsize:
3462 3463 d[b'storedsize'] = sum(
3463 3464 self.opener.stat(path).st_size for path in self.files()
3464 3465 )
3465 3466
3466 3467 return d
3467 3468
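# Example (illustrative sketch, not part of the original source):
# querying a revlog's size; ``rl`` is assumed to be an open revlog.
#
#     info = rl.storageinfo(trackedsize=True, storedsize=True)
#     # info[b'trackedsize']: sum of rawsize() over all revisions
#     # info[b'storedsize']: bytes used on disk by the files in rl.files()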
3468 3469 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3469 3470 if not self.hassidedata:
3470 3471 return
3471 3472 # revlog formats with sidedata support do not support inline
3472 3473 assert not self._inline
3473 3474 if not helpers[1] and not helpers[2]:
3474 3475 # Nothing to generate or remove
3475 3476 return
3476 3477
3477 3478 new_entries = []
3478 3479 # append the new sidedata
3479 3480 with self._writing(transaction):
3480 3481 ifh, dfh, sdfh = self._writinghandles
3481 3482 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3482 3483
3483 3484 current_offset = sdfh.tell()
3484 3485 for rev in range(startrev, endrev + 1):
3485 3486 entry = self.index[rev]
3486 3487 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3487 3488 store=self,
3488 3489 sidedata_helpers=helpers,
3489 3490 sidedata={},
3490 3491 rev=rev,
3491 3492 )
3492 3493
3493 3494 serialized_sidedata = sidedatautil.serialize_sidedata(
3494 3495 new_sidedata
3495 3496 )
3496 3497
3497 3498 sidedata_compression_mode = COMP_MODE_INLINE
3498 3499 if serialized_sidedata and self.hassidedata:
3499 3500 sidedata_compression_mode = COMP_MODE_PLAIN
3500 3501 h, comp_sidedata = self.compress(serialized_sidedata)
3501 3502 if (
3502 3503 h != b'u'
3503 3504 and comp_sidedata[0:1] != b'\0'
3504 3505 and len(comp_sidedata) < len(serialized_sidedata)
3505 3506 ):
3506 3507 assert not h
3507 3508 if (
3508 3509 comp_sidedata[0:1]
3509 3510 == self._docket.default_compression_header
3510 3511 ):
3511 3512 sidedata_compression_mode = COMP_MODE_DEFAULT
3512 3513 serialized_sidedata = comp_sidedata
3513 3514 else:
3514 3515 sidedata_compression_mode = COMP_MODE_INLINE
3515 3516 serialized_sidedata = comp_sidedata
3516 3517 if entry[8] != 0 or entry[9] != 0:
3517 3518 # rewriting entries that already have sidedata is not
3518 3519 # supported yet, because it introduces garbage data in the
3519 3520 # revlog.
3520 3521 msg = b"rewriting existing sidedata is not supported yet"
3521 3522 raise error.Abort(msg)
3522 3523
3523 3524 # Apply (potential) flags to add and to remove after running
3524 3525 # the sidedata helpers
3525 3526 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3526 3527 entry_update = (
3527 3528 current_offset,
3528 3529 len(serialized_sidedata),
3529 3530 new_offset_flags,
3530 3531 sidedata_compression_mode,
3531 3532 )
3532 3533
3533 3534 # the sidedata computation might have moved the file cursors around
3534 3535 sdfh.seek(current_offset, os.SEEK_SET)
3535 3536 sdfh.write(serialized_sidedata)
3536 3537 new_entries.append(entry_update)
3537 3538 current_offset += len(serialized_sidedata)
3538 3539 self._docket.sidedata_end = sdfh.tell()
3539 3540
3540 3541 # rewrite the new index entries
3541 3542 ifh.seek(startrev * self.index.entry_size)
3542 3543 for i, e in enumerate(new_entries):
3543 3544 rev = startrev + i
3544 3545 self.index.replace_sidedata_info(rev, *e)
3545 3546 packed = self.index.entry_binary(rev)
3546 3547 if rev == 0 and self._docket is None:
3547 3548 header = self._format_flags | self._format_version
3548 3549 header = self.index.pack_header(header)
3549 3550 packed = header + packed
3550 3551 ifh.write(packed)
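# A note on the flag arithmetic used above (clarifying sketch, not in
# the original source): in ``entry[0] | flags[0] & ~flags[1]`` the
# ``&`` binds tighter than ``|``, so the removal mask only filters the
# flags being *added*; flags already present in ``entry[0]`` are never
# cleared. For example, with existing flags 0b0101, flags-to-add
# 0b0010 and flags-to-remove 0b0100:
#
#     0b0101 | (0b0010 & ~0b0100) == 0b0101 | 0b0010 == 0b0111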