revlog: document the `is_inline_index` method...
marmoute
r51895:4ea60656 default
@@ -1,3529 +1,3530 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanket usage of all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast", because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class revlog:
245 245 """
246 246 the underlying revision storage object
247 247
248 248 A revlog consists of two parts, an index and the revision data.
249 249
250 250 The index is a file with a fixed record size containing
251 251 information on each revision, including its nodeid (hash), the
252 252 nodeids of its parents, the position and offset of its data within
253 253 the data file, and the revision it's based on. Finally, each entry
254 254 contains a linkrev entry that can serve as a pointer to external
255 255 data.
256 256
257 257 The revision data itself is a linear collection of data chunks.
258 258 Each chunk represents a revision and is usually represented as a
259 259 delta against the previous chunk. To bound lookup time, runs of
260 260 deltas are limited to about 2 times the length of the original
261 261 version data. This makes retrieval of a version proportional to
262 262 its size, or O(1) relative to the number of revisions.
263 263
264 264 Both pieces of the revlog are written to in an append-only
265 265 fashion, which means we never need to rewrite a file to insert or
266 266 remove data, and can use some simple techniques to avoid the need
267 267 for locking while reading.
268 268
269 269 If checkambig, indexfile is opened with checkambig=True at
270 270 writing, to avoid file stat ambiguity.
271 271
272 272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 273 index will be mmapped rather than read if it is larger than the
274 274 configured threshold.
275 275
276 276 If censorable is True, the revlog can have censored revisions.
277 277
278 278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 279 compression for the data content.
280 280
281 281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 282 file handle, a filename, and an expected position. It should check whether
283 283 the current position in the file handle is valid, and log/warn/fail (by
284 284 raising).
285 285
286 286 See mercurial/revlogutils/constants.py for details about the content of an
287 287 index entry.
288 288 """
289 289
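# A minimal usage sketch of the structure described above (the `rl` and
# `some_node` names are illustrative, not part of this file; a revlog is
# normally instantiated by higher-level storage code):
#
#     r = rl.rev(some_node)      # index lookup: nodeid -> revision number
#     text = rl.revision(r)      # data lookup: reconstruct via delta chain
#     p1, p2 = rl.parentrevs(r)  # parents are read from the index entry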
290 290 _flagserrorclass = error.RevlogError
291 291
292 292 @staticmethod
293 293 def is_inline_index(header_bytes):
294 """Determine if a revlog is inline from the initial bytes of the index"""
294 295 header = INDEX_HEADER.unpack(header_bytes)[0]
295 296
296 297 _format_flags = header & ~0xFFFF
297 298 _format_version = header & 0xFFFF
298 299
299 300 features = FEATURES_BY_VERSION[_format_version]
300 301 return features[b'inline'](_format_flags)
301 302
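# Example of the header layout this relies on (a sketch; the file path is
# hypothetical): the index starts with a big-endian 32-bit integer whose
# high 16 bits are format flags and low 16 bits are the format version,
# so a version 1 inline revlog begins with REVLOGV1 | FLAG_INLINE_DATA.
#
#     with opener(b'data/foo.i') as fp:
#         header_bytes = fp.read(INDEX_HEADER.size)
#     if revlog.is_inline_index(header_bytes):
#         pass  # revision data is interleaved in the .i file itself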
302 303 def __init__(
303 304 self,
304 305 opener,
305 306 target,
306 307 radix,
307 308 postfix=None, # only exist for `tmpcensored` now
308 309 checkambig=False,
309 310 mmaplargeindex=False,
310 311 censorable=False,
311 312 upperboundcomp=None,
312 313 persistentnodemap=False,
313 314 concurrencychecker=None,
314 315 trypending=False,
315 316 try_split=False,
316 317 canonical_parent_order=True,
317 318 ):
318 319 """
319 320 create a revlog object
320 321
321 322 opener is a function that abstracts the file opening operation
322 323 and can be used to implement COW semantics or the like.
323 324
324 325 `target`: a (KIND, ID) tuple that identifies the content stored in
325 326 this revlog. It helps the rest of the code to understand what the revlog
326 327 is about without having to resort to heuristics and index filename
327 328 analysis. Note that this must reliably be set by normal code, but
328 329 that test, debug, or performance measurement code might not set it to an
329 330 accurate value.
330 331 """
331 332 self.upperboundcomp = upperboundcomp
332 333
333 334 self.radix = radix
334 335
335 336 self._docket_file = None
336 337 self._indexfile = None
337 338 self._datafile = None
338 339 self._sidedatafile = None
339 340 self._nodemap_file = None
340 341 self.postfix = postfix
341 342 self._trypending = trypending
342 343 self._try_split = try_split
343 344 self.opener = opener
344 345 if persistentnodemap:
345 346 self._nodemap_file = nodemaputil.get_nodemap_file(self)
346 347
347 348 assert target[0] in ALL_KINDS
348 349 assert len(target) == 2
349 350 self.target = target
350 351 # When True, indexfile is opened with checkambig=True at writing, to
351 352 # avoid file stat ambiguity.
352 353 self._checkambig = checkambig
353 354 self._mmaplargeindex = mmaplargeindex
354 355 self._censorable = censorable
355 356 # 3-tuple of (node, rev, text) for a raw revision.
356 357 self._revisioncache = None
357 358 # Maps rev to chain base rev.
358 359 self._chainbasecache = util.lrucachedict(100)
359 360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
360 361 self._chunkcache = (0, b'')
361 362 # How much data to read and cache into the raw revlog data cache.
362 363 self._chunkcachesize = 65536
363 364 self._maxchainlen = None
364 365 self._deltabothparents = True
365 366 self._candidate_group_chunk_size = 0
366 367 self._debug_delta = False
367 368 self.index = None
368 369 self._docket = None
369 370 self._nodemap_docket = None
370 371 # Mapping of partial identifiers to full nodes.
371 372 self._pcache = {}
372 373 # Mapping of revision integer to full node.
373 374 self._compengine = b'zlib'
374 375 self._compengineopts = {}
375 376 self._maxdeltachainspan = -1
376 377 self._withsparseread = False
377 378 self._sparserevlog = False
378 379 self.hassidedata = False
379 380 self._srdensitythreshold = 0.50
380 381 self._srmingapsize = 262144
381 382
382 383 # other optional features
383 384
384 385 # might remove rank configuration once the computation has no impact
385 386 self._compute_rank = False
386 387
387 388 # Make copy of flag processors so each revlog instance can support
388 389 # custom flags.
389 390 self._flagprocessors = dict(flagutil.flagprocessors)
390 391
391 392 # 3-tuple of file handles being used for active writing.
392 393 self._writinghandles = None
393 394 # prevent nesting of addgroup
394 395 self._adding_group = None
395 396
396 397 self._loadindex()
397 398
398 399 self._concurrencychecker = concurrencychecker
399 400
400 401 # parent order is supposed to be semantically irrelevant, so we
401 402 # normally resort parents to ensure that the first parent is non-null,
402 403 # if there is a non-null parent at all.
403 404 # filelog abuses the parent order as flag to mark some instances of
404 405 # meta-encoded files, so allow it to disable this behavior.
405 406 self.canonical_parent_order = canonical_parent_order
406 407
407 408 def _init_opts(self):
408 409 """process options (from above/config) to setup associated default revlog mode
409 410
410 411 These values might be affected when actually reading the on-disk information.
411 412
412 413 The relevant values are returned for use in _loadindex().
413 414
414 415 * new_header:
415 416 version header to use if we need to create a new revlog
416 417
417 418 * mmapindexthreshold:
418 419 minimal index size at which to start using mmap
419 420
420 421 * force_nodemap:
421 422 force the usage of a "development" version of the nodemap code
422 423 """
423 424 mmapindexthreshold = None
424 425 opts = self.opener.options
425 426
426 427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
427 428 new_header = CHANGELOGV2
428 429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
429 430 elif b'revlogv2' in opts:
430 431 new_header = REVLOGV2
431 432 elif b'revlogv1' in opts:
432 433 new_header = REVLOGV1 | FLAG_INLINE_DATA
433 434 if b'generaldelta' in opts:
434 435 new_header |= FLAG_GENERALDELTA
435 436 elif b'revlogv0' in self.opener.options:
436 437 new_header = REVLOGV0
437 438 else:
438 439 new_header = REVLOG_DEFAULT_VERSION
439 440
440 441 if b'chunkcachesize' in opts:
441 442 self._chunkcachesize = opts[b'chunkcachesize']
442 443 if b'maxchainlen' in opts:
443 444 self._maxchainlen = opts[b'maxchainlen']
444 445 if b'deltabothparents' in opts:
445 446 self._deltabothparents = opts[b'deltabothparents']
446 447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
447 448 if dps_cgds:
448 449 self._candidate_group_chunk_size = dps_cgds
449 450 self._lazydelta = bool(opts.get(b'lazydelta', True))
450 451 self._lazydeltabase = False
451 452 if self._lazydelta:
452 453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
453 454 if b'debug-delta' in opts:
454 455 self._debug_delta = opts[b'debug-delta']
455 456 if b'compengine' in opts:
456 457 self._compengine = opts[b'compengine']
457 458 if b'zlib.level' in opts:
458 459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
459 460 if b'zstd.level' in opts:
460 461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
461 462 if b'maxdeltachainspan' in opts:
462 463 self._maxdeltachainspan = opts[b'maxdeltachainspan']
463 464 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
464 465 mmapindexthreshold = opts[b'mmapindexthreshold']
465 466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
466 467 withsparseread = bool(opts.get(b'with-sparse-read', False))
467 468 # sparse-revlog forces sparse-read
468 469 self._withsparseread = self._sparserevlog or withsparseread
469 470 if b'sparse-read-density-threshold' in opts:
470 471 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
471 472 if b'sparse-read-min-gap-size' in opts:
472 473 self._srmingapsize = opts[b'sparse-read-min-gap-size']
473 474 if opts.get(b'enableellipsis'):
474 475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
475 476
476 477 # revlog v0 doesn't have flag processors
477 478 for flag, processor in opts.get(b'flagprocessors', {}).items():
478 479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
479 480
480 481 if self._chunkcachesize <= 0:
481 482 raise error.RevlogError(
482 483 _(b'revlog chunk cache size %r is not greater than 0')
483 484 % self._chunkcachesize
484 485 )
485 486 elif self._chunkcachesize & (self._chunkcachesize - 1):
486 487 raise error.RevlogError(
487 488 _(b'revlog chunk cache size %r is not a power of 2')
488 489 % self._chunkcachesize
489 490 )
490 491 force_nodemap = opts.get(b'devel-force-nodemap', False)
491 492 return new_header, mmapindexthreshold, force_nodemap
492 493
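# For instance (a sketch of the composition above): with `revlogv1` and
# `generaldelta` both set in the options, the returned header is
#
#     new_header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
#
# i.e. version 1 in the low 16 bits and both feature flags in the high
# 16 bits.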
493 494 def _get_data(self, filepath, mmap_threshold, size=None):
494 495 """return a file content with or without mmap
495 496
496 497 If the file is missing return the empty string"""
497 498 try:
498 499 with self.opener(filepath) as fp:
499 500 if mmap_threshold is not None:
500 501 file_size = self.opener.fstat(fp).st_size
501 502 if file_size >= mmap_threshold:
502 503 if size is not None:
503 504 # avoid potential mmap crash
504 505 size = min(file_size, size)
505 506 # TODO: should .close() to release resources without
506 507 # relying on Python GC
507 508 if size is None:
508 509 return util.buffer(util.mmapread(fp))
509 510 else:
510 511 return util.buffer(util.mmapread(fp, size))
511 512 if size is None:
512 513 return fp.read()
513 514 else:
514 515 return fp.read(size)
515 516 except FileNotFoundError:
516 517 return b''
517 518
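# E.g. (a sketch): with mmap_threshold=65536, a 1 MiB index file would be
# mmapped and wrapped in util.buffer(), while a 4 KiB one is read() into
# memory directly; a missing file yields b'' in both cases.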
518 519 def get_streams(self, max_linkrev, force_inline=False):
519 520 n = len(self)
520 521 index = self.index
521 522 while n > 0:
522 523 linkrev = index[n - 1][4]
523 524 if linkrev < max_linkrev:
524 525 break
525 526 # note: this loop will rarely go through multiple iterations, since
526 527 # it only traverses commits created during the current streaming
527 528 # pull operation.
528 529 #
529 530 # If this becomes a problem, using a binary search should cap the
530 531 # runtime of this.
531 532 n = n - 1
532 533 if n == 0:
533 534 # no data to send
534 535 return []
535 536 index_size = n * index.entry_size
536 537 data_size = self.end(n - 1)
537 538
538 539 # XXX we might have been split (or stripped) since the object
539 540 # initialization. We need to close this race too, e.g. by having a way
540 541 # to pre-open the files we feed to the revlog and never closing them
541 542 # before we are done streaming.
542 543
543 544 if self._inline:
544 545
545 546 def get_stream():
546 547 with self._indexfp() as fp:
547 548 yield None
548 549 size = index_size + data_size
549 550 if size <= 65536:
550 551 yield fp.read(size)
551 552 else:
552 553 yield from util.filechunkiter(fp, limit=size)
553 554
554 555 inline_stream = get_stream()
555 556 next(inline_stream)
556 557 return [
557 558 (self._indexfile, inline_stream, index_size + data_size),
558 559 ]
559 560 elif force_inline:
560 561
561 562 def get_stream():
562 563 with self._datafp() as fp_d:
563 564 yield None
564 565
565 566 for rev in range(n):
566 567 idx = self.index.entry_binary(rev)
567 568 if rev == 0 and self._docket is None:
568 569 # re-inject the inline flag
569 570 header = self._format_flags
570 571 header |= self._format_version
571 572 header |= FLAG_INLINE_DATA
572 573 header = self.index.pack_header(header)
573 574 idx = header + idx
574 575 yield idx
575 576 yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
576 577
577 578 inline_stream = get_stream()
578 579 next(inline_stream)
579 580 return [
580 581 (self._indexfile, inline_stream, index_size + data_size),
581 582 ]
582 583 else:
583 584
584 585 def get_index_stream():
585 586 with self._indexfp() as fp:
586 587 yield None
587 588 if index_size <= 65536:
588 589 yield fp.read(index_size)
589 590 else:
590 591 yield from util.filechunkiter(fp, limit=index_size)
591 592
592 593 def get_data_stream():
593 594 with self._datafp() as fp:
594 595 yield None
595 596 if data_size <= 65536:
596 597 yield fp.read(data_size)
597 598 else:
598 599 yield from util.filechunkiter(fp, limit=data_size)
599 600
600 601 index_stream = get_index_stream()
601 602 next(index_stream)
602 603 data_stream = get_data_stream()
603 604 next(data_stream)
604 605 return [
605 606 (self._datafile, data_stream, data_size),
606 607 (self._indexfile, index_stream, index_size),
607 608 ]
608 609
609 610 def _loadindex(self, docket=None):
610 611
611 612 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
612 613
613 614 if self.postfix is not None:
614 615 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
615 616 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
616 617 entry_point = b'%s.i.a' % self.radix
617 618 elif self._try_split and self.opener.exists(self._split_index_file):
618 619 entry_point = self._split_index_file
619 620 else:
620 621 entry_point = b'%s.i' % self.radix
621 622
622 623 if docket is not None:
623 624 self._docket = docket
624 625 self._docket_file = entry_point
625 626 else:
626 627 self._initempty = True
627 628 entry_data = self._get_data(entry_point, mmapindexthreshold)
628 629 if len(entry_data) > 0:
629 630 header = INDEX_HEADER.unpack(entry_data[:4])[0]
630 631 self._initempty = False
631 632 else:
632 633 header = new_header
633 634
634 635 self._format_flags = header & ~0xFFFF
635 636 self._format_version = header & 0xFFFF
636 637
637 638 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
638 639 if supported_flags is None:
639 640 msg = _(b'unknown version (%d) in revlog %s')
640 641 msg %= (self._format_version, self.display_id)
641 642 raise error.RevlogError(msg)
642 643 elif self._format_flags & ~supported_flags:
643 644 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
644 645 display_flag = self._format_flags >> 16
645 646 msg %= (display_flag, self._format_version, self.display_id)
646 647 raise error.RevlogError(msg)
647 648
648 649 features = FEATURES_BY_VERSION[self._format_version]
649 650 self._inline = features[b'inline'](self._format_flags)
650 651 self._generaldelta = features[b'generaldelta'](self._format_flags)
651 652 self.hassidedata = features[b'sidedata']
652 653
653 654 if not features[b'docket']:
654 655 self._indexfile = entry_point
655 656 index_data = entry_data
656 657 else:
657 658 self._docket_file = entry_point
658 659 if self._initempty:
659 660 self._docket = docketutil.default_docket(self, header)
660 661 else:
661 662 self._docket = docketutil.parse_docket(
662 663 self, entry_data, use_pending=self._trypending
663 664 )
664 665
665 666 if self._docket is not None:
666 667 self._indexfile = self._docket.index_filepath()
667 668 index_data = b''
668 669 index_size = self._docket.index_end
669 670 if index_size > 0:
670 671 index_data = self._get_data(
671 672 self._indexfile, mmapindexthreshold, size=index_size
672 673 )
673 674 if len(index_data) < index_size:
674 675 msg = _(b'too few index data for %s: got %d, expected %d')
675 676 msg %= (self.display_id, len(index_data), index_size)
676 677 raise error.RevlogError(msg)
677 678
678 679 self._inline = False
679 680 # generaldelta implied by version 2 revlogs.
680 681 self._generaldelta = True
681 682 # the logic for persistent nodemap will be dealt with within the
682 683 # main docket, so disable it for now.
683 684 self._nodemap_file = None
684 685
685 686 if self._docket is not None:
686 687 self._datafile = self._docket.data_filepath()
687 688 self._sidedatafile = self._docket.sidedata_filepath()
688 689 elif self.postfix is None:
689 690 self._datafile = b'%s.d' % self.radix
690 691 else:
691 692 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
692 693
693 694 self.nodeconstants = sha1nodeconstants
694 695 self.nullid = self.nodeconstants.nullid
695 696
696 697 # sparse-revlog can't be on without general-delta (issue6056)
697 698 if not self._generaldelta:
698 699 self._sparserevlog = False
699 700
700 701 self._storedeltachains = True
701 702
702 703 devel_nodemap = (
703 704 self._nodemap_file
704 705 and force_nodemap
705 706 and parse_index_v1_nodemap is not None
706 707 )
707 708
708 709 use_rust_index = False
709 710 if rustrevlog is not None:
710 711 if self._nodemap_file is not None:
711 712 use_rust_index = True
712 713 else:
713 714 use_rust_index = self.opener.options.get(b'rust.index')
714 715
715 716 self._parse_index = parse_index_v1
716 717 if self._format_version == REVLOGV0:
717 718 self._parse_index = revlogv0.parse_index_v0
718 719 elif self._format_version == REVLOGV2:
719 720 self._parse_index = parse_index_v2
720 721 elif self._format_version == CHANGELOGV2:
721 722 self._parse_index = parse_index_cl_v2
722 723 elif devel_nodemap:
723 724 self._parse_index = parse_index_v1_nodemap
724 725 elif use_rust_index:
725 726 self._parse_index = parse_index_v1_mixed
726 727 try:
727 728 d = self._parse_index(index_data, self._inline)
728 729 index, chunkcache = d
729 730 use_nodemap = (
730 731 not self._inline
731 732 and self._nodemap_file is not None
732 733 and hasattr(index, 'update_nodemap_data')
733 734 )
734 735 if use_nodemap:
735 736 nodemap_data = nodemaputil.persisted_data(self)
736 737 if nodemap_data is not None:
737 738 docket = nodemap_data[0]
738 739 if (
739 740 len(d[0]) > docket.tip_rev
740 741 and d[0][docket.tip_rev][7] == docket.tip_node
741 742 ):
742 743 # no changelog tampering
743 744 self._nodemap_docket = docket
744 745 index.update_nodemap_data(*nodemap_data)
745 746 except (ValueError, IndexError):
746 747 raise error.RevlogError(
747 748 _(b"index %s is corrupted") % self.display_id
748 749 )
749 750 self.index = index
750 751 self._segmentfile = randomaccessfile.randomaccessfile(
751 752 self.opener,
752 753 (self._indexfile if self._inline else self._datafile),
753 754 self._chunkcachesize,
754 755 chunkcache,
755 756 )
756 757 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
757 758 self.opener,
758 759 self._sidedatafile,
759 760 self._chunkcachesize,
760 761 )
761 762 # revnum -> (chain-length, sum-delta-length)
762 763 self._chaininfocache = util.lrucachedict(500)
763 764 # revlog header -> revlog compressor
764 765 self._decompressors = {}
765 766
766 767 def get_revlog(self):
767 768 """simple function to mirror API of other not-really-revlog API"""
768 769 return self
769 770
770 771 @util.propertycache
771 772 def revlog_kind(self):
772 773 return self.target[0]
773 774
774 775 @util.propertycache
775 776 def display_id(self):
776 777 """The public facing "ID" of the revlog that we use in message"""
777 778 if self.revlog_kind == KIND_FILELOG:
778 779 # Reference the file without the "data/" prefix, so it is familiar
779 780 # to the user.
780 781 return self.target[1]
781 782 else:
782 783 return self.radix
783 784
784 785 def _get_decompressor(self, t):
785 786 try:
786 787 compressor = self._decompressors[t]
787 788 except KeyError:
788 789 try:
789 790 engine = util.compengines.forrevlogheader(t)
790 791 compressor = engine.revlogcompressor(self._compengineopts)
791 792 self._decompressors[t] = compressor
792 793 except KeyError:
793 794 raise error.RevlogError(
794 795 _(b'unknown compression type %s') % binascii.hexlify(t)
795 796 )
796 797 return compressor
797 798
798 799 @util.propertycache
799 800 def _compressor(self):
800 801 engine = util.compengines[self._compengine]
801 802 return engine.revlogcompressor(self._compengineopts)
802 803
803 804 @util.propertycache
804 805 def _decompressor(self):
805 806 """the default decompressor"""
806 807 if self._docket is None:
807 808 return None
808 809 t = self._docket.default_compression_header
809 810 c = self._get_decompressor(t)
810 811 return c.decompress
811 812
812 813 def _indexfp(self):
813 814 """file object for the revlog's index file"""
814 815 return self.opener(self._indexfile, mode=b"r")
815 816
816 817 def __index_write_fp(self):
817 818 # You should not use this directly; use `_writing` instead
818 819 try:
819 820 f = self.opener(
820 821 self._indexfile, mode=b"r+", checkambig=self._checkambig
821 822 )
822 823 if self._docket is None:
823 824 f.seek(0, os.SEEK_END)
824 825 else:
825 826 f.seek(self._docket.index_end, os.SEEK_SET)
826 827 return f
827 828 except FileNotFoundError:
828 829 return self.opener(
829 830 self._indexfile, mode=b"w+", checkambig=self._checkambig
830 831 )
831 832
832 833 def __index_new_fp(self):
833 834 # You should not use this unless you are upgrading from an inline revlog
834 835 return self.opener(
835 836 self._indexfile,
836 837 mode=b"w",
837 838 checkambig=self._checkambig,
838 839 atomictemp=True,
839 840 )
840 841
841 842 def _datafp(self, mode=b'r'):
842 843 """file object for the revlog's data file"""
843 844 return self.opener(self._datafile, mode=mode)
844 845
845 846 @contextlib.contextmanager
846 847 def _sidedatareadfp(self):
847 848 """file object suitable to read sidedata"""
848 849 if self._writinghandles:
849 850 yield self._writinghandles[2]
850 851 else:
851 852 with self.opener(self._sidedatafile) as fp:
852 853 yield fp
853 854
854 855 def tiprev(self):
855 856 return len(self.index) - 1
856 857
857 858 def tip(self):
858 859 return self.node(self.tiprev())
859 860
860 861 def __contains__(self, rev):
861 862 return 0 <= rev < len(self)
862 863
863 864 def __len__(self):
864 865 return len(self.index)
865 866
866 867 def __iter__(self):
867 868 return iter(range(len(self)))
868 869
869 870 def revs(self, start=0, stop=None):
870 871 """iterate over all rev in this revlog (from start to stop)"""
871 872 return storageutil.iterrevs(len(self), start=start, stop=stop)
872 873
873 874 def hasnode(self, node):
874 875 try:
875 876 self.rev(node)
876 877 return True
877 878 except KeyError:
878 879 return False
879 880
880 881 def candelta(self, baserev, rev):
881 882 """whether two revisions (baserev, rev) can be delta-ed or not"""
882 883 # Disable delta if either rev requires a content-changing flag
883 884 # processor (ex. LFS). This is because such a flag processor can alter
884 885 # the rawtext content that the delta will be based on, and two clients
885 886 # could have the same revlog node with different flags (i.e. different
886 887 # rawtext contents) and the delta could be incompatible.
887 888 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
888 889 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
889 890 ):
890 891 return False
891 892 return True
892 893
893 894 def update_caches(self, transaction):
894 895 if self._nodemap_file is not None:
895 896 if transaction is None:
896 897 nodemaputil.update_persistent_nodemap(self)
897 898 else:
898 899 nodemaputil.setup_persistent_nodemap(transaction, self)
899 900
900 901 def clearcaches(self):
901 902 self._revisioncache = None
902 903 self._chainbasecache.clear()
903 904 self._segmentfile.clear_cache()
904 905 self._segmentfile_sidedata.clear_cache()
905 906 self._pcache = {}
906 907 self._nodemap_docket = None
907 908 self.index.clearcaches()
908 909 # The python code is the one responsible for validating the docket, so
909 910 # we end up having to refresh it here.
910 911 use_nodemap = (
911 912 not self._inline
912 913 and self._nodemap_file is not None
913 914 and hasattr(self.index, 'update_nodemap_data')
914 915 )
915 916 if use_nodemap:
916 917 nodemap_data = nodemaputil.persisted_data(self)
917 918 if nodemap_data is not None:
918 919 self._nodemap_docket = nodemap_data[0]
919 920 self.index.update_nodemap_data(*nodemap_data)
920 921
921 922 def rev(self, node):
922 923 try:
923 924 return self.index.rev(node)
924 925 except TypeError:
925 926 raise
926 927 except error.RevlogError:
927 928 # parsers.c radix tree lookup failed
928 929 if (
929 930 node == self.nodeconstants.wdirid
930 931 or node in self.nodeconstants.wdirfilenodeids
931 932 ):
932 933 raise error.WdirUnsupported
933 934 raise error.LookupError(node, self.display_id, _(b'no node'))
934 935
935 936 # Accessors for index entries.
936 937
937 938 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
938 939 # are flags.
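# E.g. (a sketch): if index[rev][0] == (0x2A << 16) | 0x0001, then
# start(rev) == 0x2A (the 6-byte offset) and flags(rev) == 0x0001
# (the 2-byte flags).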
939 940 def start(self, rev):
940 941 return int(self.index[rev][0] >> 16)
941 942
942 943 def sidedata_cut_off(self, rev):
943 944 sd_cut_off = self.index[rev][8]
944 945 if sd_cut_off != 0:
945 946 return sd_cut_off
946 947 # This is some annoying dance, because entries without sidedata
947 948 # currently use 0 as their offset (instead of previous-offset +
948 949 # previous-size).
949 950 #
950 951 # We should reconsider this sidedata → 0 sidedata_offset policy.
951 952 # In the meantime, we need this.
952 953 while 0 <= rev:
953 954 e = self.index[rev]
954 955 if e[9] != 0:
955 956 return e[8] + e[9]
956 957 rev -= 1
957 958 return 0
958 959
959 960 def flags(self, rev):
960 961 return self.index[rev][0] & 0xFFFF
961 962
962 963 def length(self, rev):
963 964 return self.index[rev][1]
964 965
965 966 def sidedata_length(self, rev):
966 967 if not self.hassidedata:
967 968 return 0
968 969 return self.index[rev][9]
969 970
970 971 def rawsize(self, rev):
971 972 """return the length of the uncompressed text for a given revision"""
972 973 l = self.index[rev][2]
973 974 if l >= 0:
974 975 return l
975 976
976 977 t = self.rawdata(rev)
977 978 return len(t)
978 979
979 980 def size(self, rev):
980 981 """length of non-raw text (processed by a "read" flag processor)"""
981 982 # fast path: if no "read" flag processor could change the content,
982 983 # size is rawsize. note: ELLIPSIS is known to not change the content.
983 984 flags = self.flags(rev)
984 985 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
985 986 return self.rawsize(rev)
986 987
987 988 return len(self.revision(rev))
988 989
989 990 def fast_rank(self, rev):
990 991 """Return the rank of a revision if already known, or None otherwise.
991 992
992 993 The rank of a revision is the size of the sub-graph it defines as a
993 994 head. Equivalently, the rank of a revision `r` is the size of the set
994 995 `ancestors(r)`, `r` included.
995 996
996 997 This method returns the rank retrieved from the revlog in constant
997 998 time. It makes no attempt at computing unknown values for versions of
998 999 the revlog which do not persist the rank.
999 1000 """
1000 1001 rank = self.index[rev][ENTRY_RANK]
1001 1002 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1002 1003 return None
1003 1004 if rev == nullrev:
1004 1005 return 0 # convention
1005 1006 return rank
1006 1007
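# Rank example (a sketch): in a graph where 0 <- 1 <- 2 is a linear chain
# and rev 4 merges rev 2 with an independent root 3, rank(2) == 3
# (ancestors {0, 1, 2}) and rank(4) == 5 (all five revisions), provided
# the revlog version persists ranks.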
1007 1008 def chainbase(self, rev):
1008 1009 base = self._chainbasecache.get(rev)
1009 1010 if base is not None:
1010 1011 return base
1011 1012
1012 1013 index = self.index
1013 1014 iterrev = rev
1014 1015 base = index[iterrev][3]
1015 1016 while base != iterrev:
1016 1017 iterrev = base
1017 1018 base = index[iterrev][3]
1018 1019
1019 1020 self._chainbasecache[rev] = base
1020 1021 return base
1021 1022
1022 1023 def linkrev(self, rev):
1023 1024 return self.index[rev][4]
1024 1025
1025 1026 def parentrevs(self, rev):
1026 1027 try:
1027 1028 entry = self.index[rev]
1028 1029 except IndexError:
1029 1030 if rev == wdirrev:
1030 1031 raise error.WdirUnsupported
1031 1032 raise
1032 1033
1033 1034 if self.canonical_parent_order and entry[5] == nullrev:
1034 1035 return entry[6], entry[5]
1035 1036 else:
1036 1037 return entry[5], entry[6]
1037 1038
1038 1039 # fast parentrevs(rev) where rev isn't filtered
1039 1040 _uncheckedparentrevs = parentrevs
1040 1041
1041 1042 def node(self, rev):
1042 1043 try:
1043 1044 return self.index[rev][7]
1044 1045 except IndexError:
1045 1046 if rev == wdirrev:
1046 1047 raise error.WdirUnsupported
1047 1048 raise
1048 1049
1049 1050 # Derived from index values.
1050 1051
1051 1052 def end(self, rev):
1052 1053 return self.start(rev) + self.length(rev)
1053 1054
1054 1055 def parents(self, node):
1055 1056 i = self.index
1056 1057 d = i[self.rev(node)]
1057 1058 # inline node() to avoid function call overhead
1058 1059 if self.canonical_parent_order and d[5] == self.nullid:
1059 1060 return i[d[6]][7], i[d[5]][7]
1060 1061 else:
1061 1062 return i[d[5]][7], i[d[6]][7]
1062 1063
1063 1064 def chainlen(self, rev):
1064 1065 return self._chaininfo(rev)[0]
1065 1066
1066 1067 def _chaininfo(self, rev):
1067 1068 chaininfocache = self._chaininfocache
1068 1069 if rev in chaininfocache:
1069 1070 return chaininfocache[rev]
1070 1071 index = self.index
1071 1072 generaldelta = self._generaldelta
1072 1073 iterrev = rev
1073 1074 e = index[iterrev]
1074 1075 clen = 0
1075 1076 compresseddeltalen = 0
1076 1077 while iterrev != e[3]:
1077 1078 clen += 1
1078 1079 compresseddeltalen += e[1]
1079 1080 if generaldelta:
1080 1081 iterrev = e[3]
1081 1082 else:
1082 1083 iterrev -= 1
1083 1084 if iterrev in chaininfocache:
1084 1085 t = chaininfocache[iterrev]
1085 1086 clen += t[0]
1086 1087 compresseddeltalen += t[1]
1087 1088 break
1088 1089 e = index[iterrev]
1089 1090 else:
1090 1091 # Add text length of base since decompressing that also takes
1091 1092 # work. For cache hits the length is already included.
1092 1093 compresseddeltalen += e[1]
1093 1094 r = (clen, compresseddeltalen)
1094 1095 chaininfocache[rev] = r
1095 1096 return r
1096 1097
1097 1098 def _deltachain(self, rev, stoprev=None):
1098 1099 """Obtain the delta chain for a revision.
1099 1100
1100 1101 ``stoprev`` specifies a revision to stop at. If not specified, we
1101 1102 stop at the base of the chain.
1102 1103
1103 1104 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1104 1105 revs in ascending order and ``stopped`` is a bool indicating whether
1105 1106 ``stoprev`` was hit.
1106 1107 """
1107 1108 # Try C implementation.
1108 1109 try:
1109 1110 return self.index.deltachain(rev, stoprev, self._generaldelta)
1110 1111 except AttributeError:
1111 1112 pass
1112 1113
1113 1114 chain = []
1114 1115
1115 1116 # Alias to prevent attribute lookup in tight loop.
1116 1117 index = self.index
1117 1118 generaldelta = self._generaldelta
1118 1119
1119 1120 iterrev = rev
1120 1121 e = index[iterrev]
1121 1122 while iterrev != e[3] and iterrev != stoprev:
1122 1123 chain.append(iterrev)
1123 1124 if generaldelta:
1124 1125 iterrev = e[3]
1125 1126 else:
1126 1127 iterrev -= 1
1127 1128 e = index[iterrev]
1128 1129
1129 1130 if iterrev == stoprev:
1130 1131 stopped = True
1131 1132 else:
1132 1133 chain.append(iterrev)
1133 1134 stopped = False
1134 1135
1135 1136 chain.reverse()
1136 1137 return chain, stopped
1137 1138
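# Delta chain example (a sketch): with generaldelta, if rev 5 stores a
# delta against base 3 and rev 3 is a full snapshot (its base is itself),
# _deltachain(5) returns ([3, 5], False); without generaldelta the walk
# steps to rev - 1 instead of following the stored base.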
1138 1139 def ancestors(self, revs, stoprev=0, inclusive=False):
1139 1140 """Generate the ancestors of 'revs' in reverse revision order.
1140 1141 Does not generate revs lower than stoprev.
1141 1142
1142 1143 See the documentation for ancestor.lazyancestors for more details."""
1143 1144
1144 1145 # first, make sure start revisions aren't filtered
1145 1146 revs = list(revs)
1146 1147 checkrev = self.node
1147 1148 for r in revs:
1148 1149 checkrev(r)
1149 1150 # and we're sure ancestors aren't filtered as well
1150 1151
1151 1152 if rustancestor is not None and self.index.rust_ext_compat:
1152 1153 lazyancestors = rustancestor.LazyAncestors
1153 1154 arg = self.index
1154 1155 else:
1155 1156 lazyancestors = ancestor.lazyancestors
1156 1157 arg = self._uncheckedparentrevs
1157 1158 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1158 1159
1159 1160 def descendants(self, revs):
1160 1161 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1161 1162
1162 1163 def findcommonmissing(self, common=None, heads=None):
1163 1164 """Return a tuple of the ancestors of common and the ancestors of heads
1164 1165 that are not ancestors of common. In revset terminology, we return the
1165 1166 tuple:
1166 1167
1167 1168 ::common, (::heads) - (::common)
1168 1169
1169 1170 The list is sorted by revision number, meaning it is
1170 1171 topologically sorted.
1171 1172
1172 1173 'heads' and 'common' are both lists of node IDs. If heads is
1173 1174 not supplied, uses all of the revlog's heads. If common is not
1174 1175 supplied, uses nullid."""
1175 1176 if common is None:
1176 1177 common = [self.nullid]
1177 1178 if heads is None:
1178 1179 heads = self.heads()
1179 1180
1180 1181 common = [self.rev(n) for n in common]
1181 1182 heads = [self.rev(n) for n in heads]
1182 1183
1183 1184 # we want the ancestors, but inclusive
1184 1185 class lazyset:
1185 1186 def __init__(self, lazyvalues):
1186 1187 self.addedvalues = set()
1187 1188 self.lazyvalues = lazyvalues
1188 1189
1189 1190 def __contains__(self, value):
1190 1191 return value in self.addedvalues or value in self.lazyvalues
1191 1192
1192 1193 def __iter__(self):
1193 1194 added = self.addedvalues
1194 1195 for r in added:
1195 1196 yield r
1196 1197 for r in self.lazyvalues:
1197 1198 if r not in added:
1198 1199 yield r
1199 1200
1200 1201 def add(self, value):
1201 1202 self.addedvalues.add(value)
1202 1203
1203 1204 def update(self, values):
1204 1205 self.addedvalues.update(values)
1205 1206
1206 1207 has = lazyset(self.ancestors(common))
1207 1208 has.add(nullrev)
1208 1209 has.update(common)
1209 1210
1210 1211 # take all ancestors from heads that aren't in has
1211 1212 missing = set()
1212 1213 visit = collections.deque(r for r in heads if r not in has)
1213 1214 while visit:
1214 1215 r = visit.popleft()
1215 1216 if r in missing:
1216 1217 continue
1217 1218 else:
1218 1219 missing.add(r)
1219 1220 for p in self.parentrevs(r):
1220 1221 if p not in has:
1221 1222 visit.append(p)
1222 1223 missing = list(missing)
1223 1224 missing.sort()
1224 1225 return has, [self.node(miss) for miss in missing]
1225 1226
1226 1227 def incrementalmissingrevs(self, common=None):
1227 1228 """Return an object that can be used to incrementally compute the
1228 1229 revision numbers of the ancestors of arbitrary sets that are not
1229 1230 ancestors of common. This is an ancestor.incrementalmissingancestors
1230 1231 object.
1231 1232
1232 1233 'common' is a list of revision numbers. If common is not supplied, uses
1233 1234 nullrev.
1234 1235 """
1235 1236 if common is None:
1236 1237 common = [nullrev]
1237 1238
1238 1239 if rustancestor is not None and self.index.rust_ext_compat:
1239 1240 return rustancestor.MissingAncestors(self.index, common)
1240 1241 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1241 1242
1242 1243 def findmissingrevs(self, common=None, heads=None):
1243 1244 """Return the revision numbers of the ancestors of heads that
1244 1245 are not ancestors of common.
1245 1246
1246 1247 More specifically, return a list of revision numbers corresponding to
1247 1248 nodes N such that every N satisfies the following constraints:
1248 1249
1249 1250 1. N is an ancestor of some node in 'heads'
1250 1251 2. N is not an ancestor of any node in 'common'
1251 1252
1252 1253 The list is sorted by revision number, meaning it is
1253 1254 topologically sorted.
1254 1255
1255 1256 'heads' and 'common' are both lists of revision numbers. If heads is
1256 1257 not supplied, uses all of the revlog's heads. If common is not
1257 1258 supplied, uses nullid."""
1258 1259 if common is None:
1259 1260 common = [nullrev]
1260 1261 if heads is None:
1261 1262 heads = self.headrevs()
1262 1263
1263 1264 inc = self.incrementalmissingrevs(common=common)
1264 1265 return inc.missingancestors(heads)
1265 1266
1266 1267 def findmissing(self, common=None, heads=None):
1267 1268 """Return the ancestors of heads that are not ancestors of common.
1268 1269
1269 1270 More specifically, return a list of nodes N such that every N
1270 1271 satisfies the following constraints:
1271 1272
1272 1273 1. N is an ancestor of some node in 'heads'
1273 1274 2. N is not an ancestor of any node in 'common'
1274 1275
1275 1276 The list is sorted by revision number, meaning it is
1276 1277 topologically sorted.
1277 1278
1278 1279 'heads' and 'common' are both lists of node IDs. If heads is
1279 1280 not supplied, uses all of the revlog's heads. If common is not
1280 1281 supplied, uses nullid."""
1281 1282 if common is None:
1282 1283 common = [self.nullid]
1283 1284 if heads is None:
1284 1285 heads = self.heads()
1285 1286
1286 1287 common = [self.rev(n) for n in common]
1287 1288 heads = [self.rev(n) for n in heads]
1288 1289
1289 1290 inc = self.incrementalmissingrevs(common=common)
1290 1291 return [self.node(r) for r in inc.missingancestors(heads)]
1291 1292
1292 1293 def nodesbetween(self, roots=None, heads=None):
1293 1294 """Return a topological path from 'roots' to 'heads'.
1294 1295
1295 1296 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1296 1297 topologically sorted list of all nodes N that satisfy both of
1297 1298 these constraints:
1298 1299
1299 1300 1. N is a descendant of some node in 'roots'
1300 1301 2. N is an ancestor of some node in 'heads'
1301 1302
1302 1303 Every node is considered to be both a descendant and an ancestor
1303 1304 of itself, so every reachable node in 'roots' and 'heads' will be
1304 1305 included in 'nodes'.
1305 1306
1306 1307 'outroots' is the list of reachable nodes in 'roots', i.e., the
1307 1308 subset of 'roots' that is returned in 'nodes'. Likewise,
1308 1309 'outheads' is the subset of 'heads' that is also in 'nodes'.
1309 1310
1310 1311 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1311 1312 unspecified, uses nullid as the only root. If 'heads' is
1312 1313 unspecified, uses list of all of the revlog's heads."""
1313 1314 nonodes = ([], [], [])
1314 1315 if roots is not None:
1315 1316 roots = list(roots)
1316 1317 if not roots:
1317 1318 return nonodes
1318 1319 lowestrev = min([self.rev(n) for n in roots])
1319 1320 else:
1320 1321 roots = [self.nullid] # Everybody's a descendant of nullid
1321 1322 lowestrev = nullrev
1322 1323 if (lowestrev == nullrev) and (heads is None):
1323 1324 # We want _all_ the nodes!
1324 1325 return (
1325 1326 [self.node(r) for r in self],
1326 1327 [self.nullid],
1327 1328 list(self.heads()),
1328 1329 )
1329 1330 if heads is None:
1330 1331 # All nodes are ancestors, so the latest ancestor is the last
1331 1332 # node.
1332 1333 highestrev = len(self) - 1
1333 1334 # Set ancestors to None to signal that every node is an ancestor.
1334 1335 ancestors = None
1335 1336 # Set heads to an empty dictionary for later discovery of heads
1336 1337 heads = {}
1337 1338 else:
1338 1339 heads = list(heads)
1339 1340 if not heads:
1340 1341 return nonodes
1341 1342 ancestors = set()
1342 1343 # Turn heads into a dictionary so we can remove 'fake' heads.
1343 1344 # Also, later we will be using it to filter out the heads we can't
1344 1345 # find from roots.
1345 1346 heads = dict.fromkeys(heads, False)
1346 1347 # Start at the top and keep marking parents until we're done.
1347 1348 nodestotag = set(heads)
1348 1349 # Remember where the top was so we can use it as a limit later.
1349 1350 highestrev = max([self.rev(n) for n in nodestotag])
1350 1351 while nodestotag:
1351 1352 # grab a node to tag
1352 1353 n = nodestotag.pop()
1353 1354 # Never tag nullid
1354 1355 if n == self.nullid:
1355 1356 continue
1356 1357 # A node's revision number represents its place in a
1357 1358 # topologically sorted list of nodes.
1358 1359 r = self.rev(n)
1359 1360 if r >= lowestrev:
1360 1361 if n not in ancestors:
1361 1362 # If we are possibly a descendant of one of the roots
1362 1363 # and we haven't already been marked as an ancestor
1363 1364 ancestors.add(n) # Mark as ancestor
1364 1365 # Add non-nullid parents to list of nodes to tag.
1365 1366 nodestotag.update(
1366 1367 [p for p in self.parents(n) if p != self.nullid]
1367 1368 )
1368 1369 elif n in heads: # We've seen it before, is it a fake head?
1369 1370 # So it is, real heads should not be the ancestors of
1370 1371 # any other heads.
1371 1372 heads.pop(n)
1372 1373 if not ancestors:
1373 1374 return nonodes
1374 1375 # Now that we have our set of ancestors, we want to remove any
1375 1376 # roots that are not ancestors.
1376 1377
1377 1378 # If one of the roots was nullid, everything is included anyway.
1378 1379 if lowestrev > nullrev:
1379 1380 # But, since we weren't, let's recompute the lowest rev to not
1380 1381 # include roots that aren't ancestors.
1381 1382
1382 1383 # Filter out roots that aren't ancestors of heads
1383 1384 roots = [root for root in roots if root in ancestors]
1384 1385 # Recompute the lowest revision
1385 1386 if roots:
1386 1387 lowestrev = min([self.rev(root) for root in roots])
1387 1388 else:
1388 1389 # No more roots? Return empty list
1389 1390 return nonodes
1390 1391 else:
1391 1392 # We are descending from nullid, and don't need to care about
1392 1393 # any other roots.
1393 1394 lowestrev = nullrev
1394 1395 roots = [self.nullid]
1395 1396 # Transform our roots list into a set.
1396 1397 descendants = set(roots)
1397 1398 # Also, keep the original roots so we can filter out roots that aren't
1398 1399 # 'real' roots (i.e. are descended from other roots).
1399 1400 roots = descendants.copy()
1400 1401 # Our topologically sorted list of output nodes.
1401 1402 orderedout = []
1402 1403 # Don't start at nullid since we don't want nullid in our output list,
1403 1404 # and if nullid shows up in descendants, empty parents will look like
1404 1405 # they're descendants.
1405 1406 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1406 1407 n = self.node(r)
1407 1408 isdescendant = False
1408 1409 if lowestrev == nullrev: # Everybody is a descendant of nullid
1409 1410 isdescendant = True
1410 1411 elif n in descendants:
1411 1412 # n is already a descendant
1412 1413 isdescendant = True
1413 1414 # This check only needs to be done here because all the roots
1414 1415 # will start being marked as descendants before the loop.
1415 1416 if n in roots:
1416 1417 # If n was a root, check if it's a 'real' root.
1417 1418 p = tuple(self.parents(n))
1418 1419 # If any of its parents are descendants, it's not a root.
1419 1420 if (p[0] in descendants) or (p[1] in descendants):
1420 1421 roots.remove(n)
1421 1422 else:
1422 1423 p = tuple(self.parents(n))
1423 1424 # A node is a descendant if either of its parents are
1424 1425 # descendants. (We seeded the descendants set with the roots
1425 1426 # up there, remember?)
1426 1427 if (p[0] in descendants) or (p[1] in descendants):
1427 1428 descendants.add(n)
1428 1429 isdescendant = True
1429 1430 if isdescendant and ((ancestors is None) or (n in ancestors)):
1430 1431 # Only include nodes that are both descendants and ancestors.
1431 1432 orderedout.append(n)
1432 1433 if (ancestors is not None) and (n in heads):
1433 1434 # We're trying to figure out which heads are reachable
1434 1435 # from roots.
1435 1436 # Mark this head as having been reached
1436 1437 heads[n] = True
1437 1438 elif ancestors is None:
1438 1439 # Otherwise, we're trying to discover the heads.
1439 1440 # Assume this is a head because if it isn't, the next step
1440 1441 # will eventually remove it.
1441 1442 heads[n] = True
1442 1443 # But, obviously its parents aren't.
1443 1444 for p in self.parents(n):
1444 1445 heads.pop(p, None)
1445 1446 heads = [head for head, flag in heads.items() if flag]
1446 1447 roots = list(roots)
1447 1448 assert orderedout
1448 1449 assert roots
1449 1450 assert heads
1450 1451 return (orderedout, roots, heads)
1451 1452
1452 1453 def headrevs(self, revs=None):
1453 1454 if revs is None:
1454 1455 try:
1455 1456 return self.index.headrevs()
1456 1457 except AttributeError:
1457 1458 return self._headrevs()
1458 1459 if rustdagop is not None and self.index.rust_ext_compat:
1459 1460 return rustdagop.headrevs(self.index, revs)
1460 1461 return dagop.headrevs(revs, self._uncheckedparentrevs)
1461 1462
1462 1463 def computephases(self, roots):
1463 1464 return self.index.computephasesmapsets(roots)
1464 1465
1465 1466 def _headrevs(self):
1466 1467 count = len(self)
1467 1468 if not count:
1468 1469 return [nullrev]
1469 1470 # we won't iterate over filtered revs, so nobody is a head at start
1470 1471 ishead = [0] * (count + 1)
1471 1472 index = self.index
1472 1473 for r in self:
1473 1474 ishead[r] = 1 # I may be a head
1474 1475 e = index[r]
1475 1476 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1476 1477 return [r for r, val in enumerate(ishead) if val]
1477 1478
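# E.g. (a sketch): with revs 0 <- 1 and 0 <- 2, revisions 1 and 2 are
# never named as a parent so they keep ishead == 1, while 0 is cleared
# by both children; _headrevs() returns [1, 2].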
1478 1479 def heads(self, start=None, stop=None):
1479 1480 """return the list of all nodes that have no children
1480 1481
1481 1482 if start is specified, only heads that are descendants of
1482 1483 start will be returned
1483 1484 if stop is specified, it will consider all the revs from stop
1484 1485 as if they had no children
1485 1486 """
1486 1487 if start is None and stop is None:
1487 1488 if not len(self):
1488 1489 return [self.nullid]
1489 1490 return [self.node(r) for r in self.headrevs()]
1490 1491
1491 1492 if start is None:
1492 1493 start = nullrev
1493 1494 else:
1494 1495 start = self.rev(start)
1495 1496
1496 1497 stoprevs = {self.rev(n) for n in stop or []}
1497 1498
1498 1499 revs = dagop.headrevssubset(
1499 1500 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1500 1501 )
1501 1502
1502 1503 return [self.node(rev) for rev in revs]
1503 1504
1504 1505 def children(self, node):
1505 1506 """find the children of a given node"""
1506 1507 c = []
1507 1508 p = self.rev(node)
1508 1509 for r in self.revs(start=p + 1):
1509 1510 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1510 1511 if prevs:
1511 1512 for pr in prevs:
1512 1513 if pr == p:
1513 1514 c.append(self.node(r))
1514 1515 elif p == nullrev:
1515 1516 c.append(self.node(r))
1516 1517 return c
1517 1518
1518 1519 def commonancestorsheads(self, a, b):
1519 1520 """calculate all the heads of the common ancestors of nodes a and b"""
1520 1521 a, b = self.rev(a), self.rev(b)
1521 1522 ancs = self._commonancestorsheads(a, b)
1522 1523 return pycompat.maplist(self.node, ancs)
1523 1524
1524 1525 def _commonancestorsheads(self, *revs):
1525 1526 """calculate all the heads of the common ancestors of revs"""
1526 1527 try:
1527 1528 ancs = self.index.commonancestorsheads(*revs)
1528 1529 except (AttributeError, OverflowError): # C implementation failed
1529 1530 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1530 1531 return ancs
1531 1532
1532 1533 def isancestor(self, a, b):
1533 1534 """return True if node a is an ancestor of node b
1534 1535
1535 1536 A revision is considered an ancestor of itself."""
1536 1537 a, b = self.rev(a), self.rev(b)
1537 1538 return self.isancestorrev(a, b)
1538 1539
1539 1540 def isancestorrev(self, a, b):
1540 1541 """return True if revision a is an ancestor of revision b
1541 1542
1542 1543 A revision is considered an ancestor of itself.
1543 1544
1544 1545 The implementation of this is trivial but the use of
1545 1546 reachableroots is not."""
1546 1547 if a == nullrev:
1547 1548 return True
1548 1549 elif a == b:
1549 1550 return True
1550 1551 elif a > b:
1551 1552 return False
1552 1553 return bool(self.reachableroots(a, [b], [a], includepath=False))
1553 1554
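# E.g. (a sketch): isancestorrev(2, 5) asks reachableroots (defined
# below) for the heads reachable from rev 5 with rev 2 as the only root;
# a non-empty result means rev 2 is an ancestor of rev 5.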
1554 1555 def reachableroots(self, minroot, heads, roots, includepath=False):
1555 1556 """return (heads(::(<roots> and <roots>::<heads>)))
1556 1557
1557 1558 If includepath is True, return (<roots>::<heads>)."""
1558 1559 try:
1559 1560 return self.index.reachableroots2(
1560 1561 minroot, heads, roots, includepath
1561 1562 )
1562 1563 except AttributeError:
1563 1564 return dagop._reachablerootspure(
1564 1565 self.parentrevs, minroot, roots, heads, includepath
1565 1566 )
1566 1567
1567 1568 def ancestor(self, a, b):
1568 1569 """calculate the "best" common ancestor of nodes a and b"""
1569 1570
1570 1571 a, b = self.rev(a), self.rev(b)
1571 1572 try:
1572 1573 ancs = self.index.ancestors(a, b)
1573 1574 except (AttributeError, OverflowError):
1574 1575 ancs = ancestor.ancestors(self.parentrevs, a, b)
1575 1576 if ancs:
1576 1577 # choose a consistent winner when there's a tie
1577 1578 return min(map(self.node, ancs))
1578 1579 return self.nullid
1579 1580
1580 1581 def _match(self, id):
1581 1582 if isinstance(id, int):
1582 1583 # rev
1583 1584 return self.node(id)
1584 1585 if len(id) == self.nodeconstants.nodelen:
1585 1586 # possibly a binary node
1586 1587 # odds of a binary node being all hex in ASCII are 1 in 10**25
1587 1588 try:
1588 1589 node = id
1589 1590 self.rev(node) # quick search the index
1590 1591 return node
1591 1592 except error.LookupError:
1592 1593 pass # may be partial hex id
1593 1594 try:
1594 1595 # str(rev)
1595 1596 rev = int(id)
1596 1597 if b"%d" % rev != id:
1597 1598 raise ValueError
1598 1599 if rev < 0:
1599 1600 rev = len(self) + rev
1600 1601 if rev < 0 or rev >= len(self):
1601 1602 raise ValueError
1602 1603 return self.node(rev)
1603 1604 except (ValueError, OverflowError):
1604 1605 pass
1605 1606 if len(id) == 2 * self.nodeconstants.nodelen:
1606 1607 try:
1607 1608 # a full hex nodeid?
1608 1609 node = bin(id)
1609 1610 self.rev(node)
1610 1611 return node
1611 1612 except (binascii.Error, error.LookupError):
1612 1613 pass
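# Illustrative resolution table for ``_match`` (hypothetical values,
# assuming 20-byte sha1 nodes):
#     _match(12)               -> self.node(12)           # integer rev
#     _match(b'12')            -> self.node(12)           # str(rev) as bytes
#     _match(b'-1')            -> self.node(len(self) - 1) # counts from the end
#     _match(<20-byte node>)   -> the node, if present in the index
#     _match(<40-byte hex id>) -> bin(id), if present in the index
# anything else falls through and implicitly returns None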
1613 1614
1614 1615 def _partialmatch(self, id):
1615 1616 # we don't care about wdirfilenodeids as they should always be full hashes
1616 1617 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1617 1618 ambiguous = False
1618 1619 try:
1619 1620 partial = self.index.partialmatch(id)
1620 1621 if partial and self.hasnode(partial):
1621 1622 if maybewdir:
1622 1623 # single 'ff...' match in radix tree, ambiguous with wdir
1623 1624 ambiguous = True
1624 1625 else:
1625 1626 return partial
1626 1627 elif maybewdir:
1627 1628 # no 'ff...' match in radix tree, wdir identified
1628 1629 raise error.WdirUnsupported
1629 1630 else:
1630 1631 return None
1631 1632 except error.RevlogError:
1632 1633 # parsers.c radix tree lookup gave multiple matches
1633 1634 # fast path: for unfiltered changelog, radix tree is accurate
1634 1635 if not getattr(self, 'filteredrevs', None):
1635 1636 ambiguous = True
1636 1637 # fall through to slow path that filters hidden revisions
1637 1638 except (AttributeError, ValueError):
1638 1639 # we are pure python, or key is not hex
1639 1640 pass
1640 1641 if ambiguous:
1641 1642 raise error.AmbiguousPrefixLookupError(
1642 1643 id, self.display_id, _(b'ambiguous identifier')
1643 1644 )
1644 1645
1645 1646 if id in self._pcache:
1646 1647 return self._pcache[id]
1647 1648
1648 1649 if len(id) <= 40:
1649 1650 # hex(node)[:...]
1650 1651 l = len(id) // 2 * 2 # grab an even number of digits
1651 1652 try:
1652 1653 # we're dropping the last digit, so let's check that it's hex,
1653 1654 # to avoid the expensive computation below if it's not
1654 1655 if len(id) % 2 > 0:
1655 1656 if not (id[-1] in hexdigits):
1656 1657 return None
1657 1658 prefix = bin(id[:l])
1658 1659 except binascii.Error:
1659 1660 pass
1660 1661 else:
1661 1662 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1662 1663 nl = [
1663 1664 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1664 1665 ]
1665 1666 if self.nodeconstants.nullhex.startswith(id):
1666 1667 nl.append(self.nullid)
1667 1668 if len(nl) > 0:
1668 1669 if len(nl) == 1 and not maybewdir:
1669 1670 self._pcache[id] = nl[0]
1670 1671 return nl[0]
1671 1672 raise error.AmbiguousPrefixLookupError(
1672 1673 id, self.display_id, _(b'ambiguous identifier')
1673 1674 )
1674 1675 if maybewdir:
1675 1676 raise error.WdirUnsupported
1676 1677 return None
1677 1678
1678 1679 def lookup(self, id):
1679 1680 """locate a node based on:
1680 1681 - revision number or str(revision number)
1681 1682 - nodeid or subset of hex nodeid
1682 1683 """
1683 1684 n = self._match(id)
1684 1685 if n is not None:
1685 1686 return n
1686 1687 n = self._partialmatch(id)
1687 1688 if n:
1688 1689 return n
1689 1690
1690 1691 raise error.LookupError(id, self.display_id, _(b'no match found'))
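# Minimal usage sketch (hypothetical identifiers):
#     rl.lookup(b'0')       # -> node of revision 0
#     rl.lookup(b'1a2b3c')  # -> node for an unambiguous hex prefix
#     rl.lookup(b'zzzz')    # -> raises error.LookupError (no match)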
1691 1692
1692 1693 def shortest(self, node, minlength=1):
1693 1694 """Find the shortest unambiguous prefix that matches node."""
1694 1695
1695 1696 def isvalid(prefix):
1696 1697 try:
1697 1698 matchednode = self._partialmatch(prefix)
1698 1699 except error.AmbiguousPrefixLookupError:
1699 1700 return False
1700 1701 except error.WdirUnsupported:
1701 1702 # single 'ff...' match
1702 1703 return True
1703 1704 if matchednode is None:
1704 1705 raise error.LookupError(node, self.display_id, _(b'no node'))
1705 1706 return True
1706 1707
1707 1708 def maybewdir(prefix):
1708 1709 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1709 1710
1710 1711 hexnode = hex(node)
1711 1712
1712 1713 def disambiguate(hexnode, minlength):
1713 1714 """Disambiguate against wdirid."""
1714 1715 for length in range(minlength, len(hexnode) + 1):
1715 1716 prefix = hexnode[:length]
1716 1717 if not maybewdir(prefix):
1717 1718 return prefix
1718 1719
1719 1720 if not getattr(self, 'filteredrevs', None):
1720 1721 try:
1721 1722 length = max(self.index.shortest(node), minlength)
1722 1723 return disambiguate(hexnode, length)
1723 1724 except error.RevlogError:
1724 1725 if node != self.nodeconstants.wdirid:
1725 1726 raise error.LookupError(
1726 1727 node, self.display_id, _(b'no node')
1727 1728 )
1728 1729 except AttributeError:
1729 1730 # Fall through to pure code
1730 1731 pass
1731 1732
1732 1733 if node == self.nodeconstants.wdirid:
1733 1734 for length in range(minlength, len(hexnode) + 1):
1734 1735 prefix = hexnode[:length]
1735 1736 if isvalid(prefix):
1736 1737 return prefix
1737 1738
1738 1739 for length in range(minlength, len(hexnode) + 1):
1739 1740 prefix = hexnode[:length]
1740 1741 if isvalid(prefix):
1741 1742 return disambiguate(hexnode, length)
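# For example (illustrative): if only one stored node starts with
# b'1a2b', shortest() can return b'1a2b' instead of the full 40-char
# hex id; prefixes made only of 'f' characters are lengthened until
# they can no longer be confused with the virtual working-directory
# id (ff...f).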
1742 1743
1743 1744 def cmp(self, node, text):
1744 1745 """compare text with a given file revision
1745 1746
1746 1747 returns True if text is different than what is stored.
1747 1748 """
1748 1749 p1, p2 = self.parents(node)
1749 1750 return storageutil.hashrevisionsha1(text, p1, p2) != node
1750 1751
1751 1752 def _getsegmentforrevs(self, startrev, endrev, df=None):
1752 1753 """Obtain a segment of raw data corresponding to a range of revisions.
1753 1754
1754 1755 Accepts the start and end revisions and an optional already-open
1755 1756 file handle to be used for reading. If the file handle is read, its
1756 1757 seek position will not be preserved.
1757 1758
1758 1759 Requests for data may be satisfied by a cache.
1759 1760
1760 1761 Returns a 2-tuple of (offset, data) for the requested range of
1761 1762 revisions. Offset is the integer offset from the beginning of the
1762 1763 revlog and data is a str or buffer of the raw byte data.
1763 1764
1764 1765 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1765 1766 to determine where each revision's data begins and ends.
1766 1767 """
1767 1768 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1768 1769 # (functions are expensive).
1769 1770 index = self.index
1770 1771 istart = index[startrev]
1771 1772 start = int(istart[0] >> 16)
1772 1773 if startrev == endrev:
1773 1774 end = start + istart[1]
1774 1775 else:
1775 1776 iend = index[endrev]
1776 1777 end = int(iend[0] >> 16) + iend[1]
1777 1778
1778 1779 if self._inline:
1779 1780 start += (startrev + 1) * self.index.entry_size
1780 1781 end += (endrev + 1) * self.index.entry_size
1781 1782 length = end - start
1782 1783
1783 1784 return start, self._segmentfile.read_chunk(start, length, df)
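# Sketch of how a caller carves per-revision chunks out of the returned
# segment for a non-inline revlog (this mirrors the loop in ``_chunks``):
#
#     offset, data = rl._getsegmentforrevs(firstrev, lastrev)
#     for rev in range(firstrev, lastrev + 1):
#         chunkstart = rl.start(rev)
#         chunk = data[chunkstart - offset : chunkstart - offset + rl.length(rev)]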
1784 1785
1785 1786 def _chunk(self, rev, df=None):
1786 1787 """Obtain a single decompressed chunk for a revision.
1787 1788
1788 1789 Accepts an integer revision and an optional already-open file handle
1789 1790 to be used for reading. If used, the seek position of the file will not
1790 1791 be preserved.
1791 1792
1792 1793 Returns a str holding uncompressed data for the requested revision.
1793 1794 """
1794 1795 compression_mode = self.index[rev][10]
1795 1796 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1796 1797 if compression_mode == COMP_MODE_PLAIN:
1797 1798 return data
1798 1799 elif compression_mode == COMP_MODE_DEFAULT:
1799 1800 return self._decompressor(data)
1800 1801 elif compression_mode == COMP_MODE_INLINE:
1801 1802 return self.decompress(data)
1802 1803 else:
1803 1804 msg = b'unknown compression mode %d'
1804 1805 msg %= compression_mode
1805 1806 raise error.RevlogError(msg)
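# The per-entry compression mode (index field 10) selects the handling
# above (values from revlogutils.constants):
#     COMP_MODE_PLAIN   -> chunk bytes are stored verbatim
#     COMP_MODE_DEFAULT -> docket-level default engine, no per-chunk header
#     COMP_MODE_INLINE  -> self-describing header byte, routed by decompress()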
1806 1807
1807 1808 def _chunks(self, revs, df=None, targetsize=None):
1808 1809 """Obtain decompressed chunks for the specified revisions.
1809 1810
1810 1811 Accepts an iterable of numeric revisions that are assumed to be in
1811 1812 ascending order. Also accepts an optional already-open file handle
1812 1813 to be used for reading. If used, the seek position of the file will
1813 1814 not be preserved.
1814 1815
1815 1816 This function is similar to calling ``self._chunk()`` multiple times,
1816 1817 but is faster.
1817 1818
1818 1819 Returns a list with decompressed data for each requested revision.
1819 1820 """
1820 1821 if not revs:
1821 1822 return []
1822 1823 start = self.start
1823 1824 length = self.length
1824 1825 inline = self._inline
1825 1826 iosize = self.index.entry_size
1826 1827 buffer = util.buffer
1827 1828
1828 1829 l = []
1829 1830 ladd = l.append
1830 1831
1831 1832 if not self._withsparseread:
1832 1833 slicedchunks = (revs,)
1833 1834 else:
1834 1835 slicedchunks = deltautil.slicechunk(
1835 1836 self, revs, targetsize=targetsize
1836 1837 )
1837 1838
1838 1839 for revschunk in slicedchunks:
1839 1840 firstrev = revschunk[0]
1840 1841 # Skip trailing revisions with empty diff
1841 1842 for lastrev in revschunk[::-1]:
1842 1843 if length(lastrev) != 0:
1843 1844 break
1844 1845
1845 1846 try:
1846 1847 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1847 1848 except OverflowError:
1848 1849 # issue4215 - we can't cache a run of chunks greater than
1849 1850 # 2G on Windows
1850 1851 return [self._chunk(rev, df=df) for rev in revschunk]
1851 1852
1852 1853 decomp = self.decompress
1853 1854 # self._decompressor might be None, but will not be used in that case
1854 1855 def_decomp = self._decompressor
1855 1856 for rev in revschunk:
1856 1857 chunkstart = start(rev)
1857 1858 if inline:
1858 1859 chunkstart += (rev + 1) * iosize
1859 1860 chunklength = length(rev)
1860 1861 comp_mode = self.index[rev][10]
1861 1862 c = buffer(data, chunkstart - offset, chunklength)
1862 1863 if comp_mode == COMP_MODE_PLAIN:
1863 1864 ladd(c)
1864 1865 elif comp_mode == COMP_MODE_INLINE:
1865 1866 ladd(decomp(c))
1866 1867 elif comp_mode == COMP_MODE_DEFAULT:
1867 1868 ladd(def_decomp(c))
1868 1869 else:
1869 1870 msg = b'unknown compression mode %d'
1870 1871 msg %= comp_mode
1871 1872 raise error.RevlogError(msg)
1872 1873
1873 1874 return l
1874 1875
1875 1876 def deltaparent(self, rev):
1876 1877 """return deltaparent of the given revision"""
1877 1878 base = self.index[rev][3]
1878 1879 if base == rev:
1879 1880 return nullrev
1880 1881 elif self._generaldelta:
1881 1882 return base
1882 1883 else:
1883 1884 return rev - 1
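# E.g. (illustrative): with generaldelta, the index stores the real
# delta base, so a revision deltaed against rev 10 returns 10 whatever
# its own number; without generaldelta the base is implicitly rev - 1,
# and base == rev marks a full text (nullrev is returned).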
1884 1885
1885 1886 def issnapshot(self, rev):
1886 1887 """tells whether rev is a snapshot"""
1887 1888 if not self._sparserevlog:
1888 1889 return self.deltaparent(rev) == nullrev
1889 1890 elif hasattr(self.index, 'issnapshot'):
1890 1891 # directly assign the method to cache the testing and access
1891 1892 self.issnapshot = self.index.issnapshot
1892 1893 return self.issnapshot(rev)
1893 1894 if rev == nullrev:
1894 1895 return True
1895 1896 entry = self.index[rev]
1896 1897 base = entry[3]
1897 1898 if base == rev:
1898 1899 return True
1899 1900 if base == nullrev:
1900 1901 return True
1901 1902 p1 = entry[5]
1902 1903 while self.length(p1) == 0:
1903 1904 b = self.deltaparent(p1)
1904 1905 if b == p1:
1905 1906 break
1906 1907 p1 = b
1907 1908 p2 = entry[6]
1908 1909 while self.length(p2) == 0:
1909 1910 b = self.deltaparent(p2)
1910 1911 if b == p2:
1911 1912 break
1912 1913 p2 = b
1913 1914 if base == p1 or base == p2:
1914 1915 return False
1915 1916 return self.issnapshot(base)
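# Summary of the pure-python fallback above: a revision is a snapshot
# when its delta base is itself (full text), nullrev, or another
# snapshot that is not one of its (non-empty-diff) parents; a delta
# taken against a parent is a regular delta, not a snapshot.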
1916 1917
1917 1918 def snapshotdepth(self, rev):
1918 1919 """number of snapshot in the chain before this one"""
1919 1920 if not self.issnapshot(rev):
1920 1921 raise error.ProgrammingError(b'revision %d not a snapshot')
1921 1922 return len(self._deltachain(rev)[0]) - 1
1922 1923
1923 1924 def revdiff(self, rev1, rev2):
1924 1925 """return or calculate a delta between two revisions
1925 1926
1926 1927 The delta calculated is in binary form and is intended to be written to
1927 1928 revlog data directly. So this function needs raw revision data.
1928 1929 """
1929 1930 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1930 1931 return bytes(self._chunk(rev2))
1931 1932
1932 1933 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
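# Usage sketch: the fast path returns the stored chunk verbatim when
# rev2 is already stored as a delta against rev1; either way the result
# patches cleanly (illustrative, assuming mdiff.patch):
#
#     delta = rl.revdiff(10, 11)
#     assert mdiff.patch(rl.rawdata(10), delta) == rl.rawdata(11)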
1933 1934
1934 1935 def revision(self, nodeorrev, _df=None):
1935 1936 """return an uncompressed revision of a given node or revision
1936 1937 number.
1937 1938
1938 1939 _df - an existing file handle to read from. (internal-only)
1939 1940 """
1940 1941 return self._revisiondata(nodeorrev, _df)
1941 1942
1942 1943 def sidedata(self, nodeorrev, _df=None):
1943 1944 """a map of extra data related to the changeset but not part of the hash
1944 1945
1945 1946 This function currently returns a dictionary. However, a more advanced
1946 1947 mapping object will likely be used in the future for more
1947 1948 efficient/lazy code.
1948 1949 """
1949 1950 # deal with <nodeorrev> argument type
1950 1951 if isinstance(nodeorrev, int):
1951 1952 rev = nodeorrev
1952 1953 else:
1953 1954 rev = self.rev(nodeorrev)
1954 1955 return self._sidedata(rev)
1955 1956
1956 1957 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1957 1958 # deal with <nodeorrev> argument type
1958 1959 if isinstance(nodeorrev, int):
1959 1960 rev = nodeorrev
1960 1961 node = self.node(rev)
1961 1962 else:
1962 1963 node = nodeorrev
1963 1964 rev = None
1964 1965
1965 1966 # fast path the special `nullid` rev
1966 1967 if node == self.nullid:
1967 1968 return b""
1968 1969
1969 1970 # ``rawtext`` is the text as stored inside the revlog. Might be the
1970 1971 # revision or might need to be processed to retrieve the revision.
1971 1972 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1972 1973
1973 1974 if raw and validated:
1974 1975 # if we don't want to process the raw text and that raw
1975 1976 # text is cached, we can exit early.
1976 1977 return rawtext
1977 1978 if rev is None:
1978 1979 rev = self.rev(node)
1979 1980 # the revlog's flag for this revision
1980 1981 # (usually alter its state or content)
1981 1982 flags = self.flags(rev)
1982 1983
1983 1984 if validated and flags == REVIDX_DEFAULT_FLAGS:
1984 1985 # no extra flags set, no flag processor runs, text = rawtext
1985 1986 return rawtext
1986 1987
1987 1988 if raw:
1988 1989 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1989 1990 text = rawtext
1990 1991 else:
1991 1992 r = flagutil.processflagsread(self, rawtext, flags)
1992 1993 text, validatehash = r
1993 1994 if validatehash:
1994 1995 self.checkhash(text, node, rev=rev)
1995 1996 if not validated:
1996 1997 self._revisioncache = (node, rev, rawtext)
1997 1998
1998 1999 return text
1999 2000
2000 2001 def _rawtext(self, node, rev, _df=None):
2001 2002 """return the possibly unvalidated rawtext for a revision
2002 2003
2003 2004 returns (rev, rawtext, validated)
2004 2005 """
2005 2006
2006 2007 # revision in the cache (could be useful to apply delta)
2007 2008 cachedrev = None
2008 2009 # An intermediate text to apply deltas to
2009 2010 basetext = None
2010 2011
2011 2012 # Check if we have the entry in cache
2012 2013 # The cache entry looks like (node, rev, rawtext)
2013 2014 if self._revisioncache:
2014 2015 if self._revisioncache[0] == node:
2015 2016 return (rev, self._revisioncache[2], True)
2016 2017 cachedrev = self._revisioncache[1]
2017 2018
2018 2019 if rev is None:
2019 2020 rev = self.rev(node)
2020 2021
2021 2022 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2022 2023 if stopped:
2023 2024 basetext = self._revisioncache[2]
2024 2025
2025 2026 # drop cache to save memory, the caller is expected to
2026 2027 # update self._revisioncache after validating the text
2027 2028 self._revisioncache = None
2028 2029
2029 2030 targetsize = None
2030 2031 rawsize = self.index[rev][2]
2031 2032 if 0 <= rawsize:
2032 2033 targetsize = 4 * rawsize
2033 2034
2034 2035 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2035 2036 if basetext is None:
2036 2037 basetext = bytes(bins[0])
2037 2038 bins = bins[1:]
2038 2039
2039 2040 rawtext = mdiff.patches(basetext, bins)
2040 2041 del basetext # let us have a chance to free memory early
2041 2042 return (rev, rawtext, False)
2042 2043
2043 2044 def _sidedata(self, rev):
2044 2045 """Return the sidedata for a given revision number."""
2045 2046 index_entry = self.index[rev]
2046 2047 sidedata_offset = index_entry[8]
2047 2048 sidedata_size = index_entry[9]
2048 2049
2049 2050 if self._inline:
2050 2051 sidedata_offset += self.index.entry_size * (1 + rev)
2051 2052 if sidedata_size == 0:
2052 2053 return {}
2053 2054
2054 2055 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2055 2056 filename = self._sidedatafile
2056 2057 end = self._docket.sidedata_end
2057 2058 offset = sidedata_offset
2058 2059 length = sidedata_size
2059 2060 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2060 2061 raise error.RevlogError(m)
2061 2062
2062 2063 comp_segment = self._segmentfile_sidedata.read_chunk(
2063 2064 sidedata_offset, sidedata_size
2064 2065 )
2065 2066
2066 2067 comp = self.index[rev][11]
2067 2068 if comp == COMP_MODE_PLAIN:
2068 2069 segment = comp_segment
2069 2070 elif comp == COMP_MODE_DEFAULT:
2070 2071 segment = self._decompressor(comp_segment)
2071 2072 elif comp == COMP_MODE_INLINE:
2072 2073 segment = self.decompress(comp_segment)
2073 2074 else:
2074 2075 msg = b'unknown compression mode %d'
2075 2076 msg %= comp
2076 2077 raise error.RevlogError(msg)
2077 2078
2078 2079 sidedata = sidedatautil.deserialize_sidedata(segment)
2079 2080 return sidedata
2080 2081
2081 2082 def rawdata(self, nodeorrev, _df=None):
2082 2083 """return an uncompressed raw data of a given node or revision number.
2083 2084
2084 2085 _df - an existing file handle to read from. (internal-only)
2085 2086 """
2086 2087 return self._revisiondata(nodeorrev, _df, raw=True)
2087 2088
2088 2089 def hash(self, text, p1, p2):
2089 2090 """Compute a node hash.
2090 2091
2091 2092 Available as a function so that subclasses can replace the hash
2092 2093 as needed.
2093 2094 """
2094 2095 return storageutil.hashrevisionsha1(text, p1, p2)
2095 2096
2096 2097 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2097 2098 """Check node hash integrity.
2098 2099
2099 2100 Available as a function so that subclasses can extend hash mismatch
2100 2101 behaviors as needed.
2101 2102 """
2102 2103 try:
2103 2104 if p1 is None and p2 is None:
2104 2105 p1, p2 = self.parents(node)
2105 2106 if node != self.hash(text, p1, p2):
2106 2107 # Clear the revision cache on hash failure. The revision cache
2107 2108 # only stores the raw revision and clearing the cache does have
2108 2109 # the side-effect that we won't have a cache hit when the raw
2109 2110 # revision data is accessed. But this case should be rare and
2110 2111 # it is extra work to teach the cache about the hash
2111 2112 # verification state.
2112 2113 if self._revisioncache and self._revisioncache[0] == node:
2113 2114 self._revisioncache = None
2114 2115
2115 2116 revornode = rev
2116 2117 if revornode is None:
2117 2118 revornode = templatefilters.short(hex(node))
2118 2119 raise error.RevlogError(
2119 2120 _(b"integrity check failed on %s:%s")
2120 2121 % (self.display_id, pycompat.bytestr(revornode))
2121 2122 )
2122 2123 except error.RevlogError:
2123 2124 if self._censorable and storageutil.iscensoredtext(text):
2124 2125 raise error.CensoredNodeError(self.display_id, node, text)
2125 2126 raise
2126 2127
2127 2128 @property
2128 2129 def _split_index_file(self):
2129 2130 """the path where to expect the index of an ongoing splitting operation
2130 2131
2131 2132 The file will only exist if a splitting operation is in progress, but
2132 2133 it is always expected at the same location."""
2133 2134 parts = self.radix.split(b'/')
2134 2135 if len(parts) > 1:
2135 2136 # appends a '-s' suffix to the ``data`` or ``meta`` base directory
2136 2137 head = parts[0] + b'-s'
2137 2138 mids = parts[1:-1]
2138 2139 tail = parts[-1] + b'.i'
2139 2140 pieces = [head] + mids + [tail]
2140 2141 return b'/'.join(pieces)
2141 2142 else:
2142 2143 # the revlog is stored at the root of the store (changelog or
2143 2144 # manifest), no risk of collision.
2144 2145 return self.radix + b'.i.s'
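# Illustrative paths: a filelog with radix ``data/foo/bar.txt`` gets the
# temporary split index ``data-s/foo/bar.txt.i``, while a root revlog
# such as ``00changelog`` uses ``00changelog.i.s``.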
2145 2146
2146 2147 def _enforceinlinesize(self, tr, side_write=True):
2147 2148 """Check if the revlog is too big for inline and convert if so.
2148 2149
2149 2150 This should be called after revisions are added to the revlog. If the
2150 2151 revlog has grown too large to be an inline revlog, it will convert it
2151 2152 to use multiple index and data files.
2152 2153 """
2153 2154 tiprev = len(self) - 1
2154 2155 total_size = self.start(tiprev) + self.length(tiprev)
2155 2156 if not self._inline or total_size < _maxinline:
2156 2157 return
2157 2158
2158 2159 troffset = tr.findoffset(self._indexfile)
2159 2160 if troffset is None:
2160 2161 raise error.RevlogError(
2161 2162 _(b"%s not found in the transaction") % self._indexfile
2162 2163 )
2163 2164 if troffset:
2164 2165 tr.addbackup(self._indexfile, for_offset=True)
2165 2166 tr.add(self._datafile, 0)
2166 2167
2167 2168 existing_handles = False
2168 2169 if self._writinghandles is not None:
2169 2170 existing_handles = True
2170 2171 fp = self._writinghandles[0]
2171 2172 fp.flush()
2172 2173 fp.close()
2173 2174 # We can't use the cached file handle after close(). So prevent
2174 2175 # its usage.
2175 2176 self._writinghandles = None
2176 2177 self._segmentfile.writing_handle = None
2177 2178 # No need to deal with the sidedata writing handle as it is only
2178 2179 # relevant to revlog-v2, which is never inline and thus never
2179 2180 # reaches this code
2180 2181 if side_write:
2181 2182 old_index_file_path = self._indexfile
2182 2183 new_index_file_path = self._split_index_file
2183 2184 opener = self.opener
2184 2185 weak_self = weakref.ref(self)
2185 2186
2186 2187 # the "split" index replace the real index when the transaction is finalized
2187 2188 def finalize_callback(tr):
2188 2189 opener.rename(
2189 2190 new_index_file_path,
2190 2191 old_index_file_path,
2191 2192 checkambig=True,
2192 2193 )
2193 2194 maybe_self = weak_self()
2194 2195 if maybe_self is not None:
2195 2196 maybe_self._indexfile = old_index_file_path
2196 2197
2197 2198 def abort_callback(tr):
2198 2199 maybe_self = weak_self()
2199 2200 if maybe_self is not None:
2200 2201 maybe_self._indexfile = old_index_file_path
2201 2202
2202 2203 tr.registertmp(new_index_file_path)
2203 2204 if self.target[1] is not None:
2204 2205 callback_id = b'000-revlog-split-%d-%s' % self.target
2205 2206 else:
2206 2207 callback_id = b'000-revlog-split-%d' % self.target[0]
2207 2208 tr.addfinalize(callback_id, finalize_callback)
2208 2209 tr.addabort(callback_id, abort_callback)
2209 2210
2210 2211 new_dfh = self._datafp(b'w+')
2211 2212 new_dfh.truncate(0) # drop any potentially existing data
2212 2213 try:
2213 2214 with self._indexfp() as read_ifh:
2214 2215 for r in self:
2215 2216 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2216 2217 new_dfh.flush()
2217 2218
2218 2219 if side_write:
2219 2220 self._indexfile = new_index_file_path
2220 2221 with self.__index_new_fp() as fp:
2221 2222 self._format_flags &= ~FLAG_INLINE_DATA
2222 2223 self._inline = False
2223 2224 for i in self:
2224 2225 e = self.index.entry_binary(i)
2225 2226 if i == 0 and self._docket is None:
2226 2227 header = self._format_flags | self._format_version
2227 2228 header = self.index.pack_header(header)
2228 2229 e = header + e
2229 2230 fp.write(e)
2230 2231 if self._docket is not None:
2231 2232 self._docket.index_end = fp.tell()
2232 2233
2233 2234 # If we don't use side-write, the temp file replaces the real
2234 2235 # index when we exit the context manager
2235 2236
2236 2237 nodemaputil.setup_persistent_nodemap(tr, self)
2237 2238 self._segmentfile = randomaccessfile.randomaccessfile(
2238 2239 self.opener,
2239 2240 self._datafile,
2240 2241 self._chunkcachesize,
2241 2242 )
2242 2243
2243 2244 if existing_handles:
2244 2245 # switched from inline to conventional; reopen the index
2245 2246 ifh = self.__index_write_fp()
2246 2247 self._writinghandles = (ifh, new_dfh, None)
2247 2248 self._segmentfile.writing_handle = new_dfh
2248 2249 new_dfh = None
2249 2250 # No need to deal with the sidedata writing handle as it is only
2250 2251 # relevant to revlog-v2, which is never inline and thus never
2251 2252 # reaches this code
2252 2253 finally:
2253 2254 if new_dfh is not None:
2254 2255 new_dfh.close()
2255 2256
2256 2257 def _nodeduplicatecallback(self, transaction, node):
2257 2258 """called when trying to add a node already stored."""
2258 2259
2259 2260 @contextlib.contextmanager
2260 2261 def reading(self):
2261 2262 """Context manager that keeps data and sidedata files open for reading"""
2262 2263 with self._segmentfile.reading():
2263 2264 with self._segmentfile_sidedata.reading():
2264 2265 yield
2265 2266
2266 2267 @contextlib.contextmanager
2267 2268 def _writing(self, transaction):
2268 2269 if self._trypending:
2269 2270 msg = b'try to write in a `trypending` revlog: %s'
2270 2271 msg %= self.display_id
2271 2272 raise error.ProgrammingError(msg)
2272 2273 if self._writinghandles is not None:
2273 2274 yield
2274 2275 else:
2275 2276 ifh = dfh = sdfh = None
2276 2277 try:
2277 2278 r = len(self)
2278 2279 # opening the data file.
2279 2280 dsize = 0
2280 2281 if r:
2281 2282 dsize = self.end(r - 1)
2282 2283 dfh = None
2283 2284 if not self._inline:
2284 2285 try:
2285 2286 dfh = self._datafp(b"r+")
2286 2287 if self._docket is None:
2287 2288 dfh.seek(0, os.SEEK_END)
2288 2289 else:
2289 2290 dfh.seek(self._docket.data_end, os.SEEK_SET)
2290 2291 except FileNotFoundError:
2291 2292 dfh = self._datafp(b"w+")
2292 2293 transaction.add(self._datafile, dsize)
2293 2294 if self._sidedatafile is not None:
2294 2295 # revlog-v2 does not inline, help Pytype
2295 2296 assert dfh is not None
2296 2297 try:
2297 2298 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2298 2299 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2299 2300 except FileNotFoundError:
2300 2301 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2301 2302 transaction.add(
2302 2303 self._sidedatafile, self._docket.sidedata_end
2303 2304 )
2304 2305
2305 2306 # opening the index file.
2306 2307 isize = r * self.index.entry_size
2307 2308 ifh = self.__index_write_fp()
2308 2309 if self._inline:
2309 2310 transaction.add(self._indexfile, dsize + isize)
2310 2311 else:
2311 2312 transaction.add(self._indexfile, isize)
2312 2313 # exposing all file handle for writing.
2313 2314 self._writinghandles = (ifh, dfh, sdfh)
2314 2315 self._segmentfile.writing_handle = ifh if self._inline else dfh
2315 2316 self._segmentfile_sidedata.writing_handle = sdfh
2316 2317 yield
2317 2318 if self._docket is not None:
2318 2319 self._write_docket(transaction)
2319 2320 finally:
2320 2321 self._writinghandles = None
2321 2322 self._segmentfile.writing_handle = None
2322 2323 self._segmentfile_sidedata.writing_handle = None
2323 2324 if dfh is not None:
2324 2325 dfh.close()
2325 2326 if sdfh is not None:
2326 2327 sdfh.close()
2327 2328 # close the index file last to avoid exposing entries that refer
2328 2329 # to potentially unflushed data content.
2329 2330 if ifh is not None:
2330 2331 ifh.close()
2331 2332
2332 2333 def _write_docket(self, transaction):
2333 2334 """write the current docket on disk
2334 2335
2335 2336 Exists as a method to help the changelog implement its own transaction logic.
2336 2337
2337 2338 We could also imagine using the same transaction logic for all revlogs
2338 2339 since dockets are cheap."""
2339 2340 self._docket.write(transaction)
2340 2341
2341 2342 def addrevision(
2342 2343 self,
2343 2344 text,
2344 2345 transaction,
2345 2346 link,
2346 2347 p1,
2347 2348 p2,
2348 2349 cachedelta=None,
2349 2350 node=None,
2350 2351 flags=REVIDX_DEFAULT_FLAGS,
2351 2352 deltacomputer=None,
2352 2353 sidedata=None,
2353 2354 ):
2354 2355 """add a revision to the log
2355 2356
2356 2357 text - the revision data to add
2357 2358 transaction - the transaction object used for rollback
2358 2359 link - the linkrev data to add
2359 2360 p1, p2 - the parent nodeids of the revision
2360 2361 cachedelta - an optional precomputed delta
2361 2362 node - nodeid of revision; typically node is not specified, and it is
2362 2363 computed by default as hash(text, p1, p2); however subclasses might
2363 2364 use a different hashing method (and override checkhash() in that case)
2364 2365 flags - the known flags to set on the revision
2365 2366 deltacomputer - an optional deltacomputer instance shared between
2366 2367 multiple calls
2367 2368 """
2368 2369 if link == nullrev:
2369 2370 raise error.RevlogError(
2370 2371 _(b"attempted to add linkrev -1 to %s") % self.display_id
2371 2372 )
2372 2373
2373 2374 if sidedata is None:
2374 2375 sidedata = {}
2375 2376 elif sidedata and not self.hassidedata:
2376 2377 raise error.ProgrammingError(
2377 2378 _(b"trying to add sidedata to a revlog who don't support them")
2378 2379 )
2379 2380
2380 2381 if flags:
2381 2382 node = node or self.hash(text, p1, p2)
2382 2383
2383 2384 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2384 2385
2385 2386 # If the flag processor modifies the revision data, ignore any provided
2386 2387 # cachedelta.
2387 2388 if rawtext != text:
2388 2389 cachedelta = None
2389 2390
2390 2391 if len(rawtext) > _maxentrysize:
2391 2392 raise error.RevlogError(
2392 2393 _(
2393 2394 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2394 2395 )
2395 2396 % (self.display_id, len(rawtext))
2396 2397 )
2397 2398
2398 2399 node = node or self.hash(rawtext, p1, p2)
2399 2400 rev = self.index.get_rev(node)
2400 2401 if rev is not None:
2401 2402 return rev
2402 2403
2403 2404 if validatehash:
2404 2405 self.checkhash(rawtext, node, p1=p1, p2=p2)
2405 2406
2406 2407 return self.addrawrevision(
2407 2408 rawtext,
2408 2409 transaction,
2409 2410 link,
2410 2411 p1,
2411 2412 p2,
2412 2413 node,
2413 2414 flags,
2414 2415 cachedelta=cachedelta,
2415 2416 deltacomputer=deltacomputer,
2416 2417 sidedata=sidedata,
2417 2418 )
2418 2419
2419 2420 def addrawrevision(
2420 2421 self,
2421 2422 rawtext,
2422 2423 transaction,
2423 2424 link,
2424 2425 p1,
2425 2426 p2,
2426 2427 node,
2427 2428 flags,
2428 2429 cachedelta=None,
2429 2430 deltacomputer=None,
2430 2431 sidedata=None,
2431 2432 ):
2432 2433 """add a raw revision with known flags, node and parents
2433 2434 useful when reusing a revision not stored in this revlog (e.g. received
2434 2435 over the wire, or read from an external bundle).
2435 2436 """
2436 2437 with self._writing(transaction):
2437 2438 return self._addrevision(
2438 2439 node,
2439 2440 rawtext,
2440 2441 transaction,
2441 2442 link,
2442 2443 p1,
2443 2444 p2,
2444 2445 flags,
2445 2446 cachedelta,
2446 2447 deltacomputer=deltacomputer,
2447 2448 sidedata=sidedata,
2448 2449 )
2449 2450
2450 2451 def compress(self, data):
2451 2452 """Generate a possibly-compressed representation of data."""
2452 2453 if not data:
2453 2454 return b'', data
2454 2455
2455 2456 compressed = self._compressor.compress(data)
2456 2457
2457 2458 if compressed:
2458 2459 # The revlog compressor added the header in the returned data.
2459 2460 return b'', compressed
2460 2461
2461 2462 if data[0:1] == b'\0':
2462 2463 return b'', data
2463 2464 return b'u', data
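# Header convention sketch for the (header, data) pair returned above:
#     (b'', <engine output>)  compressed; the engine's own first byte
#                             (e.g. 'x' for zlib) routes decompression
#     (b'u', <raw text>)      stored literally, 'u' marks "uncompressed"
#     (b'', b'\0' + ...)      raw data starting with NUL needs no marker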
2464 2465
2465 2466 def decompress(self, data):
2466 2467 """Decompress a revlog chunk.
2467 2468
2468 2469 The chunk is expected to begin with a header identifying the
2469 2470 format type so it can be routed to an appropriate decompressor.
2470 2471 """
2471 2472 if not data:
2472 2473 return data
2473 2474
2474 2475 # Revlogs are read much more frequently than they are written and many
2475 2476 # chunks only take microseconds to decompress, so performance is
2476 2477 # important here.
2477 2478 #
2478 2479 # We can make a few assumptions about revlogs:
2479 2480 #
2480 2481 # 1) the majority of chunks will be compressed (as opposed to inline
2481 2482 # raw data).
2482 2483 # 2) decompressing *any* data will likely be at least 10x slower than
2483 2484 # returning raw inline data.
2484 2485 # 3) we want to prioritize common and officially supported compression
2485 2486 # engines
2486 2487 #
2487 2488 # It follows that we want to optimize for "decompress compressed data
2488 2489 # when encoded with common and officially supported compression engines"
2489 2490 # case over "raw data" and "data encoded by less common or non-official
2490 2491 # compression engines." That is why we have the inline lookup first
2491 2492 # followed by the compengines lookup.
2492 2493 #
2493 2494 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2494 2495 # compressed chunks. And this matters for changelog and manifest reads.
2495 2496 t = data[0:1]
2496 2497
2497 2498 if t == b'x':
2498 2499 try:
2499 2500 return _zlibdecompress(data)
2500 2501 except zlib.error as e:
2501 2502 raise error.RevlogError(
2502 2503 _(b'revlog decompress error: %s')
2503 2504 % stringutil.forcebytestr(e)
2504 2505 )
2505 2506 # '\0' is more common than 'u' so it goes first.
2506 2507 elif t == b'\0':
2507 2508 return data
2508 2509 elif t == b'u':
2509 2510 return util.buffer(data, 1)
2510 2511
2511 2512 compressor = self._get_decompressor(t)
2512 2513
2513 2514 return compressor.decompress(data)
2514 2515
2515 2516 def _addrevision(
2516 2517 self,
2517 2518 node,
2518 2519 rawtext,
2519 2520 transaction,
2520 2521 link,
2521 2522 p1,
2522 2523 p2,
2523 2524 flags,
2524 2525 cachedelta,
2525 2526 alwayscache=False,
2526 2527 deltacomputer=None,
2527 2528 sidedata=None,
2528 2529 ):
2529 2530 """internal function to add revisions to the log
2530 2531
2531 2532 see addrevision for argument descriptions.
2532 2533
2533 2534 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2534 2535
2535 2536 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2536 2537 be used.
2537 2538
2538 2539 invariants:
2539 2540 - rawtext is optional (can be None); if not set, cachedelta must be set.
2540 2541 if both are set, they must correspond to each other.
2541 2542 """
2542 2543 if node == self.nullid:
2543 2544 raise error.RevlogError(
2544 2545 _(b"%s: attempt to add null revision") % self.display_id
2545 2546 )
2546 2547 if (
2547 2548 node == self.nodeconstants.wdirid
2548 2549 or node in self.nodeconstants.wdirfilenodeids
2549 2550 ):
2550 2551 raise error.RevlogError(
2551 2552 _(b"%s: attempt to add wdir revision") % self.display_id
2552 2553 )
2553 2554 if self._writinghandles is None:
2554 2555 msg = b'adding revision outside `revlog._writing` context'
2555 2556 raise error.ProgrammingError(msg)
2556 2557
2557 2558 if self._inline:
2558 2559 fh = self._writinghandles[0]
2559 2560 else:
2560 2561 fh = self._writinghandles[1]
2561 2562
2562 2563 btext = [rawtext]
2563 2564
2564 2565 curr = len(self)
2565 2566 prev = curr - 1
2566 2567
2567 2568 offset = self._get_data_offset(prev)
2568 2569
2569 2570 if self._concurrencychecker:
2570 2571 ifh, dfh, sdfh = self._writinghandles
2571 2572 # XXX no checking for the sidedata file
2572 2573 if self._inline:
2573 2574 # offset is "as if" it were in the .d file, so we need to add on
2574 2575 # the size of the entry metadata.
2575 2576 self._concurrencychecker(
2576 2577 ifh, self._indexfile, offset + curr * self.index.entry_size
2577 2578 )
2578 2579 else:
2579 2580 # Entries in the .i are a consistent size.
2580 2581 self._concurrencychecker(
2581 2582 ifh, self._indexfile, curr * self.index.entry_size
2582 2583 )
2583 2584 self._concurrencychecker(dfh, self._datafile, offset)
2584 2585
2585 2586 p1r, p2r = self.rev(p1), self.rev(p2)
2586 2587
2587 2588 # full versions are inserted when the needed deltas
2588 2589 # become comparable to the uncompressed text
2589 2590 if rawtext is None:
2590 2591 # we need the rawtext size before it was changed by flag processors,
2591 2592 # which is the non-raw size. use revlog explicitly to avoid filelog's
2592 2593 # extra logic that might remove metadata size.
2593 2594 textlen = mdiff.patchedsize(
2594 2595 revlog.size(self, cachedelta[0]), cachedelta[1]
2595 2596 )
2596 2597 else:
2597 2598 textlen = len(rawtext)
2598 2599
2599 2600 if deltacomputer is None:
2600 2601 write_debug = None
2601 2602 if self._debug_delta:
2602 2603 write_debug = transaction._report
2603 2604 deltacomputer = deltautil.deltacomputer(
2604 2605 self, write_debug=write_debug
2605 2606 )
2606 2607
2607 2608 if cachedelta is not None and len(cachedelta) == 2:
2608 2609 # If the cached delta has no information about how it should be
2609 2610 # reused, add the default reuse instruction according to the
2610 2611 # revlog's configuration.
2611 2612 if self._generaldelta and self._lazydeltabase:
2612 2613 delta_base_reuse = DELTA_BASE_REUSE_TRY
2613 2614 else:
2614 2615 delta_base_reuse = DELTA_BASE_REUSE_NO
2615 2616 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2616 2617
2617 2618 revinfo = revlogutils.revisioninfo(
2618 2619 node,
2619 2620 p1,
2620 2621 p2,
2621 2622 btext,
2622 2623 textlen,
2623 2624 cachedelta,
2624 2625 flags,
2625 2626 )
2626 2627
2627 2628 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2628 2629
2629 2630 compression_mode = COMP_MODE_INLINE
2630 2631 if self._docket is not None:
2631 2632 default_comp = self._docket.default_compression_header
2632 2633 r = deltautil.delta_compression(default_comp, deltainfo)
2633 2634 compression_mode, deltainfo = r
2634 2635
2635 2636 sidedata_compression_mode = COMP_MODE_INLINE
2636 2637 if sidedata and self.hassidedata:
2637 2638 sidedata_compression_mode = COMP_MODE_PLAIN
2638 2639 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2639 2640 sidedata_offset = self._docket.sidedata_end
2640 2641 h, comp_sidedata = self.compress(serialized_sidedata)
2641 2642 if (
2642 2643 h != b'u'
2643 2644 and comp_sidedata[0:1] != b'\0'
2644 2645 and len(comp_sidedata) < len(serialized_sidedata)
2645 2646 ):
2646 2647 assert not h
2647 2648 if (
2648 2649 comp_sidedata[0:1]
2649 2650 == self._docket.default_compression_header
2650 2651 ):
2651 2652 sidedata_compression_mode = COMP_MODE_DEFAULT
2652 2653 serialized_sidedata = comp_sidedata
2653 2654 else:
2654 2655 sidedata_compression_mode = COMP_MODE_INLINE
2655 2656 serialized_sidedata = comp_sidedata
2656 2657 else:
2657 2658 serialized_sidedata = b""
2658 2659 # Don't store the offset if the sidedata is empty; that way
2659 2660 # we can easily detect empty sidedata and they will be no different
2660 2661 # from the ones we add manually.
2661 2662 sidedata_offset = 0
2662 2663
2663 2664 rank = RANK_UNKNOWN
2664 2665 if self._compute_rank:
2665 2666 if (p1r, p2r) == (nullrev, nullrev):
2666 2667 rank = 1
2667 2668 elif p1r != nullrev and p2r == nullrev:
2668 2669 rank = 1 + self.fast_rank(p1r)
2669 2670 elif p1r == nullrev and p2r != nullrev:
2670 2671 rank = 1 + self.fast_rank(p2r)
2671 2672 else: # merge node
2672 2673 if rustdagop is not None and self.index.rust_ext_compat:
2673 2674 rank = rustdagop.rank(self.index, p1r, p2r)
2674 2675 else:
2675 2676 pmin, pmax = sorted((p1r, p2r))
2676 2677 rank = 1 + self.fast_rank(pmax)
2677 2678 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2678 2679
2679 2680 e = revlogutils.entry(
2680 2681 flags=flags,
2681 2682 data_offset=offset,
2682 2683 data_compressed_length=deltainfo.deltalen,
2683 2684 data_uncompressed_length=textlen,
2684 2685 data_compression_mode=compression_mode,
2685 2686 data_delta_base=deltainfo.base,
2686 2687 link_rev=link,
2687 2688 parent_rev_1=p1r,
2688 2689 parent_rev_2=p2r,
2689 2690 node_id=node,
2690 2691 sidedata_offset=sidedata_offset,
2691 2692 sidedata_compressed_length=len(serialized_sidedata),
2692 2693 sidedata_compression_mode=sidedata_compression_mode,
2693 2694 rank=rank,
2694 2695 )
2695 2696
2696 2697 self.index.append(e)
2697 2698 entry = self.index.entry_binary(curr)
2698 2699 if curr == 0 and self._docket is None:
2699 2700 header = self._format_flags | self._format_version
2700 2701 header = self.index.pack_header(header)
2701 2702 entry = header + entry
2702 2703 self._writeentry(
2703 2704 transaction,
2704 2705 entry,
2705 2706 deltainfo.data,
2706 2707 link,
2707 2708 offset,
2708 2709 serialized_sidedata,
2709 2710 sidedata_offset,
2710 2711 )
2711 2712
2712 2713 rawtext = btext[0]
2713 2714
2714 2715 if alwayscache and rawtext is None:
2715 2716 rawtext = deltacomputer.buildtext(revinfo, fh)
2716 2717
2717 2718 if type(rawtext) == bytes: # only accept immutable objects
2718 2719 self._revisioncache = (node, curr, rawtext)
2719 2720 self._chainbasecache[curr] = deltainfo.chainbase
2720 2721 return curr
2721 2722
2722 2723 def _get_data_offset(self, prev):
2723 2724 """Returns the current offset in the (in-transaction) data file.
2724 2725 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2725 2726 file to store that information: since sidedata can be rewritten to the
2726 2727 end of the data file within a transaction, you can have cases where, for
2727 2728 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2728 2729 to `n - 1`'s sidedata being written after `n`'s data.
2729 2730
2730 2731 TODO cache this in a docket file before getting out of experimental."""
2731 2732 if self._docket is None:
2732 2733 return self.end(prev)
2733 2734 else:
2734 2735 return self._docket.data_end
2735 2736
2736 2737 def _writeentry(
2737 2738 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2738 2739 ):
2739 2740 # Files opened in a+ mode have inconsistent behavior on various
2740 2741 # platforms. Windows requires that a file positioning call be made
2741 2742 # when the file handle transitions between reads and writes. See
2742 2743 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2743 2744 # platforms, Python or the platform itself can be buggy. Some versions
2744 2745 # of Solaris have been observed to not append at the end of the file
2745 2746 # if the file was seeked to before the end. See issue4943 for more.
2746 2747 #
2747 2748 # We work around this issue by inserting a seek() before writing.
2748 2749 # Note: This is likely not necessary on Python 3. However, because
2749 2750 # the file handle is reused for reads and may be seeked there, we need
2750 2751 # to be careful before changing this.
2751 2752 if self._writinghandles is None:
2752 2753 msg = b'adding revision outside `revlog._writing` context'
2753 2754 raise error.ProgrammingError(msg)
2754 2755 ifh, dfh, sdfh = self._writinghandles
2755 2756 if self._docket is None:
2756 2757 ifh.seek(0, os.SEEK_END)
2757 2758 else:
2758 2759 ifh.seek(self._docket.index_end, os.SEEK_SET)
2759 2760 if dfh:
2760 2761 if self._docket is None:
2761 2762 dfh.seek(0, os.SEEK_END)
2762 2763 else:
2763 2764 dfh.seek(self._docket.data_end, os.SEEK_SET)
2764 2765 if sdfh:
2765 2766 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2766 2767
2767 2768 curr = len(self) - 1
2768 2769 if not self._inline:
2769 2770 transaction.add(self._datafile, offset)
2770 2771 if self._sidedatafile:
2771 2772 transaction.add(self._sidedatafile, sidedata_offset)
2772 2773 transaction.add(self._indexfile, curr * len(entry))
2773 2774 if data[0]:
2774 2775 dfh.write(data[0])
2775 2776 dfh.write(data[1])
2776 2777 if sidedata:
2777 2778 sdfh.write(sidedata)
2778 2779 ifh.write(entry)
2779 2780 else:
2780 2781 offset += curr * self.index.entry_size
2781 2782 transaction.add(self._indexfile, offset)
2782 2783 ifh.write(entry)
2783 2784 ifh.write(data[0])
2784 2785 ifh.write(data[1])
2785 2786 assert not sidedata
2786 2787 self._enforceinlinesize(transaction)
2787 2788 if self._docket is not None:
2788 2789 # revlog-v2 always has 3 writing handles, help Pytype
2789 2790 wh1 = self._writinghandles[0]
2790 2791 wh2 = self._writinghandles[1]
2791 2792 wh3 = self._writinghandles[2]
2792 2793 assert wh1 is not None
2793 2794 assert wh2 is not None
2794 2795 assert wh3 is not None
2795 2796 self._docket.index_end = wh1.tell()
2796 2797 self._docket.data_end = wh2.tell()
2797 2798 self._docket.sidedata_end = wh3.tell()
2798 2799
2799 2800 nodemaputil.setup_persistent_nodemap(transaction, self)
2800 2801
2801 2802 def addgroup(
2802 2803 self,
2803 2804 deltas,
2804 2805 linkmapper,
2805 2806 transaction,
2806 2807 alwayscache=False,
2807 2808 addrevisioncb=None,
2808 2809 duplicaterevisioncb=None,
2809 2810 debug_info=None,
2810 2811 delta_base_reuse_policy=None,
2811 2812 ):
2812 2813 """
2813 2814 add a delta group
2814 2815
2815 2816 given a set of deltas, add them to the revision log. the
2816 2817 first delta is against its parent, which should be in our
2817 2818 log, the rest are against the previous delta.
2818 2819
2819 2820 If ``addrevisioncb`` is defined, it will be called with arguments of
2820 2821 this revlog and the revision number that was added.
2821 2822 """
2822 2823
2823 2824 if self._adding_group:
2824 2825 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2825 2826
2826 2827 # read the default delta-base reuse policy from revlog config if the
2827 2828 # group did not specify one.
2828 2829 if delta_base_reuse_policy is None:
2829 2830 if self._generaldelta and self._lazydeltabase:
2830 2831 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2831 2832 else:
2832 2833 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2833 2834
2834 2835 self._adding_group = True
2835 2836 empty = True
2836 2837 try:
2837 2838 with self._writing(transaction):
2838 2839 write_debug = None
2839 2840 if self._debug_delta:
2840 2841 write_debug = transaction._report
2841 2842 deltacomputer = deltautil.deltacomputer(
2842 2843 self,
2843 2844 write_debug=write_debug,
2844 2845 debug_info=debug_info,
2845 2846 )
2846 2847 # loop through our set of deltas
2847 2848 for data in deltas:
2848 2849 (
2849 2850 node,
2850 2851 p1,
2851 2852 p2,
2852 2853 linknode,
2853 2854 deltabase,
2854 2855 delta,
2855 2856 flags,
2856 2857 sidedata,
2857 2858 ) = data
2858 2859 link = linkmapper(linknode)
2859 2860 flags = flags or REVIDX_DEFAULT_FLAGS
2860 2861
2861 2862 rev = self.index.get_rev(node)
2862 2863 if rev is not None:
2863 2864 # this can happen if two branches make the same change
2864 2865 self._nodeduplicatecallback(transaction, rev)
2865 2866 if duplicaterevisioncb:
2866 2867 duplicaterevisioncb(self, rev)
2867 2868 empty = False
2868 2869 continue
2869 2870
2870 2871 for p in (p1, p2):
2871 2872 if not self.index.has_node(p):
2872 2873 raise error.LookupError(
2873 2874 p, self.radix, _(b'unknown parent')
2874 2875 )
2875 2876
2876 2877 if not self.index.has_node(deltabase):
2877 2878 raise error.LookupError(
2878 2879 deltabase, self.display_id, _(b'unknown delta base')
2879 2880 )
2880 2881
2881 2882 baserev = self.rev(deltabase)
2882 2883
2883 2884 if baserev != nullrev and self.iscensored(baserev):
2884 2885 # if base is censored, the delta must be a full replacement in a
2885 2886 # single patch operation
2886 2887 hlen = struct.calcsize(b">lll")
2887 2888 oldlen = self.rawsize(baserev)
2888 2889 newlen = len(delta) - hlen
2889 2890 if delta[:hlen] != mdiff.replacediffheader(
2890 2891 oldlen, newlen
2891 2892 ):
2892 2893 raise error.CensoredBaseError(
2893 2894 self.display_id, self.node(baserev)
2894 2895 )
2895 2896
2896 2897 if not flags and self._peek_iscensored(baserev, delta):
2897 2898 flags |= REVIDX_ISCENSORED
2898 2899
2899 2900 # We assume consumers of addrevisioncb will want to retrieve
2900 2901 # the added revision, which will require a call to
2901 2902 # revision(). revision() will fast path if there is a cache
2902 2903 # hit. So, we tell _addrevision() to always cache in this case.
2903 2904 # We're only using addgroup() in the context of changegroup
2904 2905 # generation so the revision data can always be handled as raw
2905 2906 # by the flagprocessor.
2906 2907 rev = self._addrevision(
2907 2908 node,
2908 2909 None,
2909 2910 transaction,
2910 2911 link,
2911 2912 p1,
2912 2913 p2,
2913 2914 flags,
2914 2915 (baserev, delta, delta_base_reuse_policy),
2915 2916 alwayscache=alwayscache,
2916 2917 deltacomputer=deltacomputer,
2917 2918 sidedata=sidedata,
2918 2919 )
2919 2920
2920 2921 if addrevisioncb:
2921 2922 addrevisioncb(self, rev)
2922 2923 empty = False
2923 2924 finally:
2924 2925 self._adding_group = False
2925 2926 return not empty
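# Each element of ``deltas`` unpacks to the 8-tuple consumed above:
#     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
# where ``delta`` is a binary patch against ``deltabase``'s raw text.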
2926 2927
2927 2928 def iscensored(self, rev):
2928 2929 """Check if a file revision is censored."""
2929 2930 if not self._censorable:
2930 2931 return False
2931 2932
2932 2933 return self.flags(rev) & REVIDX_ISCENSORED
2933 2934
2934 2935 def _peek_iscensored(self, baserev, delta):
2935 2936 """Quickly check if a delta produces a censored revision."""
2936 2937 if not self._censorable:
2937 2938 return False
2938 2939
2939 2940 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2940 2941
2941 2942 def getstrippoint(self, minlink):
2942 2943 """find the minimum rev that must be stripped to strip the linkrev
2943 2944
2944 2945 Returns a tuple containing the minimum rev and a set of all revs that
2945 2946 have linkrevs that will be broken by this strip.
2946 2947 """
2947 2948 return storageutil.resolvestripinfo(
2948 2949 minlink,
2949 2950 len(self) - 1,
2950 2951 self.headrevs(),
2951 2952 self.linkrev,
2952 2953 self.parentrevs,
2953 2954 )
2954 2955
2955 2956 def strip(self, minlink, transaction):
2956 2957 """truncate the revlog on the first revision with a linkrev >= minlink
2957 2958
2958 2959 This function is called when we're stripping revision minlink and
2959 2960 its descendants from the repository.
2960 2961
2961 2962 We have to remove all revisions with linkrev >= minlink, because
2962 2963 the equivalent changelog revisions will be renumbered after the
2963 2964 strip.
2964 2965
2965 2966 So we truncate the revlog on the first of these revisions, and
2966 2967 trust that the caller has saved the revisions that shouldn't be
2967 2968 removed and that it'll re-add them after this truncation.
2968 2969 """
2969 2970 if len(self) == 0:
2970 2971 return
2971 2972
2972 2973 rev, _ = self.getstrippoint(minlink)
2973 2974 if rev == len(self):
2974 2975 return
2975 2976
2976 2977 # first truncate the files on disk
2977 2978 data_end = self.start(rev)
2978 2979 if not self._inline:
2979 2980 transaction.add(self._datafile, data_end)
2980 2981 end = rev * self.index.entry_size
2981 2982 else:
2982 2983 end = data_end + (rev * self.index.entry_size)
2983 2984
2984 2985 if self._sidedatafile:
2985 2986 sidedata_end = self.sidedata_cut_off(rev)
2986 2987 transaction.add(self._sidedatafile, sidedata_end)
2987 2988
2988 2989 transaction.add(self._indexfile, end)
2989 2990 if self._docket is not None:
2990 2991 # XXX we could leverage the docket while stripping. However it is
2991 2992 # not powerful enough at the time of this comment
2992 2993 self._docket.index_end = end
2993 2994 self._docket.data_end = data_end
2994 2995 self._docket.sidedata_end = sidedata_end
2995 2996 self._docket.write(transaction, stripping=True)
2996 2997
2997 2998 # then reset internal state in memory to forget those revisions
2998 2999 self._revisioncache = None
2999 3000 self._chaininfocache = util.lrucachedict(500)
3000 3001 self._segmentfile.clear_cache()
3001 3002 self._segmentfile_sidedata.clear_cache()
3002 3003
3003 3004 del self.index[rev:-1]
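# E.g. (illustrative): if getstrippoint() returns rev 5, the index is
# truncated to 5 entries and the data file to self.start(5) bytes;
# revisions >= 5 must be saved and re-added by the caller.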
3004 3005
3005 3006 def checksize(self):
3006 3007 """Check size of index and data files
3007 3008
3008 3009 return a (dd, di) tuple.
3009 3010 - dd: extra bytes for the "data" file
3010 3011 - di: extra bytes for the "index" file
3011 3012
3012 3013 A healthy revlog will return (0, 0).
3013 3014 """
3014 3015 expected = 0
3015 3016 if len(self):
3016 3017 expected = max(0, self.end(len(self) - 1))
3017 3018
3018 3019 try:
3019 3020 with self._datafp() as f:
3020 3021 f.seek(0, io.SEEK_END)
3021 3022 actual = f.tell()
3022 3023 dd = actual - expected
3023 3024 except FileNotFoundError:
3024 3025 dd = 0
3025 3026
3026 3027 try:
3027 3028 f = self.opener(self._indexfile)
3028 3029 f.seek(0, io.SEEK_END)
3029 3030 actual = f.tell()
3030 3031 f.close()
3031 3032 s = self.index.entry_size
3032 3033 i = max(0, actual // s)
3033 3034 di = actual - (i * s)
3034 3035 if self._inline:
3035 3036 databytes = 0
3036 3037 for r in self:
3037 3038 databytes += max(0, self.length(r))
3038 3039 dd = 0
3039 3040 di = actual - len(self) * s - databytes
3040 3041 except FileNotFoundError:
3041 3042 di = 0
3042 3043
3043 3044 return (dd, di)
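# Quick self-check sketch:
#     dd, di = rl.checksize()
#     assert (dd, di) == (0, 0)  # holds for a healthy revlog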
3044 3045
3045 3046 def files(self):
3046 3047 res = [self._indexfile]
3047 3048 if self._docket_file is None:
3048 3049 if not self._inline:
3049 3050 res.append(self._datafile)
3050 3051 else:
3051 3052 res.append(self._docket_file)
3052 3053 res.extend(self._docket.old_index_filepaths(include_empty=False))
3053 3054 if self._docket.data_end:
3054 3055 res.append(self._datafile)
3055 3056 res.extend(self._docket.old_data_filepaths(include_empty=False))
3056 3057 if self._docket.sidedata_end:
3057 3058 res.append(self._sidedatafile)
3058 3059 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3059 3060 return res
3060 3061
3061 3062 def emitrevisions(
3062 3063 self,
3063 3064 nodes,
3064 3065 nodesorder=None,
3065 3066 revisiondata=False,
3066 3067 assumehaveparentrevisions=False,
3067 3068 deltamode=repository.CG_DELTAMODE_STD,
3068 3069 sidedata_helpers=None,
3069 3070 debug_info=None,
3070 3071 ):
3071 3072 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3072 3073 raise error.ProgrammingError(
3073 3074 b'unhandled value for nodesorder: %s' % nodesorder
3074 3075 )
3075 3076
3076 3077 if nodesorder is None and not self._generaldelta:
3077 3078 nodesorder = b'storage'
3078 3079
3079 3080 if (
3080 3081 not self._storedeltachains
3081 3082 and deltamode != repository.CG_DELTAMODE_PREV
3082 3083 ):
3083 3084 deltamode = repository.CG_DELTAMODE_FULL
3084 3085
3085 3086 return storageutil.emitrevisions(
3086 3087 self,
3087 3088 nodes,
3088 3089 nodesorder,
3089 3090 revlogrevisiondelta,
3090 3091 deltaparentfn=self.deltaparent,
3091 3092 candeltafn=self.candelta,
3092 3093 rawsizefn=self.rawsize,
3093 3094 revdifffn=self.revdiff,
3094 3095 flagsfn=self.flags,
3095 3096 deltamode=deltamode,
3096 3097 revisiondata=revisiondata,
3097 3098 assumehaveparentrevisions=assumehaveparentrevisions,
3098 3099 sidedata_helpers=sidedata_helpers,
3099 3100 debug_info=debug_info,
3100 3101 )
3101 3102
3102 3103 DELTAREUSEALWAYS = b'always'
3103 3104 DELTAREUSESAMEREVS = b'samerevs'
3104 3105 DELTAREUSENEVER = b'never'
3105 3106
3106 3107 DELTAREUSEFULLADD = b'fulladd'
3107 3108
3108 3109 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3109 3110
3110 3111 def clone(
3111 3112 self,
3112 3113 tr,
3113 3114 destrevlog,
3114 3115 addrevisioncb=None,
3115 3116 deltareuse=DELTAREUSESAMEREVS,
3116 3117 forcedeltabothparents=None,
3117 3118 sidedata_helpers=None,
3118 3119 ):
3119 3120 """Copy this revlog to another, possibly with format changes.
3120 3121
3121 3122 The destination revlog will contain the same revisions and nodes.
3122 3123 However, it may not be bit-for-bit identical due to e.g. delta encoding
3123 3124 differences.
3124 3125
3125 3126 The ``deltareuse`` argument controls how deltas from the existing revlog
3126 3127 are preserved in the destination revlog. The argument can have the
3127 3128 following values:
3128 3129
3129 3130 DELTAREUSEALWAYS
3130 3131 Deltas will always be reused (if possible), even if the destination
3131 3132 revlog would not select the same revisions for the delta. This is the
3132 3133 fastest mode of operation.
3133 3134 DELTAREUSESAMEREVS
3134 3135 Deltas will be reused if the destination revlog would pick the same
3135 3136 revisions for the delta. This mode strikes a balance between speed
3136 3137 and optimization.
3137 3138 DELTAREUSENEVER
3138 3139 Deltas will never be reused. This is the slowest mode of execution.
3139 3140 This mode can be used to recompute deltas (e.g. if the diff/delta
3140 3141 algorithm changes).
3141 3142 DELTAREUSEFULLADD
3142 3143 Revisions will be re-added as if they were new content. This is
3143 3144 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3144 3145 e.g. large file detection and handling.
3145 3146
3146 3147 Delta computation can be slow, so the choice of delta reuse policy can
3147 3148 significantly affect run time.
3148 3149
3149 3150 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3150 3151 two extremes. Deltas will be reused if they are appropriate. But if a
3151 3152 better delta base is available, it will be used. This means if you
3152 3153 are converting a non-generaldelta revlog to a generaldelta revlog,
3153 3154 deltas will be recomputed if the delta's parent isn't a parent of the
3154 3155 revision.
3155 3156
3156 3157 In addition to the delta policy, the ``forcedeltabothparents``
3157 3158 argument controls whether to force compute deltas against both parents
3158 3159 for merges. By default, the current default is used.
3159 3160
3160 3161 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3161 3162 `sidedata_helpers`.
3162 3163 """
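        # Illustrative call pattern (a minimal sketch; ``src``, ``dest`` and
        # ``tr`` are hypothetical names for a source revlog, an empty
        # destination revlog, and an open transaction):
        #
        #     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
        #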
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
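            # (index entry layout, as consumed below: entry[0] packs the
            # offset and flags, entry[4] is the linkrev, entry[5] and
            # entry[6] are the parent revisions, entry[7] is the node id)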
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
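        """Remove the revision's content, replacing it with ``tombstone``.

        The details of the rewrite are delegated to ``revlogutils.rewrite``,
        depending on the revlog format version.
        """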
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned
            # below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            #  -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Two checks need to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
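        # Each keyword argument requests the matching key in the returned
        # dict: files owned by this revlog, files shared with other stores,
        # the revision count, and two size measures assembled below.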
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

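                # Pick a storage mode for the serialized sidedata: keep it
                # uncompressed (inline) unless compression actually shrinks
                # it and the result cannot be confused with plain data.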
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)