revlog: move the computation of the split_index path in a property...
marmoute
r51554:978ffa09 stable
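
The change below replaces two inline computations of the split-index path with a lookup of a new `_split_index_file` property, so the path logic lives in one place. The property's definition is part of the full commit but falls outside the hunk shown here; based on the removed lines, a minimal sketch of what it presumably computes is:

    @property
    def _split_index_file(self):
        # Hypothetical sketch: the removed code built this path inline as
        # radix + b'.i.s', so the property presumably centralizes exactly that.
        return b'%s.i.s' % self.radix
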
@@ -1,3410 +1,3418 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 CHANGELOGV2,
39 39 COMP_MODE_DEFAULT,
40 40 COMP_MODE_INLINE,
41 41 COMP_MODE_PLAIN,
42 42 DELTA_BASE_REUSE_NO,
43 43 DELTA_BASE_REUSE_TRY,
44 44 ENTRY_RANK,
45 45 FEATURES_BY_VERSION,
46 46 FLAG_GENERALDELTA,
47 47 FLAG_INLINE_DATA,
48 48 INDEX_HEADER,
49 49 KIND_CHANGELOG,
50 50 KIND_FILELOG,
51 51 RANK_UNKNOWN,
52 52 REVLOGV0,
53 53 REVLOGV1,
54 54 REVLOGV1_FLAGS,
55 55 REVLOGV2,
56 56 REVLOGV2_FLAGS,
57 57 REVLOG_DEFAULT_FLAGS,
58 58 REVLOG_DEFAULT_FORMAT,
59 59 REVLOG_DEFAULT_VERSION,
60 60 SUPPORTED_FLAGS,
61 61 )
62 62 from .revlogutils.flagutil import (
63 63 REVIDX_DEFAULT_FLAGS,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 REVIDX_FLAGS_ORDER,
67 67 REVIDX_HASCOPIESINFO,
68 68 REVIDX_ISCENSORED,
69 69 REVIDX_RAWTEXT_CHANGING_FLAGS,
70 70 )
71 71 from .thirdparty import attr
72 72 from . import (
73 73 ancestor,
74 74 dagop,
75 75 error,
76 76 mdiff,
77 77 policy,
78 78 pycompat,
79 79 revlogutils,
80 80 templatefilters,
81 81 util,
82 82 )
83 83 from .interfaces import (
84 84 repository,
85 85 util as interfaceutil,
86 86 )
87 87 from .revlogutils import (
88 88 deltas as deltautil,
89 89 docket as docketutil,
90 90 flagutil,
91 91 nodemap as nodemaputil,
92 92 randomaccessfile,
93 93 revlogv0,
94 94 rewrite,
95 95 sidedata as sidedatautil,
96 96 )
97 97 from .utils import (
98 98 storageutil,
99 99 stringutil,
100 100 )
101 101
102 102 # bare usage of all the names to silence pyflakes unused-import warnings
103 103 # We need these names available in the module for extensions.
104 104
105 105 REVLOGV0
106 106 REVLOGV1
107 107 REVLOGV2
108 108 CHANGELOGV2
109 109 FLAG_INLINE_DATA
110 110 FLAG_GENERALDELTA
111 111 REVLOG_DEFAULT_FLAGS
112 112 REVLOG_DEFAULT_FORMAT
113 113 REVLOG_DEFAULT_VERSION
114 114 REVLOGV1_FLAGS
115 115 REVLOGV2_FLAGS
116 116 REVIDX_ISCENSORED
117 117 REVIDX_ELLIPSIS
118 118 REVIDX_HASCOPIESINFO
119 119 REVIDX_EXTSTORED
120 120 REVIDX_DEFAULT_FLAGS
121 121 REVIDX_FLAGS_ORDER
122 122 REVIDX_RAWTEXT_CHANGING_FLAGS
123 123
124 124 parsers = policy.importmod('parsers')
125 125 rustancestor = policy.importrust('ancestor')
126 126 rustdagop = policy.importrust('dagop')
127 127 rustrevlog = policy.importrust('revlog')
128 128
129 129 # Aliased for performance.
130 130 _zlibdecompress = zlib.decompress
131 131
132 132 # max size of inline data embedded into a revlog
133 133 _maxinline = 131072
134 134
135 135 # Flag processors for REVIDX_ELLIPSIS.
136 136 def ellipsisreadprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsiswriteprocessor(rl, text):
141 141 return text, False
142 142
143 143
144 144 def ellipsisrawprocessor(rl, text):
145 145 return False
146 146
147 147
148 148 ellipsisprocessor = (
149 149 ellipsisreadprocessor,
150 150 ellipsiswriteprocessor,
151 151 ellipsisrawprocessor,
152 152 )
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider the "pure" python implementation "fast" because its
168 168 # users don't really have performance concerns (and a wheelbarrow of
169 169 # other slowness sources)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @interfaceutil.implementer(repository.irevisiondelta)
176 176 @attr.s(slots=True)
177 177 class revlogrevisiondelta:
178 178 node = attr.ib()
179 179 p1node = attr.ib()
180 180 p2node = attr.ib()
181 181 basenode = attr.ib()
182 182 flags = attr.ib()
183 183 baserevisionsize = attr.ib()
184 184 revision = attr.ib()
185 185 delta = attr.ib()
186 186 sidedata = attr.ib()
187 187 protocol_flags = attr.ib()
188 188 linknode = attr.ib(default=None)
189 189
190 190
191 191 @interfaceutil.implementer(repository.iverifyproblem)
192 192 @attr.s(frozen=True)
193 193 class revlogproblem:
194 194 warning = attr.ib(default=None)
195 195 error = attr.ib(default=None)
196 196 node = attr.ib(default=None)
197 197
198 198
199 199 def parse_index_v1(data, inline):
200 200 # call the C implementation to parse the index data
201 201 index, cache = parsers.parse_index2(data, inline)
202 202 return index, cache
203 203
204 204
205 205 def parse_index_v2(data, inline):
206 206 # call the C implementation to parse the index data
207 207 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
208 208 return index, cache
209 209
210 210
211 211 def parse_index_cl_v2(data, inline):
212 212 # call the C implementation to parse the index data
213 213 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
214 214 return index, cache
215 215
216 216
217 217 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
218 218
219 219 def parse_index_v1_nodemap(data, inline):
220 220 index, cache = parsers.parse_index_devel_nodemap(data, inline)
221 221 return index, cache
222 222
223 223
224 224 else:
225 225 parse_index_v1_nodemap = None
226 226
227 227
228 228 def parse_index_v1_mixed(data, inline):
229 229 index, cache = parse_index_v1(data, inline)
230 230 return rustrevlog.MixedIndex(index), cache
231 231
232 232
233 233 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
234 234 # signed integer)
235 235 _maxentrysize = 0x7FFFFFFF
236 236
237 237 FILE_TOO_SHORT_MSG = _(
238 238 b'cannot read from revlog %s;'
239 239 b' expected %d bytes from offset %d, data size is %d'
240 240 )
241 241
242 242 hexdigits = b'0123456789abcdefABCDEF'
243 243
244 244
245 245 class revlog:
246 246 """
247 247 the underlying revision storage object
248 248
249 249 A revlog consists of two parts, an index and the revision data.
250 250
251 251 The index is a file with a fixed record size containing
252 252 information on each revision, including its nodeid (hash), the
253 253 nodeids of its parents, the position and offset of its data within
254 254 the data file, and the revision it's based on. Finally, each entry
255 255 contains a linkrev entry that can serve as a pointer to external
256 256 data.
257 257
258 258 The revision data itself is a linear collection of data chunks.
259 259 Each chunk represents a revision and is usually represented as a
260 260 delta against the previous chunk. To bound lookup time, runs of
261 261 deltas are limited to about 2 times the length of the original
262 262 version data. This makes retrieval of a version proportional to
263 263 its size, or O(1) relative to the number of revisions.
264 264
265 265 Both pieces of the revlog are written to in an append-only
266 266 fashion, which means we never need to rewrite a file to insert or
267 267 remove data, and can use some simple techniques to avoid the need
268 268 for locking while reading.
269 269
270 270 If checkambig, indexfile is opened with checkambig=True at
271 271 writing, to avoid file stat ambiguity.
272 272
273 273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 274 index will be mmapped rather than read if it is larger than the
275 275 configured threshold.
276 276
277 277 If censorable is True, the revlog can have censored revisions.
278 278
279 279 If `upperboundcomp` is not None, this is the expected maximal gain from
280 280 compression for the data content.
281 281
282 282 `concurrencychecker` is an optional function that receives 3 arguments: a
283 283 file handle, a filename, and an expected position. It should check whether
284 284 the current position in the file handle is valid, and log/warn/fail (by
285 285 raising).
286 286
287 287 See mercurial/revlogutils/constants.py for details about the content of an
288 288 index entry.
289 289 """
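
For orientation while reading the accessors further down, here is an informal sketch of the index entry tuple as this file consumes it; the authoritative layout is in mercurial/revlogutils/constants.py, so treat the annotations below as descriptive only:

    e = self.index[rev]
    e[0]        # offset (upper bits) and flags (low 16 bits), see start()/flags()
    e[1]        # length of the stored (compressed) data, see length()
    e[2]        # length of the uncompressed text, -1 if unknown, see rawsize()
    e[3]        # base revision of the delta chain, see chainbase()
    e[4]        # linkrev pointing at external (changelog) data, see linkrev()
    e[5], e[6]  # parent revisions, see parentrevs()
    e[7]        # nodeid, see node()
    e[8], e[9]  # sidedata offset and length, see sidedata_cut_off()
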
290 290
291 291 _flagserrorclass = error.RevlogError
292 292
293 293 def __init__(
294 294 self,
295 295 opener,
296 296 target,
297 297 radix,
298 298 postfix=None, # only exists for `tmpcensored` now
299 299 checkambig=False,
300 300 mmaplargeindex=False,
301 301 censorable=False,
302 302 upperboundcomp=None,
303 303 persistentnodemap=False,
304 304 concurrencychecker=None,
305 305 trypending=False,
306 306 try_split=False,
307 307 canonical_parent_order=True,
308 308 ):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314
315 315 `target`: a (KIND, ID) tuple that identifies the content stored in
316 316 this revlog. It helps the rest of the code understand what the revlog
317 317 is about without having to resort to heuristics and index filename
318 318 analysis. Note that this must reliably be set by normal code, but
319 319 that test, debug, or performance measurement code might not set it
320 320 to an accurate value.
321 321 """
322 322 self.upperboundcomp = upperboundcomp
323 323
324 324 self.radix = radix
325 325
326 326 self._docket_file = None
327 327 self._indexfile = None
328 328 self._datafile = None
329 329 self._sidedatafile = None
330 330 self._nodemap_file = None
331 331 self.postfix = postfix
332 332 self._trypending = trypending
333 333 self._try_split = try_split
334 334 self.opener = opener
335 335 if persistentnodemap:
336 336 self._nodemap_file = nodemaputil.get_nodemap_file(self)
337 337
338 338 assert target[0] in ALL_KINDS
339 339 assert len(target) == 2
340 340 self.target = target
341 341 # When True, indexfile is opened with checkambig=True at writing, to
342 342 # avoid file stat ambiguity.
343 343 self._checkambig = checkambig
344 344 self._mmaplargeindex = mmaplargeindex
345 345 self._censorable = censorable
346 346 # 3-tuple of (node, rev, text) for a raw revision.
347 347 self._revisioncache = None
348 348 # Maps rev to chain base rev.
349 349 self._chainbasecache = util.lrucachedict(100)
350 350 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
351 351 self._chunkcache = (0, b'')
352 352 # How much data to read and cache into the raw revlog data cache.
353 353 self._chunkcachesize = 65536
354 354 self._maxchainlen = None
355 355 self._deltabothparents = True
356 356 self._candidate_group_chunk_size = 0
357 357 self._debug_delta = False
358 358 self.index = None
359 359 self._docket = None
360 360 self._nodemap_docket = None
361 361 # Mapping of partial identifiers to full nodes.
362 362 self._pcache = {}
364 364 self._compengine = b'zlib'
365 365 self._compengineopts = {}
366 366 self._maxdeltachainspan = -1
367 367 self._withsparseread = False
368 368 self._sparserevlog = False
369 369 self.hassidedata = False
370 370 self._srdensitythreshold = 0.50
371 371 self._srmingapsize = 262144
372 372
373 373 # other optional features
374 374
375 375 # might remove rank configuration once the computation has no impact
376 376 self._compute_rank = False
377 377
378 378 # Make copy of flag processors so each revlog instance can support
379 379 # custom flags.
380 380 self._flagprocessors = dict(flagutil.flagprocessors)
381 381
382 382 # 3-tuple of file handles being used for active writing.
383 383 self._writinghandles = None
384 384 # prevent nesting of addgroup
385 385 self._adding_group = None
386 386
387 387 self._loadindex()
388 388
389 389 self._concurrencychecker = concurrencychecker
390 390
391 391 # parent order is supposed to be semantically irrelevant, so we
392 392 # normally re-sort parents to ensure that the first parent is non-null,
393 393 # if there is a non-null parent at all.
394 394 # filelog abuses the parent order as a flag to mark some instances of
395 395 # meta-encoded files, so allow it to disable this behavior.
396 396 self.canonical_parent_order = canonical_parent_order
397 397
398 398 def _init_opts(self):
399 399 """process options (from above/config) to setup associated default revlog mode
400 400
401 401 These values might be affected when actually reading on-disk information.
402 402
403 403 The relevant values are returned for use in _loadindex().
404 404
405 405 * newversionflags:
406 406 version header to use if we need to create a new revlog
407 407
408 408 * mmapindexthreshold:
409 409 minimal index size at which to start using mmap
410 410
411 411 * force_nodemap:
412 412 force the usage of a "development" version of the nodemap code
413 413 """
414 414 mmapindexthreshold = None
415 415 opts = self.opener.options
416 416
417 417 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
418 418 new_header = CHANGELOGV2
419 419 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
420 420 elif b'revlogv2' in opts:
421 421 new_header = REVLOGV2
422 422 elif b'revlogv1' in opts:
423 423 new_header = REVLOGV1 | FLAG_INLINE_DATA
424 424 if b'generaldelta' in opts:
425 425 new_header |= FLAG_GENERALDELTA
426 426 elif b'revlogv0' in self.opener.options:
427 427 new_header = REVLOGV0
428 428 else:
429 429 new_header = REVLOG_DEFAULT_VERSION
430 430
431 431 if b'chunkcachesize' in opts:
432 432 self._chunkcachesize = opts[b'chunkcachesize']
433 433 if b'maxchainlen' in opts:
434 434 self._maxchainlen = opts[b'maxchainlen']
435 435 if b'deltabothparents' in opts:
436 436 self._deltabothparents = opts[b'deltabothparents']
437 437 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
438 438 if dps_cgds:
439 439 self._candidate_group_chunk_size = dps_cgds
440 440 self._lazydelta = bool(opts.get(b'lazydelta', True))
441 441 self._lazydeltabase = False
442 442 if self._lazydelta:
443 443 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
444 444 if b'debug-delta' in opts:
445 445 self._debug_delta = opts[b'debug-delta']
446 446 if b'compengine' in opts:
447 447 self._compengine = opts[b'compengine']
448 448 if b'zlib.level' in opts:
449 449 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
450 450 if b'zstd.level' in opts:
451 451 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
452 452 if b'maxdeltachainspan' in opts:
453 453 self._maxdeltachainspan = opts[b'maxdeltachainspan']
454 454 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
455 455 mmapindexthreshold = opts[b'mmapindexthreshold']
456 456 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
457 457 withsparseread = bool(opts.get(b'with-sparse-read', False))
458 458 # sparse-revlog forces sparse-read
459 459 self._withsparseread = self._sparserevlog or withsparseread
460 460 if b'sparse-read-density-threshold' in opts:
461 461 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
462 462 if b'sparse-read-min-gap-size' in opts:
463 463 self._srmingapsize = opts[b'sparse-read-min-gap-size']
464 464 if opts.get(b'enableellipsis'):
465 465 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
466 466
467 467 # revlog v0 doesn't have flag processors
468 468 for flag, processor in opts.get(b'flagprocessors', {}).items():
469 469 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
470 470
471 471 if self._chunkcachesize <= 0:
472 472 raise error.RevlogError(
473 473 _(b'revlog chunk cache size %r is not greater than 0')
474 474 % self._chunkcachesize
475 475 )
476 476 elif self._chunkcachesize & (self._chunkcachesize - 1):
477 477 raise error.RevlogError(
478 478 _(b'revlog chunk cache size %r is not a power of 2')
479 479 % self._chunkcachesize
480 480 )
481 481 force_nodemap = opts.get(b'devel-force-nodemap', False)
482 482 return new_header, mmapindexthreshold, force_nodemap
483 483
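
The chunk cache validation above uses the classic bit trick that `x & (x - 1)` clears the lowest set bit, so the result is zero exactly for powers of two; a standalone illustration:

    def is_power_of_two(x):
        # Single-bit values (powers of two) lose their only bit and become 0.
        return x > 0 and (x & (x - 1)) == 0

    assert is_power_of_two(65536)       # the default _chunkcachesize
    assert not is_power_of_two(65537)
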
484 484 def _get_data(self, filepath, mmap_threshold, size=None):
485 485 """return a file content with or without mmap
486 486
487 487 If the file is missing return the empty string"""
488 488 try:
489 489 with self.opener(filepath) as fp:
490 490 if mmap_threshold is not None:
491 491 file_size = self.opener.fstat(fp).st_size
492 492 if file_size >= mmap_threshold:
493 493 if size is not None:
494 494 # avoid potential mmap crash
495 495 size = min(file_size, size)
496 496 # TODO: should call .close() to release resources without
497 497 # relying on Python GC
498 498 if size is None:
499 499 return util.buffer(util.mmapread(fp))
500 500 else:
501 501 return util.buffer(util.mmapread(fp, size))
502 502 if size is None:
503 503 return fp.read()
504 504 else:
505 505 return fp.read(size)
506 506 except FileNotFoundError:
507 507 return b''
508 508
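
`_get_data` switches from a plain read to an mmap-backed buffer once the file size crosses the configured threshold. A rough standard-library equivalent, ignoring the vfs/opener abstraction and `util.mmapread` (a sketch only, not the real helper):

    import mmap
    import os

    def read_possibly_mmapped(path, threshold, size=None):
        # Mirror the read-vs-mmap decision: mmap large files, read small ones.
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            if threshold is not None and file_size >= threshold:
                length = file_size if size is None else min(file_size, size)
                return mmap.mmap(fp.fileno(), length, access=mmap.ACCESS_READ)
            return fp.read() if size is None else fp.read(size)
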
509 509 def _loadindex(self, docket=None):
510 510
511 511 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
512 512
513 513 if self.postfix is not None:
514 514 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
515 515 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
516 516 entry_point = b'%s.i.a' % self.radix
517 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
518 entry_point = b'%s.i.s' % self.radix
517 elif self._try_split and self.opener.exists(self._split_index_file):
518 entry_point = self._split_index_file
519 519 else:
520 520 entry_point = b'%s.i' % self.radix
521 521
522 522 if docket is not None:
523 523 self._docket = docket
524 524 self._docket_file = entry_point
525 525 else:
526 526 self._initempty = True
527 527 entry_data = self._get_data(entry_point, mmapindexthreshold)
528 528 if len(entry_data) > 0:
529 529 header = INDEX_HEADER.unpack(entry_data[:4])[0]
530 530 self._initempty = False
531 531 else:
532 532 header = new_header
533 533
534 534 self._format_flags = header & ~0xFFFF
535 535 self._format_version = header & 0xFFFF
536 536
537 537 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
538 538 if supported_flags is None:
539 539 msg = _(b'unknown version (%d) in revlog %s')
540 540 msg %= (self._format_version, self.display_id)
541 541 raise error.RevlogError(msg)
542 542 elif self._format_flags & ~supported_flags:
543 543 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
544 544 display_flag = self._format_flags >> 16
545 545 msg %= (display_flag, self._format_version, self.display_id)
546 546 raise error.RevlogError(msg)
547 547
548 548 features = FEATURES_BY_VERSION[self._format_version]
549 549 self._inline = features[b'inline'](self._format_flags)
550 550 self._generaldelta = features[b'generaldelta'](self._format_flags)
551 551 self.hassidedata = features[b'sidedata']
552 552
553 553 if not features[b'docket']:
554 554 self._indexfile = entry_point
555 555 index_data = entry_data
556 556 else:
557 557 self._docket_file = entry_point
558 558 if self._initempty:
559 559 self._docket = docketutil.default_docket(self, header)
560 560 else:
561 561 self._docket = docketutil.parse_docket(
562 562 self, entry_data, use_pending=self._trypending
563 563 )
564 564
565 565 if self._docket is not None:
566 566 self._indexfile = self._docket.index_filepath()
567 567 index_data = b''
568 568 index_size = self._docket.index_end
569 569 if index_size > 0:
570 570 index_data = self._get_data(
571 571 self._indexfile, mmapindexthreshold, size=index_size
572 572 )
573 573 if len(index_data) < index_size:
574 574 msg = _(b'too few index data for %s: got %d, expected %d')
575 575 msg %= (self.display_id, len(index_data), index_size)
576 576 raise error.RevlogError(msg)
577 577
578 578 self._inline = False
579 579 # generaldelta is implied by version 2 revlogs.
580 580 self._generaldelta = True
581 581 # the logic for persistent nodemap will be dealt with within the
582 582 # main docket, so disable it for now.
583 583 self._nodemap_file = None
584 584
585 585 if self._docket is not None:
586 586 self._datafile = self._docket.data_filepath()
587 587 self._sidedatafile = self._docket.sidedata_filepath()
588 588 elif self.postfix is None:
589 589 self._datafile = b'%s.d' % self.radix
590 590 else:
591 591 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
592 592
593 593 self.nodeconstants = sha1nodeconstants
594 594 self.nullid = self.nodeconstants.nullid
595 595
596 596 # sparse-revlog can't be on without general-delta (issue6056)
597 597 if not self._generaldelta:
598 598 self._sparserevlog = False
599 599
600 600 self._storedeltachains = True
601 601
602 602 devel_nodemap = (
603 603 self._nodemap_file
604 604 and force_nodemap
605 605 and parse_index_v1_nodemap is not None
606 606 )
607 607
608 608 use_rust_index = False
609 609 if rustrevlog is not None:
610 610 if self._nodemap_file is not None:
611 611 use_rust_index = True
612 612 else:
613 613 use_rust_index = self.opener.options.get(b'rust.index')
614 614
615 615 self._parse_index = parse_index_v1
616 616 if self._format_version == REVLOGV0:
617 617 self._parse_index = revlogv0.parse_index_v0
618 618 elif self._format_version == REVLOGV2:
619 619 self._parse_index = parse_index_v2
620 620 elif self._format_version == CHANGELOGV2:
621 621 self._parse_index = parse_index_cl_v2
622 622 elif devel_nodemap:
623 623 self._parse_index = parse_index_v1_nodemap
624 624 elif use_rust_index:
625 625 self._parse_index = parse_index_v1_mixed
626 626 try:
627 627 d = self._parse_index(index_data, self._inline)
628 628 index, chunkcache = d
629 629 use_nodemap = (
630 630 not self._inline
631 631 and self._nodemap_file is not None
632 632 and util.safehasattr(index, 'update_nodemap_data')
633 633 )
634 634 if use_nodemap:
635 635 nodemap_data = nodemaputil.persisted_data(self)
636 636 if nodemap_data is not None:
637 637 docket = nodemap_data[0]
638 638 if (
639 639 len(d[0]) > docket.tip_rev
640 640 and d[0][docket.tip_rev][7] == docket.tip_node
641 641 ):
642 642 # no changelog tampering
643 643 self._nodemap_docket = docket
644 644 index.update_nodemap_data(*nodemap_data)
645 645 except (ValueError, IndexError):
646 646 raise error.RevlogError(
647 647 _(b"index %s is corrupted") % self.display_id
648 648 )
649 649 self.index = index
650 650 self._segmentfile = randomaccessfile.randomaccessfile(
651 651 self.opener,
652 652 (self._indexfile if self._inline else self._datafile),
653 653 self._chunkcachesize,
654 654 chunkcache,
655 655 )
656 656 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
657 657 self.opener,
658 658 self._sidedatafile,
659 659 self._chunkcachesize,
660 660 )
661 661 # revnum -> (chain-length, sum-delta-length)
662 662 self._chaininfocache = util.lrucachedict(500)
663 663 # revlog header -> revlog compressor
664 664 self._decompressors = {}
665 665
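
As seen above, `_loadindex` splits the 4-byte index header into a format version (low 16 bits) and feature flags (the bits above them). Using the flag values from revlogutils.constants (FLAG_INLINE_DATA is 1 << 16, FLAG_GENERALDELTA is 1 << 17), the split works like this:

    REVLOGV1 = 1
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17

    header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    assert header & 0xFFFF == REVLOGV1                               # version
    assert header & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA  # flags
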
666 666 @util.propertycache
667 667 def revlog_kind(self):
668 668 return self.target[0]
669 669
670 670 @util.propertycache
671 671 def display_id(self):
672 672 """The public facing "ID" of the revlog that we use in message"""
673 673 if self.revlog_kind == KIND_FILELOG:
674 674 # Reference the file without the "data/" prefix, so it is familiar
675 675 # to the user.
676 676 return self.target[1]
677 677 else:
678 678 return self.radix
679 679
680 680 def _get_decompressor(self, t):
681 681 try:
682 682 compressor = self._decompressors[t]
683 683 except KeyError:
684 684 try:
685 685 engine = util.compengines.forrevlogheader(t)
686 686 compressor = engine.revlogcompressor(self._compengineopts)
687 687 self._decompressors[t] = compressor
688 688 except KeyError:
689 689 raise error.RevlogError(
690 690 _(b'unknown compression type %s') % binascii.hexlify(t)
691 691 )
692 692 return compressor
693 693
694 694 @util.propertycache
695 695 def _compressor(self):
696 696 engine = util.compengines[self._compengine]
697 697 return engine.revlogcompressor(self._compengineopts)
698 698
699 699 @util.propertycache
700 700 def _decompressor(self):
701 701 """the default decompressor"""
702 702 if self._docket is None:
703 703 return None
704 704 t = self._docket.default_compression_header
705 705 c = self._get_decompressor(t)
706 706 return c.decompress
707 707
708 708 def _indexfp(self):
709 709 """file object for the revlog's index file"""
710 710 return self.opener(self._indexfile, mode=b"r")
711 711
712 712 def __index_write_fp(self):
713 713 # You should not use this directly; use `_writing` instead
714 714 try:
715 715 f = self.opener(
716 716 self._indexfile, mode=b"r+", checkambig=self._checkambig
717 717 )
718 718 if self._docket is None:
719 719 f.seek(0, os.SEEK_END)
720 720 else:
721 721 f.seek(self._docket.index_end, os.SEEK_SET)
722 722 return f
723 723 except FileNotFoundError:
724 724 return self.opener(
725 725 self._indexfile, mode=b"w+", checkambig=self._checkambig
726 726 )
727 727
728 728 def __index_new_fp(self):
729 729 # You should not use this unless you are upgrading from inline revlog
730 730 return self.opener(
731 731 self._indexfile,
732 732 mode=b"w",
733 733 checkambig=self._checkambig,
734 734 atomictemp=True,
735 735 )
736 736
737 737 def _datafp(self, mode=b'r'):
738 738 """file object for the revlog's data file"""
739 739 return self.opener(self._datafile, mode=mode)
740 740
741 741 @contextlib.contextmanager
742 742 def _sidedatareadfp(self):
743 743 """file object suitable to read sidedata"""
744 744 if self._writinghandles:
745 745 yield self._writinghandles[2]
746 746 else:
747 747 with self.opener(self._sidedatafile) as fp:
748 748 yield fp
749 749
750 750 def tiprev(self):
751 751 return len(self.index) - 1
752 752
753 753 def tip(self):
754 754 return self.node(self.tiprev())
755 755
756 756 def __contains__(self, rev):
757 757 return 0 <= rev < len(self)
758 758
759 759 def __len__(self):
760 760 return len(self.index)
761 761
762 762 def __iter__(self):
763 763 return iter(range(len(self)))
764 764
765 765 def revs(self, start=0, stop=None):
766 766 """iterate over all rev in this revlog (from start to stop)"""
767 767 return storageutil.iterrevs(len(self), start=start, stop=stop)
768 768
769 769 def hasnode(self, node):
770 770 try:
771 771 self.rev(node)
772 772 return True
773 773 except KeyError:
774 774 return False
775 775
776 776 def candelta(self, baserev, rev):
777 777 """whether two revisions (baserev, rev) can be delta-ed or not"""
778 778 # Disable delta if either rev requires a content-changing flag
779 779 # processor (ex. LFS). This is because such a flag processor can alter
780 780 # the rawtext content that the delta will be based on, and two clients
781 781 # could have the same revlog node with different flags (i.e. different
782 782 # rawtext contents) and the delta could be incompatible.
783 783 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
784 784 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
785 785 ):
786 786 return False
787 787 return True
788 788
789 789 def update_caches(self, transaction):
790 790 if self._nodemap_file is not None:
791 791 if transaction is None:
792 792 nodemaputil.update_persistent_nodemap(self)
793 793 else:
794 794 nodemaputil.setup_persistent_nodemap(transaction, self)
795 795
796 796 def clearcaches(self):
797 797 self._revisioncache = None
798 798 self._chainbasecache.clear()
799 799 self._segmentfile.clear_cache()
800 800 self._segmentfile_sidedata.clear_cache()
801 801 self._pcache = {}
802 802 self._nodemap_docket = None
803 803 self.index.clearcaches()
804 804 # The python code is the one responsible for validating the docket, so we
805 805 # end up having to refresh it here.
806 806 use_nodemap = (
807 807 not self._inline
808 808 and self._nodemap_file is not None
809 809 and util.safehasattr(self.index, 'update_nodemap_data')
810 810 )
811 811 if use_nodemap:
812 812 nodemap_data = nodemaputil.persisted_data(self)
813 813 if nodemap_data is not None:
814 814 self._nodemap_docket = nodemap_data[0]
815 815 self.index.update_nodemap_data(*nodemap_data)
816 816
817 817 def rev(self, node):
818 818 try:
819 819 return self.index.rev(node)
820 820 except TypeError:
821 821 raise
822 822 except error.RevlogError:
823 823 # parsers.c radix tree lookup failed
824 824 if (
825 825 node == self.nodeconstants.wdirid
826 826 or node in self.nodeconstants.wdirfilenodeids
827 827 ):
828 828 raise error.WdirUnsupported
829 829 raise error.LookupError(node, self.display_id, _(b'no node'))
830 830
831 831 # Accessors for index entries.
832 832
833 833 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
834 834 # are flags.
835 835 def start(self, rev):
836 836 return int(self.index[rev][0] >> 16)
837 837
838 838 def sidedata_cut_off(self, rev):
839 839 sd_cut_off = self.index[rev][8]
840 840 if sd_cut_off != 0:
841 841 return sd_cut_off
842 842 # This is some annoying dance, because entries without sidedata
843 843 # currently use 0 as their offset (instead of previous-offset +
844 844 # previous-size).
845 845 #
846 846 # We should reconsider this sidedata → 0 sidedata_offset policy.
847 847 # In the meantime, we need this.
848 848 while 0 <= rev:
849 849 e = self.index[rev]
850 850 if e[9] != 0:
851 851 return e[8] + e[9]
852 852 rev -= 1
853 853 return 0
854 854
855 855 def flags(self, rev):
856 856 return self.index[rev][0] & 0xFFFF
857 857
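
As the comment above `start` notes, the first index field packs a 6-byte offset and 2-byte flags into a single integer, which `start` and `flags` unpack with a shift and a mask; a tiny round-trip check:

    def pack_offset_flags(offset, flags):
        # offset in the high bits, flags in the low 16, matching
        # start() (>> 16) and flags() (& 0xFFFF) above.
        return (offset << 16) | flags

    packed = pack_offset_flags(4096, 0)
    assert packed >> 16 == 4096
    assert packed & 0xFFFF == 0
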
858 858 def length(self, rev):
859 859 return self.index[rev][1]
860 860
861 861 def sidedata_length(self, rev):
862 862 if not self.hassidedata:
863 863 return 0
864 864 return self.index[rev][9]
865 865
866 866 def rawsize(self, rev):
867 867 """return the length of the uncompressed text for a given revision"""
868 868 l = self.index[rev][2]
869 869 if l >= 0:
870 870 return l
871 871
872 872 t = self.rawdata(rev)
873 873 return len(t)
874 874
875 875 def size(self, rev):
876 876 """length of non-raw text (processed by a "read" flag processor)"""
877 877 # fast path: if no "read" flag processor could change the content,
878 878 # size is rawsize. note: ELLIPSIS is known to not change the content.
879 879 flags = self.flags(rev)
880 880 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
881 881 return self.rawsize(rev)
882 882
883 883 return len(self.revision(rev))
884 884
885 885 def fast_rank(self, rev):
886 886 """Return the rank of a revision if already known, or None otherwise.
887 887
888 888 The rank of a revision is the size of the sub-graph it defines as a
889 889 head. Equivalently, the rank of a revision `r` is the size of the set
890 890 `ancestors(r)`, `r` included.
891 891
892 892 This method returns the rank retrieved from the revlog in constant
893 893 time. It makes no attempt at computing unknown values for versions of
894 894 the revlog which do not persist the rank.
895 895 """
896 896 rank = self.index[rev][ENTRY_RANK]
897 897 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
898 898 return None
899 899 if rev == nullrev:
900 900 return 0 # convention
901 901 return rank
902 902
903 903 def chainbase(self, rev):
904 904 base = self._chainbasecache.get(rev)
905 905 if base is not None:
906 906 return base
907 907
908 908 index = self.index
909 909 iterrev = rev
910 910 base = index[iterrev][3]
911 911 while base != iterrev:
912 912 iterrev = base
913 913 base = index[iterrev][3]
914 914
915 915 self._chainbasecache[rev] = base
916 916 return base
917 917
918 918 def linkrev(self, rev):
919 919 return self.index[rev][4]
920 920
921 921 def parentrevs(self, rev):
922 922 try:
923 923 entry = self.index[rev]
924 924 except IndexError:
925 925 if rev == wdirrev:
926 926 raise error.WdirUnsupported
927 927 raise
928 928
929 929 if self.canonical_parent_order and entry[5] == nullrev:
930 930 return entry[6], entry[5]
931 931 else:
932 932 return entry[5], entry[6]
933 933
934 934 # fast parentrevs(rev) where rev isn't filtered
935 935 _uncheckedparentrevs = parentrevs
936 936
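
`parentrevs` swaps the pair whenever the first parent is null, so callers see a non-null first parent if any parent is non-null (filelogs opt out, as explained in `__init__`). A condensed model of that swap, using -1 as nullrev:

    nullrev = -1

    def canonical_parents(p1, p2):
        # Same reordering as parentrevs() with canonical_parent_order set.
        if p1 == nullrev:
            return p2, p1
        return p1, p2

    assert canonical_parents(nullrev, 5) == (5, nullrev)
    assert canonical_parents(3, nullrev) == (3, nullrev)
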
937 937 def node(self, rev):
938 938 try:
939 939 return self.index[rev][7]
940 940 except IndexError:
941 941 if rev == wdirrev:
942 942 raise error.WdirUnsupported
943 943 raise
944 944
945 945 # Derived from index values.
946 946
947 947 def end(self, rev):
948 948 return self.start(rev) + self.length(rev)
949 949
950 950 def parents(self, node):
951 951 i = self.index
952 952 d = i[self.rev(node)]
953 953 # inline node() to avoid function call overhead
954 954 if self.canonical_parent_order and d[5] == self.nullid:
955 955 return i[d[6]][7], i[d[5]][7]
956 956 else:
957 957 return i[d[5]][7], i[d[6]][7]
958 958
959 959 def chainlen(self, rev):
960 960 return self._chaininfo(rev)[0]
961 961
962 962 def _chaininfo(self, rev):
963 963 chaininfocache = self._chaininfocache
964 964 if rev in chaininfocache:
965 965 return chaininfocache[rev]
966 966 index = self.index
967 967 generaldelta = self._generaldelta
968 968 iterrev = rev
969 969 e = index[iterrev]
970 970 clen = 0
971 971 compresseddeltalen = 0
972 972 while iterrev != e[3]:
973 973 clen += 1
974 974 compresseddeltalen += e[1]
975 975 if generaldelta:
976 976 iterrev = e[3]
977 977 else:
978 978 iterrev -= 1
979 979 if iterrev in chaininfocache:
980 980 t = chaininfocache[iterrev]
981 981 clen += t[0]
982 982 compresseddeltalen += t[1]
983 983 break
984 984 e = index[iterrev]
985 985 else:
986 986 # Add text length of base since decompressing that also takes
987 987 # work. For cache hits the length is already included.
988 988 compresseddeltalen += e[1]
989 989 r = (clen, compresseddeltalen)
990 990 chaininfocache[rev] = r
991 991 return r
992 992
993 993 def _deltachain(self, rev, stoprev=None):
994 994 """Obtain the delta chain for a revision.
995 995
996 996 ``stoprev`` specifies a revision to stop at. If not specified, we
997 997 stop at the base of the chain.
998 998
999 999 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1000 1000 revs in ascending order and ``stopped`` is a bool indicating whether
1001 1001 ``stoprev`` was hit.
1002 1002 """
1003 1003 # Try C implementation.
1004 1004 try:
1005 1005 return self.index.deltachain(rev, stoprev, self._generaldelta)
1006 1006 except AttributeError:
1007 1007 pass
1008 1008
1009 1009 chain = []
1010 1010
1011 1011 # Alias to prevent attribute lookup in tight loop.
1012 1012 index = self.index
1013 1013 generaldelta = self._generaldelta
1014 1014
1015 1015 iterrev = rev
1016 1016 e = index[iterrev]
1017 1017 while iterrev != e[3] and iterrev != stoprev:
1018 1018 chain.append(iterrev)
1019 1019 if generaldelta:
1020 1020 iterrev = e[3]
1021 1021 else:
1022 1022 iterrev -= 1
1023 1023 e = index[iterrev]
1024 1024
1025 1025 if iterrev == stoprev:
1026 1026 stopped = True
1027 1027 else:
1028 1028 chain.append(iterrev)
1029 1029 stopped = False
1030 1030
1031 1031 chain.reverse()
1032 1032 return chain, stopped
1033 1033
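
The pure-Python fallback in `_deltachain` walks base pointers until it reaches a revision that is its own base (a full snapshot), then reverses the list into apply order. A toy model of the generaldelta walk, where `bases[rev]` stands in for `index[rev][3]`:

    def delta_chain(bases, rev):
        # Follow base pointers down to the snapshot, then reverse so the
        # chain is in ascending order, as _deltachain returns it.
        chain = []
        while bases[rev] != rev:
            chain.append(rev)
            rev = bases[rev]
        chain.append(rev)
        chain.reverse()
        return chain

    # rev 0 is a snapshot; 1 deltas against 0; 2 deltas against 1.
    assert delta_chain([0, 0, 1], 2) == [0, 1, 2]
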
1034 1034 def ancestors(self, revs, stoprev=0, inclusive=False):
1035 1035 """Generate the ancestors of 'revs' in reverse revision order.
1036 1036 Does not generate revs lower than stoprev.
1037 1037
1038 1038 See the documentation for ancestor.lazyancestors for more details."""
1039 1039
1040 1040 # first, make sure start revisions aren't filtered
1041 1041 revs = list(revs)
1042 1042 checkrev = self.node
1043 1043 for r in revs:
1044 1044 checkrev(r)
1045 1045 # and we're sure ancestors aren't filtered as well
1046 1046
1047 1047 if rustancestor is not None and self.index.rust_ext_compat:
1048 1048 lazyancestors = rustancestor.LazyAncestors
1049 1049 arg = self.index
1050 1050 else:
1051 1051 lazyancestors = ancestor.lazyancestors
1052 1052 arg = self._uncheckedparentrevs
1053 1053 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1054 1054
1055 1055 def descendants(self, revs):
1056 1056 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1057 1057
1058 1058 def findcommonmissing(self, common=None, heads=None):
1059 1059 """Return a tuple of the ancestors of common and the ancestors of heads
1060 1060 that are not ancestors of common. In revset terminology, we return the
1061 1061 tuple:
1062 1062
1063 1063 ::common, (::heads) - (::common)
1064 1064
1065 1065 The list is sorted by revision number, meaning it is
1066 1066 topologically sorted.
1067 1067
1068 1068 'heads' and 'common' are both lists of node IDs. If heads is
1069 1069 not supplied, uses all of the revlog's heads. If common is not
1070 1070 supplied, uses nullid."""
1071 1071 if common is None:
1072 1072 common = [self.nullid]
1073 1073 if heads is None:
1074 1074 heads = self.heads()
1075 1075
1076 1076 common = [self.rev(n) for n in common]
1077 1077 heads = [self.rev(n) for n in heads]
1078 1078
1079 1079 # we want the ancestors, but inclusive
1080 1080 class lazyset:
1081 1081 def __init__(self, lazyvalues):
1082 1082 self.addedvalues = set()
1083 1083 self.lazyvalues = lazyvalues
1084 1084
1085 1085 def __contains__(self, value):
1086 1086 return value in self.addedvalues or value in self.lazyvalues
1087 1087
1088 1088 def __iter__(self):
1089 1089 added = self.addedvalues
1090 1090 for r in added:
1091 1091 yield r
1092 1092 for r in self.lazyvalues:
1093 1093 if r not in added:
1094 1094 yield r
1095 1095
1096 1096 def add(self, value):
1097 1097 self.addedvalues.add(value)
1098 1098
1099 1099 def update(self, values):
1100 1100 self.addedvalues.update(values)
1101 1101
1102 1102 has = lazyset(self.ancestors(common))
1103 1103 has.add(nullrev)
1104 1104 has.update(common)
1105 1105
1106 1106 # take all ancestors from heads that aren't in has
1107 1107 missing = set()
1108 1108 visit = collections.deque(r for r in heads if r not in has)
1109 1109 while visit:
1110 1110 r = visit.popleft()
1111 1111 if r in missing:
1112 1112 continue
1113 1113 else:
1114 1114 missing.add(r)
1115 1115 for p in self.parentrevs(r):
1116 1116 if p not in has:
1117 1117 visit.append(p)
1118 1118 missing = list(missing)
1119 1119 missing.sort()
1120 1120 return has, [self.node(miss) for miss in missing]
1121 1121
1122 1122 def incrementalmissingrevs(self, common=None):
1123 1123 """Return an object that can be used to incrementally compute the
1124 1124 revision numbers of the ancestors of arbitrary sets that are not
1125 1125 ancestors of common. This is an ancestor.incrementalmissingancestors
1126 1126 object.
1127 1127
1128 1128 'common' is a list of revision numbers. If common is not supplied, uses
1129 1129 nullrev.
1130 1130 """
1131 1131 if common is None:
1132 1132 common = [nullrev]
1133 1133
1134 1134 if rustancestor is not None and self.index.rust_ext_compat:
1135 1135 return rustancestor.MissingAncestors(self.index, common)
1136 1136 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1137 1137
1138 1138 def findmissingrevs(self, common=None, heads=None):
1139 1139 """Return the revision numbers of the ancestors of heads that
1140 1140 are not ancestors of common.
1141 1141
1142 1142 More specifically, return a list of revision numbers corresponding to
1143 1143 nodes N such that every N satisfies the following constraints:
1144 1144
1145 1145 1. N is an ancestor of some node in 'heads'
1146 1146 2. N is not an ancestor of any node in 'common'
1147 1147
1148 1148 The list is sorted by revision number, meaning it is
1149 1149 topologically sorted.
1150 1150
1151 1151 'heads' and 'common' are both lists of revision numbers. If heads is
1152 1152 not supplied, uses all of the revlog's heads. If common is not
1153 1153 supplied, uses nullid."""
1154 1154 if common is None:
1155 1155 common = [nullrev]
1156 1156 if heads is None:
1157 1157 heads = self.headrevs()
1158 1158
1159 1159 inc = self.incrementalmissingrevs(common=common)
1160 1160 return inc.missingancestors(heads)
1161 1161
1162 1162 def findmissing(self, common=None, heads=None):
1163 1163 """Return the ancestors of heads that are not ancestors of common.
1164 1164
1165 1165 More specifically, return a list of nodes N such that every N
1166 1166 satisfies the following constraints:
1167 1167
1168 1168 1. N is an ancestor of some node in 'heads'
1169 1169 2. N is not an ancestor of any node in 'common'
1170 1170
1171 1171 The list is sorted by revision number, meaning it is
1172 1172 topologically sorted.
1173 1173
1174 1174 'heads' and 'common' are both lists of node IDs. If heads is
1175 1175 not supplied, uses all of the revlog's heads. If common is not
1176 1176 supplied, uses nullid."""
1177 1177 if common is None:
1178 1178 common = [self.nullid]
1179 1179 if heads is None:
1180 1180 heads = self.heads()
1181 1181
1182 1182 common = [self.rev(n) for n in common]
1183 1183 heads = [self.rev(n) for n in heads]
1184 1184
1185 1185 inc = self.incrementalmissingrevs(common=common)
1186 1186 return [self.node(r) for r in inc.missingancestors(heads)]
1187 1187
1188 1188 def nodesbetween(self, roots=None, heads=None):
1189 1189 """Return a topological path from 'roots' to 'heads'.
1190 1190
1191 1191 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1192 1192 topologically sorted list of all nodes N that satisfy both of
1193 1193 these constraints:
1194 1194
1195 1195 1. N is a descendant of some node in 'roots'
1196 1196 2. N is an ancestor of some node in 'heads'
1197 1197
1198 1198 Every node is considered to be both a descendant and an ancestor
1199 1199 of itself, so every reachable node in 'roots' and 'heads' will be
1200 1200 included in 'nodes'.
1201 1201
1202 1202 'outroots' is the list of reachable nodes in 'roots', i.e., the
1203 1203 subset of 'roots' that is returned in 'nodes'. Likewise,
1204 1204 'outheads' is the subset of 'heads' that is also in 'nodes'.
1205 1205
1206 1206 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1207 1207 unspecified, uses nullid as the only root. If 'heads' is
1208 1208 unspecified, uses list of all of the revlog's heads."""
1209 1209 nonodes = ([], [], [])
1210 1210 if roots is not None:
1211 1211 roots = list(roots)
1212 1212 if not roots:
1213 1213 return nonodes
1214 1214 lowestrev = min([self.rev(n) for n in roots])
1215 1215 else:
1216 1216 roots = [self.nullid] # Everybody's a descendant of nullid
1217 1217 lowestrev = nullrev
1218 1218 if (lowestrev == nullrev) and (heads is None):
1219 1219 # We want _all_ the nodes!
1220 1220 return (
1221 1221 [self.node(r) for r in self],
1222 1222 [self.nullid],
1223 1223 list(self.heads()),
1224 1224 )
1225 1225 if heads is None:
1226 1226 # All nodes are ancestors, so the latest ancestor is the last
1227 1227 # node.
1228 1228 highestrev = len(self) - 1
1229 1229 # Set ancestors to None to signal that every node is an ancestor.
1230 1230 ancestors = None
1231 1231 # Set heads to an empty dictionary for later discovery of heads
1232 1232 heads = {}
1233 1233 else:
1234 1234 heads = list(heads)
1235 1235 if not heads:
1236 1236 return nonodes
1237 1237 ancestors = set()
1238 1238 # Turn heads into a dictionary so we can remove 'fake' heads.
1239 1239 # Also, later we will be using it to filter out the heads we can't
1240 1240 # find from roots.
1241 1241 heads = dict.fromkeys(heads, False)
1242 1242 # Start at the top and keep marking parents until we're done.
1243 1243 nodestotag = set(heads)
1244 1244 # Remember where the top was so we can use it as a limit later.
1245 1245 highestrev = max([self.rev(n) for n in nodestotag])
1246 1246 while nodestotag:
1247 1247 # grab a node to tag
1248 1248 n = nodestotag.pop()
1249 1249 # Never tag nullid
1250 1250 if n == self.nullid:
1251 1251 continue
1252 1252 # A node's revision number represents its place in a
1253 1253 # topologically sorted list of nodes.
1254 1254 r = self.rev(n)
1255 1255 if r >= lowestrev:
1256 1256 if n not in ancestors:
1257 1257 # If we are possibly a descendant of one of the roots
1258 1258 # and we haven't already been marked as an ancestor
1259 1259 ancestors.add(n) # Mark as ancestor
1260 1260 # Add non-nullid parents to list of nodes to tag.
1261 1261 nodestotag.update(
1262 1262 [p for p in self.parents(n) if p != self.nullid]
1263 1263 )
1264 1264 elif n in heads: # We've seen it before, is it a fake head?
1265 1265 # So it is, real heads should not be the ancestors of
1266 1266 # any other heads.
1267 1267 heads.pop(n)
1268 1268 if not ancestors:
1269 1269 return nonodes
1270 1270 # Now that we have our set of ancestors, we want to remove any
1271 1271 # roots that are not ancestors.
1272 1272
1273 1273 # If one of the roots was nullid, everything is included anyway.
1274 1274 if lowestrev > nullrev:
1275 1275 # But, since we weren't, let's recompute the lowest rev to not
1276 1276 # include roots that aren't ancestors.
1277 1277
1278 1278 # Filter out roots that aren't ancestors of heads
1279 1279 roots = [root for root in roots if root in ancestors]
1280 1280 # Recompute the lowest revision
1281 1281 if roots:
1282 1282 lowestrev = min([self.rev(root) for root in roots])
1283 1283 else:
1284 1284 # No more roots? Return empty list
1285 1285 return nonodes
1286 1286 else:
1287 1287 # We are descending from nullid, and don't need to care about
1288 1288 # any other roots.
1289 1289 lowestrev = nullrev
1290 1290 roots = [self.nullid]
1291 1291 # Transform our roots list into a set.
1292 1292 descendants = set(roots)
1293 1293 # Also, keep the original roots so we can filter out roots that aren't
1294 1294 # 'real' roots (i.e. are descended from other roots).
1295 1295 roots = descendants.copy()
1296 1296 # Our topologically sorted list of output nodes.
1297 1297 orderedout = []
1298 1298 # Don't start at nullid since we don't want nullid in our output list,
1299 1299 # and if nullid shows up in descendants, empty parents will look like
1300 1300 # they're descendants.
1301 1301 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1302 1302 n = self.node(r)
1303 1303 isdescendant = False
1304 1304 if lowestrev == nullrev: # Everybody is a descendant of nullid
1305 1305 isdescendant = True
1306 1306 elif n in descendants:
1307 1307 # n is already a descendant
1308 1308 isdescendant = True
1309 1309 # This check only needs to be done here because all the roots
1310 1310 # will start being marked as descendants before the loop.
1311 1311 if n in roots:
1312 1312 # If n was a root, check if it's a 'real' root.
1313 1313 p = tuple(self.parents(n))
1314 1314 # If any of its parents are descendants, it's not a root.
1315 1315 if (p[0] in descendants) or (p[1] in descendants):
1316 1316 roots.remove(n)
1317 1317 else:
1318 1318 p = tuple(self.parents(n))
1319 1319 # A node is a descendant if either of its parents are
1320 1320 # descendants. (We seeded the descendants set with the roots
1321 1321 # up there, remember?)
1322 1322 if (p[0] in descendants) or (p[1] in descendants):
1323 1323 descendants.add(n)
1324 1324 isdescendant = True
1325 1325 if isdescendant and ((ancestors is None) or (n in ancestors)):
1326 1326 # Only include nodes that are both descendants and ancestors.
1327 1327 orderedout.append(n)
1328 1328 if (ancestors is not None) and (n in heads):
1329 1329 # We're trying to figure out which heads are reachable
1330 1330 # from roots.
1331 1331 # Mark this head as having been reached
1332 1332 heads[n] = True
1333 1333 elif ancestors is None:
1334 1334 # Otherwise, we're trying to discover the heads.
1335 1335 # Assume this is a head because if it isn't, the next step
1336 1336 # will eventually remove it.
1337 1337 heads[n] = True
1338 1338 # But, obviously its parents aren't.
1339 1339 for p in self.parents(n):
1340 1340 heads.pop(p, None)
1341 1341 heads = [head for head, flag in heads.items() if flag]
1342 1342 roots = list(roots)
1343 1343 assert orderedout
1344 1344 assert roots
1345 1345 assert heads
1346 1346 return (orderedout, roots, heads)
1347 1347
1348 1348 def headrevs(self, revs=None):
1349 1349 if revs is None:
1350 1350 try:
1351 1351 return self.index.headrevs()
1352 1352 except AttributeError:
1353 1353 return self._headrevs()
1354 1354 if rustdagop is not None and self.index.rust_ext_compat:
1355 1355 return rustdagop.headrevs(self.index, revs)
1356 1356 return dagop.headrevs(revs, self._uncheckedparentrevs)
1357 1357
1358 1358 def computephases(self, roots):
1359 1359 return self.index.computephasesmapsets(roots)
1360 1360
1361 1361 def _headrevs(self):
1362 1362 count = len(self)
1363 1363 if not count:
1364 1364 return [nullrev]
1365 1365 # we won't iterate over filtered revs so nobody is a head at start
1366 1366 ishead = [0] * (count + 1)
1367 1367 index = self.index
1368 1368 for r in self:
1369 1369 ishead[r] = 1 # I may be a head
1370 1370 e = index[r]
1371 1371 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1372 1372 return [r for r, val in enumerate(ishead) if val]
1373 1373
1374 1374 def heads(self, start=None, stop=None):
1375 1375 """return the list of all nodes that have no children
1376 1376
1377 1377 if start is specified, only heads that are descendants of
1378 1378 start will be returned
1379 1379 if stop is specified, it will consider all the revs from stop
1380 1380 as if they had no children
1381 1381 """
1382 1382 if start is None and stop is None:
1383 1383 if not len(self):
1384 1384 return [self.nullid]
1385 1385 return [self.node(r) for r in self.headrevs()]
1386 1386
1387 1387 if start is None:
1388 1388 start = nullrev
1389 1389 else:
1390 1390 start = self.rev(start)
1391 1391
1392 1392 stoprevs = {self.rev(n) for n in stop or []}
1393 1393
1394 1394 revs = dagop.headrevssubset(
1395 1395 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1396 1396 )
1397 1397
1398 1398 return [self.node(rev) for rev in revs]
1399 1399
1400 1400 def children(self, node):
1401 1401 """find the children of a given node"""
1402 1402 c = []
1403 1403 p = self.rev(node)
1404 1404 for r in self.revs(start=p + 1):
1405 1405 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1406 1406 if prevs:
1407 1407 for pr in prevs:
1408 1408 if pr == p:
1409 1409 c.append(self.node(r))
1410 1410 elif p == nullrev:
1411 1411 c.append(self.node(r))
1412 1412 return c
1413 1413
1414 1414 def commonancestorsheads(self, a, b):
1415 1415 """calculate all the heads of the common ancestors of nodes a and b"""
1416 1416 a, b = self.rev(a), self.rev(b)
1417 1417 ancs = self._commonancestorsheads(a, b)
1418 1418 return pycompat.maplist(self.node, ancs)
1419 1419
1420 1420 def _commonancestorsheads(self, *revs):
1421 1421 """calculate all the heads of the common ancestors of revs"""
1422 1422 try:
1423 1423 ancs = self.index.commonancestorsheads(*revs)
1424 1424 except (AttributeError, OverflowError): # C implementation failed
1425 1425 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1426 1426 return ancs
1427 1427
1428 1428 def isancestor(self, a, b):
1429 1429 """return True if node a is an ancestor of node b
1430 1430
1431 1431 A revision is considered an ancestor of itself."""
1432 1432 a, b = self.rev(a), self.rev(b)
1433 1433 return self.isancestorrev(a, b)
1434 1434
1435 1435 def isancestorrev(self, a, b):
1436 1436 """return True if revision a is an ancestor of revision b
1437 1437
1438 1438 A revision is considered an ancestor of itself.
1439 1439
1440 1440 The implementation of this is trivial but the use of
1441 1441 reachableroots is not."""
1442 1442 if a == nullrev:
1443 1443 return True
1444 1444 elif a == b:
1445 1445 return True
1446 1446 elif a > b:
1447 1447 return False
1448 1448 return bool(self.reachableroots(a, [b], [a], includepath=False))
1449 1449
1450 1450 def reachableroots(self, minroot, heads, roots, includepath=False):
1451 1451 """return (heads(::(<roots> and <roots>::<heads>)))
1452 1452
1453 1453 If includepath is True, return (<roots>::<heads>)."""
1454 1454 try:
1455 1455 return self.index.reachableroots2(
1456 1456 minroot, heads, roots, includepath
1457 1457 )
1458 1458 except AttributeError:
1459 1459 return dagop._reachablerootspure(
1460 1460 self.parentrevs, minroot, roots, heads, includepath
1461 1461 )
1462 1462
1463 1463 def ancestor(self, a, b):
1464 1464 """calculate the "best" common ancestor of nodes a and b"""
1465 1465
1466 1466 a, b = self.rev(a), self.rev(b)
1467 1467 try:
1468 1468 ancs = self.index.ancestors(a, b)
1469 1469 except (AttributeError, OverflowError):
1470 1470 ancs = ancestor.ancestors(self.parentrevs, a, b)
1471 1471 if ancs:
1472 1472 # choose a consistent winner when there's a tie
1473 1473 return min(map(self.node, ancs))
1474 1474 return self.nullid
1475 1475
1476 1476 def _match(self, id):
1477 1477 if isinstance(id, int):
1478 1478 # rev
1479 1479 return self.node(id)
1480 1480 if len(id) == self.nodeconstants.nodelen:
1481 1481 # possibly a binary node
1482 1482 # odds of a binary node being all hex in ASCII are 1 in 10**25
1483 1483 try:
1484 1484 node = id
1485 1485 self.rev(node) # quick search the index
1486 1486 return node
1487 1487 except error.LookupError:
1488 1488 pass # may be partial hex id
1489 1489 try:
1490 1490 # str(rev)
1491 1491 rev = int(id)
1492 1492 if b"%d" % rev != id:
1493 1493 raise ValueError
1494 1494 if rev < 0:
1495 1495 rev = len(self) + rev
1496 1496 if rev < 0 or rev >= len(self):
1497 1497 raise ValueError
1498 1498 return self.node(rev)
1499 1499 except (ValueError, OverflowError):
1500 1500 pass
1501 1501 if len(id) == 2 * self.nodeconstants.nodelen:
1502 1502 try:
1503 1503 # a full hex nodeid?
1504 1504 node = bin(id)
1505 1505 self.rev(node)
1506 1506 return node
1507 1507 except (binascii.Error, error.LookupError):
1508 1508 pass
1509 1509
1510 1510 def _partialmatch(self, id):
1511 1511 # we don't care about wdirfilenodeids as they should always be full hashes
1512 1512 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1513 1513 ambiguous = False
1514 1514 try:
1515 1515 partial = self.index.partialmatch(id)
1516 1516 if partial and self.hasnode(partial):
1517 1517 if maybewdir:
1518 1518 # single 'ff...' match in radix tree, ambiguous with wdir
1519 1519 ambiguous = True
1520 1520 else:
1521 1521 return partial
1522 1522 elif maybewdir:
1523 1523 # no 'ff...' match in radix tree, wdir identified
1524 1524 raise error.WdirUnsupported
1525 1525 else:
1526 1526 return None
1527 1527 except error.RevlogError:
1528 1528 # parsers.c radix tree lookup gave multiple matches
1529 1529 # fast path: for unfiltered changelog, radix tree is accurate
1530 1530 if not getattr(self, 'filteredrevs', None):
1531 1531 ambiguous = True
1532 1532 # fall through to slow path that filters hidden revisions
1533 1533 except (AttributeError, ValueError):
1534 1534 # we are pure python, or key is not hex
1535 1535 pass
1536 1536 if ambiguous:
1537 1537 raise error.AmbiguousPrefixLookupError(
1538 1538 id, self.display_id, _(b'ambiguous identifier')
1539 1539 )
1540 1540
1541 1541 if id in self._pcache:
1542 1542 return self._pcache[id]
1543 1543
1544 1544 if len(id) <= 40:
1545 1545 # hex(node)[:...]
1546 1546 l = len(id) // 2 * 2 # grab an even number of digits
1547 1547 try:
1548 1548 # we're dropping the last digit, so let's check that it's hex,
1549 1549 # to avoid the expensive computation below if it's not
1550 1550 if len(id) % 2 > 0:
1551 1551 if not (id[-1] in hexdigits):
1552 1552 return None
1553 1553 prefix = bin(id[:l])
1554 1554 except binascii.Error:
1555 1555 pass
1556 1556 else:
1557 1557 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1558 1558 nl = [
1559 1559 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1560 1560 ]
1561 1561 if self.nodeconstants.nullhex.startswith(id):
1562 1562 nl.append(self.nullid)
1563 1563 if len(nl) > 0:
1564 1564 if len(nl) == 1 and not maybewdir:
1565 1565 self._pcache[id] = nl[0]
1566 1566 return nl[0]
1567 1567 raise error.AmbiguousPrefixLookupError(
1568 1568 id, self.display_id, _(b'ambiguous identifier')
1569 1569 )
1570 1570 if maybewdir:
1571 1571 raise error.WdirUnsupported
1572 1572 return None
1573 1573
1574 1574 def lookup(self, id):
1575 1575 """locate a node based on:
1576 1576 - revision number or str(revision number)
1577 1577 - nodeid or subset of hex nodeid
1578 1578 """
1579 1579 n = self._match(id)
1580 1580 if n is not None:
1581 1581 return n
1582 1582 n = self._partialmatch(id)
1583 1583 if n:
1584 1584 return n
1585 1585
1586 1586 raise error.LookupError(id, self.display_id, _(b'no match found'))
1587 1587
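# Illustrative sketch only (hypothetical caller): the identifier forms
# ``lookup`` accepts, per the docstring above. ``rl`` stands for an
# already-open revlog instance.
#
# node = rl.lookup(b'0')           # revision number given as bytes
# node = rl.lookup(b'-1')          # negative numbers count from the tip
# node = rl.lookup(binary_node)    # full binary nodeid, returned as-is
# node = rl.lookup(b'1f0e5c')      # unambiguous hex nodeid prefix
#
# An ambiguous prefix raises error.AmbiguousPrefixLookupError; anything
# that matches nothing raises error.LookupError.
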
1588 1588 def shortest(self, node, minlength=1):
1589 1589 """Find the shortest unambiguous prefix that matches node."""
1590 1590
1591 1591 def isvalid(prefix):
1592 1592 try:
1593 1593 matchednode = self._partialmatch(prefix)
1594 1594 except error.AmbiguousPrefixLookupError:
1595 1595 return False
1596 1596 except error.WdirUnsupported:
1597 1597 # single 'ff...' match
1598 1598 return True
1599 1599 if matchednode is None:
1600 1600 raise error.LookupError(node, self.display_id, _(b'no node'))
1601 1601 return True
1602 1602
1603 1603 def maybewdir(prefix):
1604 1604 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1605 1605
1606 1606 hexnode = hex(node)
1607 1607
1608 1608 def disambiguate(hexnode, minlength):
1609 1609 """Disambiguate against wdirid."""
1610 1610 for length in range(minlength, len(hexnode) + 1):
1611 1611 prefix = hexnode[:length]
1612 1612 if not maybewdir(prefix):
1613 1613 return prefix
1614 1614
1615 1615 if not getattr(self, 'filteredrevs', None):
1616 1616 try:
1617 1617 length = max(self.index.shortest(node), minlength)
1618 1618 return disambiguate(hexnode, length)
1619 1619 except error.RevlogError:
1620 1620 if node != self.nodeconstants.wdirid:
1621 1621 raise error.LookupError(
1622 1622 node, self.display_id, _(b'no node')
1623 1623 )
1624 1624 except AttributeError:
1625 1625 # Fall through to pure code
1626 1626 pass
1627 1627
1628 1628 if node == self.nodeconstants.wdirid:
1629 1629 for length in range(minlength, len(hexnode) + 1):
1630 1630 prefix = hexnode[:length]
1631 1631 if isvalid(prefix):
1632 1632 return prefix
1633 1633
1634 1634 for length in range(minlength, len(hexnode) + 1):
1635 1635 prefix = hexnode[:length]
1636 1636 if isvalid(prefix):
1637 1637 return disambiguate(hexnode, length)
1638 1638
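# Illustrative usage (hypothetical names): ``shortest`` grows the prefix
# one hex digit at a time from ``minlength`` until ``_partialmatch``
# resolves it uniquely.
#
# prefix = rl.shortest(node)    # e.g. b'1f0e' if four digits suffice
# assert rl.lookup(prefix) == node
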
1639 1639 def cmp(self, node, text):
1640 1640 """compare text with a given file revision
1641 1641
1642 1642 returns True if text is different than what is stored.
1643 1643 """
1644 1644 p1, p2 = self.parents(node)
1645 1645 return storageutil.hashrevisionsha1(text, p1, p2) != node
1646 1646
1647 1647 def _getsegmentforrevs(self, startrev, endrev, df=None):
1648 1648 """Obtain a segment of raw data corresponding to a range of revisions.
1649 1649
1650 1650 Accepts the start and end revisions and an optional already-open
1651 1651 file handle to be used for reading. If the file handle is used, its
1652 1652 seek position will not be preserved.
1653 1653
1654 1654 Requests for data may be satisfied by a cache.
1655 1655
1656 1656 Returns a 2-tuple of (offset, data) for the requested range of
1657 1657 revisions. Offset is the integer offset from the beginning of the
1658 1658 revlog and data is a str or buffer of the raw byte data.
1659 1659
1660 1660 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1661 1661 to determine where each revision's data begins and ends.
1662 1662 """
1663 1663 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1664 1664 # (functions are expensive).
1665 1665 index = self.index
1666 1666 istart = index[startrev]
1667 1667 start = int(istart[0] >> 16)
1668 1668 if startrev == endrev:
1669 1669 end = start + istart[1]
1670 1670 else:
1671 1671 iend = index[endrev]
1672 1672 end = int(iend[0] >> 16) + iend[1]
1673 1673
1674 1674 if self._inline:
1675 1675 start += (startrev + 1) * self.index.entry_size
1676 1676 end += (endrev + 1) * self.index.entry_size
1677 1677 length = end - start
1678 1678
1679 1679 return start, self._segmentfile.read_chunk(start, length, df)
1680 1680
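# A minimal sketch of how a caller can carve one revision's raw chunk out
# of the segment returned above, mirroring the inline adjustment performed
# by ``_getsegmentforrevs`` (``rl`` and ``rev`` are hypothetical):
def _example_chunk_from_segment(rl, rev):
    offset, data = rl._getsegmentforrevs(rev, rev)
    chunkstart = rl.start(rev)
    if rl._inline:
        # inline revlogs interleave index entries with the data
        chunkstart += (rev + 1) * rl.index.entry_size
    chunklength = rl.length(rev)
    return data[chunkstart - offset : chunkstart - offset + chunklength]
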
1681 1681 def _chunk(self, rev, df=None):
1682 1682 """Obtain a single decompressed chunk for a revision.
1683 1683
1684 1684 Accepts an integer revision and an optional already-open file handle
1685 1685 to be used for reading. If used, the seek position of the file will not
1686 1686 be preserved.
1687 1687
1688 1688 Returns a str holding uncompressed data for the requested revision.
1689 1689 """
1690 1690 compression_mode = self.index[rev][10]
1691 1691 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1692 1692 if compression_mode == COMP_MODE_PLAIN:
1693 1693 return data
1694 1694 elif compression_mode == COMP_MODE_DEFAULT:
1695 1695 return self._decompressor(data)
1696 1696 elif compression_mode == COMP_MODE_INLINE:
1697 1697 return self.decompress(data)
1698 1698 else:
1699 1699 msg = b'unknown compression mode %d'
1700 1700 msg %= compression_mode
1701 1701 raise error.RevlogError(msg)
1702 1702
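# For reference, the three compression modes stored in an index entry
# (field 10) and dispatched above:
# - COMP_MODE_PLAIN: the chunk is stored verbatim; no decompression.
# - COMP_MODE_DEFAULT: compressed with the revlog's configured engine;
#   the header byte lives in the docket, so ``self._decompressor`` is
#   applied directly.
# - COMP_MODE_INLINE: the chunk carries its own header byte and goes
#   through the generic ``decompress`` dispatcher.
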
1703 1703 def _chunks(self, revs, df=None, targetsize=None):
1704 1704 """Obtain decompressed chunks for the specified revisions.
1705 1705
1706 1706 Accepts an iterable of numeric revisions that are assumed to be in
1707 1707 ascending order. Also accepts an optional already-open file handle
1708 1708 to be used for reading. If used, the seek position of the file will
1709 1709 not be preserved.
1710 1710
1711 1711 This function is similar to calling ``self._chunk()`` multiple times,
1712 1712 but is faster.
1713 1713
1714 1714 Returns a list with decompressed data for each requested revision.
1715 1715 """
1716 1716 if not revs:
1717 1717 return []
1718 1718 start = self.start
1719 1719 length = self.length
1720 1720 inline = self._inline
1721 1721 iosize = self.index.entry_size
1722 1722 buffer = util.buffer
1723 1723
1724 1724 l = []
1725 1725 ladd = l.append
1726 1726
1727 1727 if not self._withsparseread:
1728 1728 slicedchunks = (revs,)
1729 1729 else:
1730 1730 slicedchunks = deltautil.slicechunk(
1731 1731 self, revs, targetsize=targetsize
1732 1732 )
1733 1733
1734 1734 for revschunk in slicedchunks:
1735 1735 firstrev = revschunk[0]
1736 1736 # Skip trailing revisions with empty diff
1737 1737 for lastrev in revschunk[::-1]:
1738 1738 if length(lastrev) != 0:
1739 1739 break
1740 1740
1741 1741 try:
1742 1742 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1743 1743 except OverflowError:
1744 1744 # issue4215 - we can't cache a run of chunks greater than
1745 1745 # 2G on Windows
1746 1746 return [self._chunk(rev, df=df) for rev in revschunk]
1747 1747
1748 1748 decomp = self.decompress
1749 1749 # self._decompressor might be None, but will not be used in that case
1750 1750 def_decomp = self._decompressor
1751 1751 for rev in revschunk:
1752 1752 chunkstart = start(rev)
1753 1753 if inline:
1754 1754 chunkstart += (rev + 1) * iosize
1755 1755 chunklength = length(rev)
1756 1756 comp_mode = self.index[rev][10]
1757 1757 c = buffer(data, chunkstart - offset, chunklength)
1758 1758 if comp_mode == COMP_MODE_PLAIN:
1759 1759 ladd(c)
1760 1760 elif comp_mode == COMP_MODE_INLINE:
1761 1761 ladd(decomp(c))
1762 1762 elif comp_mode == COMP_MODE_DEFAULT:
1763 1763 ladd(def_decomp(c))
1764 1764 else:
1765 1765 msg = b'unknown compression mode %d'
1766 1766 msg %= comp_mode
1767 1767 raise error.RevlogError(msg)
1768 1768
1769 1769 return l
1770 1770
1771 1771 def deltaparent(self, rev):
1772 1772 """return deltaparent of the given revision"""
1773 1773 base = self.index[rev][3]
1774 1774 if base == rev:
1775 1775 return nullrev
1776 1776 elif self._generaldelta:
1777 1777 return base
1778 1778 else:
1779 1779 return rev - 1
1780 1780
1781 1781 def issnapshot(self, rev):
1782 1782 """tells whether rev is a snapshot"""
1783 1783 if not self._sparserevlog:
1784 1784 return self.deltaparent(rev) == nullrev
1785 1785 elif util.safehasattr(self.index, b'issnapshot'):
1786 1786 # directly assign the method to cache the testing and access
1787 1787 self.issnapshot = self.index.issnapshot
1788 1788 return self.issnapshot(rev)
1789 1789 if rev == nullrev:
1790 1790 return True
1791 1791 entry = self.index[rev]
1792 1792 base = entry[3]
1793 1793 if base == rev:
1794 1794 return True
1795 1795 if base == nullrev:
1796 1796 return True
1797 1797 p1 = entry[5]
1798 1798 while self.length(p1) == 0:
1799 1799 b = self.deltaparent(p1)
1800 1800 if b == p1:
1801 1801 break
1802 1802 p1 = b
1803 1803 p2 = entry[6]
1804 1804 while self.length(p2) == 0:
1805 1805 b = self.deltaparent(p2)
1806 1806 if b == p2:
1807 1807 break
1808 1808 p2 = b
1809 1809 if base == p1 or base == p2:
1810 1810 return False
1811 1811 return self.issnapshot(base)
1812 1812
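# Hedged sketch of the recursion above as a standalone predicate (helper
# names are hypothetical stand-ins for the index accessors; the real
# method additionally skips empty parent revisions before comparing):
def _example_issnapshot(rev, deltabase, parents, nullrev=-1):
    if rev == nullrev:
        return True
    base = deltabase(rev)
    if base in (rev, nullrev):
        return True  # a full snapshot: no delta, or a delta against null
    if base in parents(rev):
        return False  # a delta against a parent is a regular delta
    # an intermediate snapshot is a delta against another snapshot
    return _example_issnapshot(base, deltabase, parents, nullrev)
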
1813 1813 def snapshotdepth(self, rev):
1814 1814 """number of snapshot in the chain before this one"""
1815 1815 if not self.issnapshot(rev):
1816 1816 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1817 1817 return len(self._deltachain(rev)[0]) - 1
1818 1818
1819 1819 def revdiff(self, rev1, rev2):
1820 1820 """return or calculate a delta between two revisions
1821 1821
1822 1822 The delta calculated is in binary form and is intended to be written to
1823 1823 revlog data directly. So this function needs raw revision data.
1824 1824 """
1825 1825 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1826 1826 return bytes(self._chunk(rev2))
1827 1827
1828 1828 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1829 1829
1830 1830 def revision(self, nodeorrev, _df=None):
1831 1831 """return an uncompressed revision of a given node or revision
1832 1832 number.
1833 1833
1834 1834 _df - an existing file handle to read from. (internal-only)
1835 1835 """
1836 1836 return self._revisiondata(nodeorrev, _df)
1837 1837
1838 1838 def sidedata(self, nodeorrev, _df=None):
1839 1839 """a map of extra data related to the changeset but not part of the hash
1840 1840
1841 1841 This function currently returns a dictionary. However, a more advanced
1842 1842 mapping object will likely be used in the future for more
1843 1843 efficient/lazy code.
1844 1844 """
1845 1845 # deal with <nodeorrev> argument type
1846 1846 if isinstance(nodeorrev, int):
1847 1847 rev = nodeorrev
1848 1848 else:
1849 1849 rev = self.rev(nodeorrev)
1850 1850 return self._sidedata(rev)
1851 1851
1852 1852 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1853 1853 # deal with <nodeorrev> argument type
1854 1854 if isinstance(nodeorrev, int):
1855 1855 rev = nodeorrev
1856 1856 node = self.node(rev)
1857 1857 else:
1858 1858 node = nodeorrev
1859 1859 rev = None
1860 1860
1861 1861 # fast path the special `nullid` rev
1862 1862 if node == self.nullid:
1863 1863 return b""
1864 1864
1865 1865 # ``rawtext`` is the text as stored inside the revlog. Might be the
1866 1866 # revision or might need to be processed to retrieve the revision.
1867 1867 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1868 1868
1869 1869 if raw and validated:
1870 1870 # if we don't want to process the raw text and that raw
1871 1871 # text is cached, we can exit early.
1872 1872 return rawtext
1873 1873 if rev is None:
1874 1874 rev = self.rev(node)
1875 1875 # the revlog's flags for this revision
1876 1876 # (usually altering its state or content)
1877 1877 flags = self.flags(rev)
1878 1878
1879 1879 if validated and flags == REVIDX_DEFAULT_FLAGS:
1880 1880 # no extra flags set, no flag processor runs, text = rawtext
1881 1881 return rawtext
1882 1882
1883 1883 if raw:
1884 1884 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1885 1885 text = rawtext
1886 1886 else:
1887 1887 r = flagutil.processflagsread(self, rawtext, flags)
1888 1888 text, validatehash = r
1889 1889 if validatehash:
1890 1890 self.checkhash(text, node, rev=rev)
1891 1891 if not validated:
1892 1892 self._revisioncache = (node, rev, rawtext)
1893 1893
1894 1894 return text
1895 1895
1896 1896 def _rawtext(self, node, rev, _df=None):
1897 1897 """return the possibly unvalidated rawtext for a revision
1898 1898
1899 1899 returns (rev, rawtext, validated)
1900 1900 """
1901 1901
1902 1902 # revision in the cache (could be useful to apply delta)
1903 1903 cachedrev = None
1904 1904 # An intermediate text to apply deltas to
1905 1905 basetext = None
1906 1906
1907 1907 # Check if we have the entry in cache
1908 1908 # The cache entry looks like (node, rev, rawtext)
1909 1909 if self._revisioncache:
1910 1910 if self._revisioncache[0] == node:
1911 1911 return (rev, self._revisioncache[2], True)
1912 1912 cachedrev = self._revisioncache[1]
1913 1913
1914 1914 if rev is None:
1915 1915 rev = self.rev(node)
1916 1916
1917 1917 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1918 1918 if stopped:
1919 1919 basetext = self._revisioncache[2]
1920 1920
1921 1921 # drop cache to save memory; the caller is expected to
1922 1922 # update self._revisioncache after validating the text
1923 1923 self._revisioncache = None
1924 1924
1925 1925 targetsize = None
1926 1926 rawsize = self.index[rev][2]
1927 1927 if 0 <= rawsize:
1928 1928 targetsize = 4 * rawsize
1929 1929
1930 1930 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1931 1931 if basetext is None:
1932 1932 basetext = bytes(bins[0])
1933 1933 bins = bins[1:]
1934 1934
1935 1935 rawtext = mdiff.patches(basetext, bins)
1936 1936 del basetext # let us have a chance to free memory early
1937 1937 return (rev, rawtext, False)
1938 1938
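# Conceptual sketch of the reconstruction above: the raw text is the delta
# chain's base text with every subsequent delta applied in order.
# ``mdiff.patches`` performs this in a single pass; ``patch_fn`` below is a
# hypothetical stand-in for applying one binary delta.
def _example_apply_chain(basetext, deltas, patch_fn):
    text = basetext
    for delta in deltas:
        text = patch_fn(text, delta)
    return text
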
1939 1939 def _sidedata(self, rev):
1940 1940 """Return the sidedata for a given revision number."""
1941 1941 index_entry = self.index[rev]
1942 1942 sidedata_offset = index_entry[8]
1943 1943 sidedata_size = index_entry[9]
1944 1944
1945 1945 if self._inline:
1946 1946 sidedata_offset += self.index.entry_size * (1 + rev)
1947 1947 if sidedata_size == 0:
1948 1948 return {}
1949 1949
1950 1950 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1951 1951 filename = self._sidedatafile
1952 1952 end = self._docket.sidedata_end
1953 1953 offset = sidedata_offset
1954 1954 length = sidedata_size
1955 1955 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1956 1956 raise error.RevlogError(m)
1957 1957
1958 1958 comp_segment = self._segmentfile_sidedata.read_chunk(
1959 1959 sidedata_offset, sidedata_size
1960 1960 )
1961 1961
1962 1962 comp = self.index[rev][11]
1963 1963 if comp == COMP_MODE_PLAIN:
1964 1964 segment = comp_segment
1965 1965 elif comp == COMP_MODE_DEFAULT:
1966 1966 segment = self._decompressor(comp_segment)
1967 1967 elif comp == COMP_MODE_INLINE:
1968 1968 segment = self.decompress(comp_segment)
1969 1969 else:
1970 1970 msg = b'unknown compression mode %d'
1971 1971 msg %= comp
1972 1972 raise error.RevlogError(msg)
1973 1973
1974 1974 sidedata = sidedatautil.deserialize_sidedata(segment)
1975 1975 return sidedata
1976 1976
1977 1977 def rawdata(self, nodeorrev, _df=None):
1978 1978 """return an uncompressed raw data of a given node or revision number.
1979 1979
1980 1980 _df - an existing file handle to read from. (internal-only)
1981 1981 """
1982 1982 return self._revisiondata(nodeorrev, _df, raw=True)
1983 1983
1984 1984 def hash(self, text, p1, p2):
1985 1985 """Compute a node hash.
1986 1986
1987 1987 Available as a function so that subclasses can replace the hash
1988 1988 as needed.
1989 1989 """
1990 1990 return storageutil.hashrevisionsha1(text, p1, p2)
1991 1991
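# A minimal sketch of the default scheme behind
# ``storageutil.hashrevisionsha1``: the two parent nodes are concatenated
# in sorted order ahead of the text and hashed with SHA-1 (the function
# name here is illustrative only).
def _example_hashrevision(text, p1, p2):
    import hashlib

    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
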
1992 1992 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1993 1993 """Check node hash integrity.
1994 1994
1995 1995 Available as a function so that subclasses can extend hash mismatch
1996 1996 behaviors as needed.
1997 1997 """
1998 1998 try:
1999 1999 if p1 is None and p2 is None:
2000 2000 p1, p2 = self.parents(node)
2001 2001 if node != self.hash(text, p1, p2):
2002 2002 # Clear the revision cache on hash failure. The revision cache
2003 2003 # only stores the raw revision and clearing the cache does have
2004 2004 # the side-effect that we won't have a cache hit when the raw
2005 2005 # revision data is accessed. But this case should be rare and
2006 2006 # it is extra work to teach the cache about the hash
2007 2007 # verification state.
2008 2008 if self._revisioncache and self._revisioncache[0] == node:
2009 2009 self._revisioncache = None
2010 2010
2011 2011 revornode = rev
2012 2012 if revornode is None:
2013 2013 revornode = templatefilters.short(hex(node))
2014 2014 raise error.RevlogError(
2015 2015 _(b"integrity check failed on %s:%s")
2016 2016 % (self.display_id, pycompat.bytestr(revornode))
2017 2017 )
2018 2018 except error.RevlogError:
2019 2019 if self._censorable and storageutil.iscensoredtext(text):
2020 2020 raise error.CensoredNodeError(self.display_id, node, text)
2021 2021 raise
2022 2022
2023 @property
2024 def _split_index_file(self):
2025 """the path where to expect the index of an ongoing splitting operation
2026
2027 The file will only exist if a splitting operation is in progress, but
2028 it is always expected at the same location."""
2029 return self.radix + b'.i.s'
2030
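# Illustrative naming (hypothetical values): for a revlog whose radix is
# b'data/foo', the index lives at b'data/foo.i' and the temporary index
# written during an inline-to-split conversion at b'data/foo.i.s'.
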
2023 2031 def _enforceinlinesize(self, tr, side_write=True):
2024 2032 """Check if the revlog is too big for inline and convert if so.
2025 2033
2026 2034 This should be called after revisions are added to the revlog. If the
2027 2035 revlog has grown too large to be an inline revlog, it will convert it
2028 2036 to use multiple index and data files.
2029 2037 """
2030 2038 tiprev = len(self) - 1
2031 2039 total_size = self.start(tiprev) + self.length(tiprev)
2032 2040 if not self._inline or total_size < _maxinline:
2033 2041 return
2034 2042
2035 2043 troffset = tr.findoffset(self._indexfile)
2036 2044 if troffset is None:
2037 2045 raise error.RevlogError(
2038 2046 _(b"%s not found in the transaction") % self._indexfile
2039 2047 )
2040 2048 if troffset:
2041 2049 tr.addbackup(self._indexfile, for_offset=True)
2042 2050 tr.add(self._datafile, 0)
2043 2051
2044 2052 existing_handles = False
2045 2053 if self._writinghandles is not None:
2046 2054 existing_handles = True
2047 2055 fp = self._writinghandles[0]
2048 2056 fp.flush()
2049 2057 fp.close()
2050 2058 # We can't use the cached file handle after close(). So prevent
2051 2059 # its usage.
2052 2060 self._writinghandles = None
2053 2061 self._segmentfile.writing_handle = None
2054 2062 # No need to deal with sidedata writing handle as it is only
2055 2063 # relevant for revlog-v2, which is never inline and thus never
2056 2064 # reaches this code
2057 2065 if side_write:
2058 2066 old_index_file_path = self._indexfile
2059 new_index_file_path = self._indexfile + b'.s'
2067 new_index_file_path = self._split_index_file
2060 2068 opener = self.opener
2061 2069 weak_self = weakref.ref(self)
2062 2070
2063 2071 fncache = getattr(opener, 'fncache', None)
2064 2072 if fncache is not None:
2065 2073 fncache.addignore(new_index_file_path)
2066 2074
2067 2075 # the "split" index replace the real index when the transaction is finalized
2068 2076 def finalize_callback(tr):
2069 2077 opener.rename(
2070 2078 new_index_file_path,
2071 2079 old_index_file_path,
2072 2080 checkambig=True,
2073 2081 )
2074 2082 maybe_self = weak_self()
2075 2083 if maybe_self is not None:
2076 2084 maybe_self._indexfile = old_index_file_path
2077 2085
2078 2086 def abort_callback(tr):
2079 2087 maybe_self = weak_self()
2080 2088 if maybe_self is not None:
2081 2089 maybe_self._indexfile = old_index_file_path
2082 2090
2083 2091 tr.registertmp(new_index_file_path)
2084 2092 if self.target[1] is not None:
2085 2093 callback_id = b'000-revlog-split-%d-%s' % self.target
2086 2094 else:
2087 2095 callback_id = b'000-revlog-split-%d' % self.target[0]
2088 2096 tr.addfinalize(callback_id, finalize_callback)
2089 2097 tr.addabort(callback_id, abort_callback)
2090 2098
2091 2099 new_dfh = self._datafp(b'w+')
2092 2100 new_dfh.truncate(0) # drop any potentially existing data
2093 2101 try:
2094 2102 with self._indexfp() as read_ifh:
2095 2103 for r in self:
2096 2104 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2097 2105 new_dfh.flush()
2098 2106
2099 2107 if side_write:
2100 2108 self._indexfile = new_index_file_path
2101 2109 with self.__index_new_fp() as fp:
2102 2110 self._format_flags &= ~FLAG_INLINE_DATA
2103 2111 self._inline = False
2104 2112 for i in self:
2105 2113 e = self.index.entry_binary(i)
2106 2114 if i == 0 and self._docket is None:
2107 2115 header = self._format_flags | self._format_version
2108 2116 header = self.index.pack_header(header)
2109 2117 e = header + e
2110 2118 fp.write(e)
2111 2119 if self._docket is not None:
2112 2120 self._docket.index_end = fp.tell()
2113 2121
2114 2122 # If we don't use side-write, the temp file replaces the real
2115 2123 # index when we exit the context manager
2116 2124
2117 2125 nodemaputil.setup_persistent_nodemap(tr, self)
2118 2126 self._segmentfile = randomaccessfile.randomaccessfile(
2119 2127 self.opener,
2120 2128 self._datafile,
2121 2129 self._chunkcachesize,
2122 2130 )
2123 2131
2124 2132 if existing_handles:
2125 2133 # switched from inline to conventional; reopen the index
2126 2134 ifh = self.__index_write_fp()
2127 2135 self._writinghandles = (ifh, new_dfh, None)
2128 2136 self._segmentfile.writing_handle = new_dfh
2129 2137 new_dfh = None
2130 2138 # No need to deal with sidedata writing handle as it is only
2131 2139 # relevant for revlog-v2, which is never inline and thus never
2132 2140 # reaches this code
2133 2141 finally:
2134 2142 if new_dfh is not None:
2135 2143 new_dfh.close()
2136 2144
2137 2145 def _nodeduplicatecallback(self, transaction, node):
2138 2146 """called when trying to add a node already stored."""
2139 2147
2140 2148 @contextlib.contextmanager
2141 2149 def reading(self):
2142 2150 """Context manager that keeps data and sidedata files open for reading"""
2143 2151 with self._segmentfile.reading():
2144 2152 with self._segmentfile_sidedata.reading():
2145 2153 yield
2146 2154
2147 2155 @contextlib.contextmanager
2148 2156 def _writing(self, transaction):
2149 2157 if self._trypending:
2150 2158 msg = b'try to write in a `trypending` revlog: %s'
2151 2159 msg %= self.display_id
2152 2160 raise error.ProgrammingError(msg)
2153 2161 if self._writinghandles is not None:
2154 2162 yield
2155 2163 else:
2156 2164 ifh = dfh = sdfh = None
2157 2165 try:
2158 2166 r = len(self)
2159 2167 # opening the data file.
2160 2168 dsize = 0
2161 2169 if r:
2162 2170 dsize = self.end(r - 1)
2163 2171 dfh = None
2164 2172 if not self._inline:
2165 2173 try:
2166 2174 dfh = self._datafp(b"r+")
2167 2175 if self._docket is None:
2168 2176 dfh.seek(0, os.SEEK_END)
2169 2177 else:
2170 2178 dfh.seek(self._docket.data_end, os.SEEK_SET)
2171 2179 except FileNotFoundError:
2172 2180 dfh = self._datafp(b"w+")
2173 2181 transaction.add(self._datafile, dsize)
2174 2182 if self._sidedatafile is not None:
2175 2183 # revlog-v2 does not inline, help Pytype
2176 2184 assert dfh is not None
2177 2185 try:
2178 2186 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2179 2187 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2180 2188 except FileNotFoundError:
2181 2189 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2182 2190 transaction.add(
2183 2191 self._sidedatafile, self._docket.sidedata_end
2184 2192 )
2185 2193
2186 2194 # opening the index file.
2187 2195 isize = r * self.index.entry_size
2188 2196 ifh = self.__index_write_fp()
2189 2197 if self._inline:
2190 2198 transaction.add(self._indexfile, dsize + isize)
2191 2199 else:
2192 2200 transaction.add(self._indexfile, isize)
2193 2201 # exposing all file handle for writing.
2194 2202 self._writinghandles = (ifh, dfh, sdfh)
2195 2203 self._segmentfile.writing_handle = ifh if self._inline else dfh
2196 2204 self._segmentfile_sidedata.writing_handle = sdfh
2197 2205 yield
2198 2206 if self._docket is not None:
2199 2207 self._write_docket(transaction)
2200 2208 finally:
2201 2209 self._writinghandles = None
2202 2210 self._segmentfile.writing_handle = None
2203 2211 self._segmentfile_sidedata.writing_handle = None
2204 2212 if dfh is not None:
2205 2213 dfh.close()
2206 2214 if sdfh is not None:
2207 2215 sdfh.close()
2208 2216 # close the index file last to avoid exposing references to
2209 2217 # potentially unflushed data content.
2210 2218 if ifh is not None:
2211 2219 ifh.close()
2212 2220
2213 2221 def _write_docket(self, transaction):
2214 2222 """write the current docket on disk
2215 2223
2216 2224 Exists as a method to help the changelog implement its transaction logic
2217 2225
2218 2226 We could also imagine using the same transaction logic for all revlogs
2219 2227 since dockets are cheap."""
2220 2228 self._docket.write(transaction)
2221 2229
2222 2230 def addrevision(
2223 2231 self,
2224 2232 text,
2225 2233 transaction,
2226 2234 link,
2227 2235 p1,
2228 2236 p2,
2229 2237 cachedelta=None,
2230 2238 node=None,
2231 2239 flags=REVIDX_DEFAULT_FLAGS,
2232 2240 deltacomputer=None,
2233 2241 sidedata=None,
2234 2242 ):
2235 2243 """add a revision to the log
2236 2244
2237 2245 text - the revision data to add
2238 2246 transaction - the transaction object used for rollback
2239 2247 link - the linkrev data to add
2240 2248 p1, p2 - the parent nodeids of the revision
2241 2249 cachedelta - an optional precomputed delta
2242 2250 node - nodeid of revision; typically node is not specified, and it is
2243 2251 computed by default as hash(text, p1, p2), however subclasses might
2244 2252 use different hashing method (and override checkhash() in such case)
2245 2253 flags - the known flags to set on the revision
2246 2254 deltacomputer - an optional deltacomputer instance shared between
2247 2255 multiple calls
2248 2256 """
2249 2257 if link == nullrev:
2250 2258 raise error.RevlogError(
2251 2259 _(b"attempted to add linkrev -1 to %s") % self.display_id
2252 2260 )
2253 2261
2254 2262 if sidedata is None:
2255 2263 sidedata = {}
2256 2264 elif sidedata and not self.hassidedata:
2257 2265 raise error.ProgrammingError(
2258 2266 _(b"trying to add sidedata to a revlog who don't support them")
2259 2267 )
2260 2268
2261 2269 if flags:
2262 2270 node = node or self.hash(text, p1, p2)
2263 2271
2264 2272 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2265 2273
2266 2274 # If the flag processor modifies the revision data, ignore any provided
2267 2275 # cachedelta.
2268 2276 if rawtext != text:
2269 2277 cachedelta = None
2270 2278
2271 2279 if len(rawtext) > _maxentrysize:
2272 2280 raise error.RevlogError(
2273 2281 _(
2274 2282 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2275 2283 )
2276 2284 % (self.display_id, len(rawtext))
2277 2285 )
2278 2286
2279 2287 node = node or self.hash(rawtext, p1, p2)
2280 2288 rev = self.index.get_rev(node)
2281 2289 if rev is not None:
2282 2290 return rev
2283 2291
2284 2292 if validatehash:
2285 2293 self.checkhash(rawtext, node, p1=p1, p2=p2)
2286 2294
2287 2295 return self.addrawrevision(
2288 2296 rawtext,
2289 2297 transaction,
2290 2298 link,
2291 2299 p1,
2292 2300 p2,
2293 2301 node,
2294 2302 flags,
2295 2303 cachedelta=cachedelta,
2296 2304 deltacomputer=deltacomputer,
2297 2305 sidedata=sidedata,
2298 2306 )
2299 2307
2300 2308 def addrawrevision(
2301 2309 self,
2302 2310 rawtext,
2303 2311 transaction,
2304 2312 link,
2305 2313 p1,
2306 2314 p2,
2307 2315 node,
2308 2316 flags,
2309 2317 cachedelta=None,
2310 2318 deltacomputer=None,
2311 2319 sidedata=None,
2312 2320 ):
2313 2321 """add a raw revision with known flags, node and parents
2314 2322 useful when reusing a revision not stored in this revlog (ex: received
2315 2323 over wire, or read from an external bundle).
2316 2324 """
2317 2325 with self._writing(transaction):
2318 2326 return self._addrevision(
2319 2327 node,
2320 2328 rawtext,
2321 2329 transaction,
2322 2330 link,
2323 2331 p1,
2324 2332 p2,
2325 2333 flags,
2326 2334 cachedelta,
2327 2335 deltacomputer=deltacomputer,
2328 2336 sidedata=sidedata,
2329 2337 )
2330 2338
2331 2339 def compress(self, data):
2332 2340 """Generate a possibly-compressed representation of data."""
2333 2341 if not data:
2334 2342 return b'', data
2335 2343
2336 2344 compressed = self._compressor.compress(data)
2337 2345
2338 2346 if compressed:
2339 2347 # The revlog compressor added the header in the returned data.
2340 2348 return b'', compressed
2341 2349
2342 2350 if data[0:1] == b'\0':
2343 2351 return b'', data
2344 2352 return b'u', data
2345 2353
2346 2354 def decompress(self, data):
2347 2355 """Decompress a revlog chunk.
2348 2356
2349 2357 The chunk is expected to begin with a header identifying the
2350 2358 format type so it can be routed to an appropriate decompressor.
2351 2359 """
2352 2360 if not data:
2353 2361 return data
2354 2362
2355 2363 # Revlogs are read much more frequently than they are written and many
2356 2364 # chunks only take microseconds to decompress, so performance is
2357 2365 # important here.
2358 2366 #
2359 2367 # We can make a few assumptions about revlogs:
2360 2368 #
2361 2369 # 1) the majority of chunks will be compressed (as opposed to inline
2362 2370 # raw data).
2363 2371 # 2) decompressing *any* data will likely be at least 10x slower than
2364 2372 # returning raw inline data.
2365 2373 # 3) we want to prioritize common and officially supported compression
2366 2374 # engines
2367 2375 #
2368 2376 # It follows that we want to optimize for "decompress compressed data
2369 2377 # when encoded with common and officially supported compression engines"
2370 2378 # case over "raw data" and "data encoded by less common or non-official
2371 2379 # compression engines." That is why we have the inline lookup first
2372 2380 # followed by the compengines lookup.
2373 2381 #
2374 2382 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2375 2383 # compressed chunks. And this matters for changelog and manifest reads.
2376 2384 t = data[0:1]
2377 2385
2378 2386 if t == b'x':
2379 2387 try:
2380 2388 return _zlibdecompress(data)
2381 2389 except zlib.error as e:
2382 2390 raise error.RevlogError(
2383 2391 _(b'revlog decompress error: %s')
2384 2392 % stringutil.forcebytestr(e)
2385 2393 )
2386 2394 # '\0' is more common than 'u' so it goes first.
2387 2395 elif t == b'\0':
2388 2396 return data
2389 2397 elif t == b'u':
2390 2398 return util.buffer(data, 1)
2391 2399
2392 2400 compressor = self._get_decompressor(t)
2393 2401
2394 2402 return compressor.decompress(data)
2395 2403
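# Illustrative round-trip of the chunk header convention implemented by
# ``compress``/``decompress`` above (``rl`` is a hypothetical revlog using
# the default zlib engine):
#
# h, packed = rl.compress(b'some revision text')
# # h is b'' when the compressor emitted its own header (b'x' for zlib)
# # or when the data starts with b'\0'; it is b'u' for data stored
# # uncompressed. Either way the concatenation decompresses back:
# assert bytes(rl.decompress(h + packed)) == b'some revision text'
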
2396 2404 def _addrevision(
2397 2405 self,
2398 2406 node,
2399 2407 rawtext,
2400 2408 transaction,
2401 2409 link,
2402 2410 p1,
2403 2411 p2,
2404 2412 flags,
2405 2413 cachedelta,
2406 2414 alwayscache=False,
2407 2415 deltacomputer=None,
2408 2416 sidedata=None,
2409 2417 ):
2410 2418 """internal function to add revisions to the log
2411 2419
2412 2420 see addrevision for argument descriptions.
2413 2421
2414 2422 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2415 2423
2416 2424 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2417 2425 be used.
2418 2426
2419 2427 invariants:
2420 2428 - rawtext is optional (can be None); if not set, cachedelta must be set.
2421 2429 if both are set, they must correspond to each other.
2422 2430 """
2423 2431 if node == self.nullid:
2424 2432 raise error.RevlogError(
2425 2433 _(b"%s: attempt to add null revision") % self.display_id
2426 2434 )
2427 2435 if (
2428 2436 node == self.nodeconstants.wdirid
2429 2437 or node in self.nodeconstants.wdirfilenodeids
2430 2438 ):
2431 2439 raise error.RevlogError(
2432 2440 _(b"%s: attempt to add wdir revision") % self.display_id
2433 2441 )
2434 2442 if self._writinghandles is None:
2435 2443 msg = b'adding revision outside `revlog._writing` context'
2436 2444 raise error.ProgrammingError(msg)
2437 2445
2438 2446 if self._inline:
2439 2447 fh = self._writinghandles[0]
2440 2448 else:
2441 2449 fh = self._writinghandles[1]
2442 2450
2443 2451 btext = [rawtext]
2444 2452
2445 2453 curr = len(self)
2446 2454 prev = curr - 1
2447 2455
2448 2456 offset = self._get_data_offset(prev)
2449 2457
2450 2458 if self._concurrencychecker:
2451 2459 ifh, dfh, sdfh = self._writinghandles
2452 2460 # XXX no checking for the sidedata file
2453 2461 if self._inline:
2454 2462 # offset is "as if" it were in the .d file, so we need to add on
2455 2463 # the size of the entry metadata.
2456 2464 self._concurrencychecker(
2457 2465 ifh, self._indexfile, offset + curr * self.index.entry_size
2458 2466 )
2459 2467 else:
2460 2468 # Entries in the .i are a consistent size.
2461 2469 self._concurrencychecker(
2462 2470 ifh, self._indexfile, curr * self.index.entry_size
2463 2471 )
2464 2472 self._concurrencychecker(dfh, self._datafile, offset)
2465 2473
2466 2474 p1r, p2r = self.rev(p1), self.rev(p2)
2467 2475
2468 2476 # full versions are inserted when the needed deltas
2469 2477 # become comparable to the uncompressed text
2470 2478 if rawtext is None:
2471 2479 # we need the rawtext size before it is changed by flag processors,
2472 2480 # which is the non-raw size. use revlog explicitly to avoid filelog's
2473 2481 # extra logic that might remove metadata size.
2474 2482 textlen = mdiff.patchedsize(
2475 2483 revlog.size(self, cachedelta[0]), cachedelta[1]
2476 2484 )
2477 2485 else:
2478 2486 textlen = len(rawtext)
2479 2487
2480 2488 if deltacomputer is None:
2481 2489 write_debug = None
2482 2490 if self._debug_delta:
2483 2491 write_debug = transaction._report
2484 2492 deltacomputer = deltautil.deltacomputer(
2485 2493 self, write_debug=write_debug
2486 2494 )
2487 2495
2488 2496 if cachedelta is not None and len(cachedelta) == 2:
2489 2497 # If the cached delta has no information about how it should be
2490 2498 # reused, add the default reuse instruction according to the
2491 2499 # revlog's configuration.
2492 2500 if self._generaldelta and self._lazydeltabase:
2493 2501 delta_base_reuse = DELTA_BASE_REUSE_TRY
2494 2502 else:
2495 2503 delta_base_reuse = DELTA_BASE_REUSE_NO
2496 2504 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2497 2505
2498 2506 revinfo = revlogutils.revisioninfo(
2499 2507 node,
2500 2508 p1,
2501 2509 p2,
2502 2510 btext,
2503 2511 textlen,
2504 2512 cachedelta,
2505 2513 flags,
2506 2514 )
2507 2515
2508 2516 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2509 2517
2510 2518 compression_mode = COMP_MODE_INLINE
2511 2519 if self._docket is not None:
2512 2520 default_comp = self._docket.default_compression_header
2513 2521 r = deltautil.delta_compression(default_comp, deltainfo)
2514 2522 compression_mode, deltainfo = r
2515 2523
2516 2524 sidedata_compression_mode = COMP_MODE_INLINE
2517 2525 if sidedata and self.hassidedata:
2518 2526 sidedata_compression_mode = COMP_MODE_PLAIN
2519 2527 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2520 2528 sidedata_offset = self._docket.sidedata_end
2521 2529 h, comp_sidedata = self.compress(serialized_sidedata)
2522 2530 if (
2523 2531 h != b'u'
2524 2532 and comp_sidedata[0:1] != b'\0'
2525 2533 and len(comp_sidedata) < len(serialized_sidedata)
2526 2534 ):
2527 2535 assert not h
2528 2536 if (
2529 2537 comp_sidedata[0:1]
2530 2538 == self._docket.default_compression_header
2531 2539 ):
2532 2540 sidedata_compression_mode = COMP_MODE_DEFAULT
2533 2541 serialized_sidedata = comp_sidedata
2534 2542 else:
2535 2543 sidedata_compression_mode = COMP_MODE_INLINE
2536 2544 serialized_sidedata = comp_sidedata
2537 2545 else:
2538 2546 serialized_sidedata = b""
2539 2547 # Don't store the offset if the sidedata is empty; that way
2540 2548 # we can easily detect empty sidedata and they will be no different
2541 2549 # from ones we add manually.
2542 2550 sidedata_offset = 0
2543 2551
2544 2552 rank = RANK_UNKNOWN
2545 2553 if self._compute_rank:
2546 2554 if (p1r, p2r) == (nullrev, nullrev):
2547 2555 rank = 1
2548 2556 elif p1r != nullrev and p2r == nullrev:
2549 2557 rank = 1 + self.fast_rank(p1r)
2550 2558 elif p1r == nullrev and p2r != nullrev:
2551 2559 rank = 1 + self.fast_rank(p2r)
2552 2560 else: # merge node
2553 2561 if rustdagop is not None and self.index.rust_ext_compat:
2554 2562 rank = rustdagop.rank(self.index, p1r, p2r)
2555 2563 else:
2556 2564 pmin, pmax = sorted((p1r, p2r))
2557 2565 rank = 1 + self.fast_rank(pmax)
2558 2566 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2559 2567
2560 2568 e = revlogutils.entry(
2561 2569 flags=flags,
2562 2570 data_offset=offset,
2563 2571 data_compressed_length=deltainfo.deltalen,
2564 2572 data_uncompressed_length=textlen,
2565 2573 data_compression_mode=compression_mode,
2566 2574 data_delta_base=deltainfo.base,
2567 2575 link_rev=link,
2568 2576 parent_rev_1=p1r,
2569 2577 parent_rev_2=p2r,
2570 2578 node_id=node,
2571 2579 sidedata_offset=sidedata_offset,
2572 2580 sidedata_compressed_length=len(serialized_sidedata),
2573 2581 sidedata_compression_mode=sidedata_compression_mode,
2574 2582 rank=rank,
2575 2583 )
2576 2584
2577 2585 self.index.append(e)
2578 2586 entry = self.index.entry_binary(curr)
2579 2587 if curr == 0 and self._docket is None:
2580 2588 header = self._format_flags | self._format_version
2581 2589 header = self.index.pack_header(header)
2582 2590 entry = header + entry
2583 2591 self._writeentry(
2584 2592 transaction,
2585 2593 entry,
2586 2594 deltainfo.data,
2587 2595 link,
2588 2596 offset,
2589 2597 serialized_sidedata,
2590 2598 sidedata_offset,
2591 2599 )
2592 2600
2593 2601 rawtext = btext[0]
2594 2602
2595 2603 if alwayscache and rawtext is None:
2596 2604 rawtext = deltacomputer.buildtext(revinfo, fh)
2597 2605
2598 2606 if type(rawtext) == bytes: # only accept immutable objects
2599 2607 self._revisioncache = (node, curr, rawtext)
2600 2608 self._chainbasecache[curr] = deltainfo.chainbase
2601 2609 return curr
2602 2610
2603 2611 def _get_data_offset(self, prev):
2604 2612 """Returns the current offset in the (in-transaction) data file.
2605 2613 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2606 2614 file to store that information: since sidedata can be rewritten to the
2607 2615 end of the data file within a transaction, you can have cases where, for
2608 2616 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2609 2617 to `n - 1`'s sidedata being written after `n`'s data.
2610 2618
2611 2619 TODO cache this in a docket file before getting out of experimental."""
2612 2620 if self._docket is None:
2613 2621 return self.end(prev)
2614 2622 else:
2615 2623 return self._docket.data_end
2616 2624
2617 2625 def _writeentry(
2618 2626 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2619 2627 ):
2620 2628 # Files opened in a+ mode have inconsistent behavior on various
2621 2629 # platforms. Windows requires that a file positioning call be made
2622 2630 # when the file handle transitions between reads and writes. See
2623 2631 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2624 2632 # platforms, Python or the platform itself can be buggy. Some versions
2625 2633 # of Solaris have been observed to not append at the end of the file
2626 2634 # if the file was seeked to before the end. See issue4943 for more.
2627 2635 #
2628 2636 # We work around this issue by inserting a seek() before writing.
2629 2637 # Note: This is likely not necessary on Python 3. However, because
2630 2638 # the file handle is reused for reads and may be seeked there, we need
2631 2639 # to be careful before changing this.
2632 2640 if self._writinghandles is None:
2633 2641 msg = b'adding revision outside `revlog._writing` context'
2634 2642 raise error.ProgrammingError(msg)
2635 2643 ifh, dfh, sdfh = self._writinghandles
2636 2644 if self._docket is None:
2637 2645 ifh.seek(0, os.SEEK_END)
2638 2646 else:
2639 2647 ifh.seek(self._docket.index_end, os.SEEK_SET)
2640 2648 if dfh:
2641 2649 if self._docket is None:
2642 2650 dfh.seek(0, os.SEEK_END)
2643 2651 else:
2644 2652 dfh.seek(self._docket.data_end, os.SEEK_SET)
2645 2653 if sdfh:
2646 2654 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2647 2655
2648 2656 curr = len(self) - 1
2649 2657 if not self._inline:
2650 2658 transaction.add(self._datafile, offset)
2651 2659 if self._sidedatafile:
2652 2660 transaction.add(self._sidedatafile, sidedata_offset)
2653 2661 transaction.add(self._indexfile, curr * len(entry))
2654 2662 if data[0]:
2655 2663 dfh.write(data[0])
2656 2664 dfh.write(data[1])
2657 2665 if sidedata:
2658 2666 sdfh.write(sidedata)
2659 2667 ifh.write(entry)
2660 2668 else:
2661 2669 offset += curr * self.index.entry_size
2662 2670 transaction.add(self._indexfile, offset)
2663 2671 ifh.write(entry)
2664 2672 ifh.write(data[0])
2665 2673 ifh.write(data[1])
2666 2674 assert not sidedata
2667 2675 self._enforceinlinesize(transaction)
2668 2676 if self._docket is not None:
2669 2677 # revlog-v2 always has 3 writing handles, help Pytype
2670 2678 wh1 = self._writinghandles[0]
2671 2679 wh2 = self._writinghandles[1]
2672 2680 wh3 = self._writinghandles[2]
2673 2681 assert wh1 is not None
2674 2682 assert wh2 is not None
2675 2683 assert wh3 is not None
2676 2684 self._docket.index_end = wh1.tell()
2677 2685 self._docket.data_end = wh2.tell()
2678 2686 self._docket.sidedata_end = wh3.tell()
2679 2687
2680 2688 nodemaputil.setup_persistent_nodemap(transaction, self)
2681 2689
2682 2690 def addgroup(
2683 2691 self,
2684 2692 deltas,
2685 2693 linkmapper,
2686 2694 transaction,
2687 2695 alwayscache=False,
2688 2696 addrevisioncb=None,
2689 2697 duplicaterevisioncb=None,
2690 2698 debug_info=None,
2691 2699 delta_base_reuse_policy=None,
2692 2700 ):
2693 2701 """
2694 2702 add a delta group
2695 2703
2696 2704 given a set of deltas, add them to the revision log. the
2697 2705 first delta is against its parent, which should be in our
2698 2706 log, the rest are against the previous delta.
2699 2707
2700 2708 If ``addrevisioncb`` is defined, it will be called with arguments of
2701 2709 this revlog and the node that was added.
2702 2710 """
2703 2711
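# Sketch of the shape each element of ``deltas`` must have, matching the
# unpacking in the loop below:
#
# (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#
# ``deltabase`` names the revision the binary ``delta`` applies to; it must
# already exist in this revlog or appear earlier in the same group.
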
2704 2712 if self._adding_group:
2705 2713 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2706 2714
2707 2715 # read the default delta-base reuse policy from revlog config if the
2708 2716 # group did not specify one.
2709 2717 if delta_base_reuse_policy is None:
2710 2718 if self._generaldelta and self._lazydeltabase:
2711 2719 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2712 2720 else:
2713 2721 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2714 2722
2715 2723 self._adding_group = True
2716 2724 empty = True
2717 2725 try:
2718 2726 with self._writing(transaction):
2719 2727 write_debug = None
2720 2728 if self._debug_delta:
2721 2729 write_debug = transaction._report
2722 2730 deltacomputer = deltautil.deltacomputer(
2723 2731 self,
2724 2732 write_debug=write_debug,
2725 2733 debug_info=debug_info,
2726 2734 )
2727 2735 # loop through our set of deltas
2728 2736 for data in deltas:
2729 2737 (
2730 2738 node,
2731 2739 p1,
2732 2740 p2,
2733 2741 linknode,
2734 2742 deltabase,
2735 2743 delta,
2736 2744 flags,
2737 2745 sidedata,
2738 2746 ) = data
2739 2747 link = linkmapper(linknode)
2740 2748 flags = flags or REVIDX_DEFAULT_FLAGS
2741 2749
2742 2750 rev = self.index.get_rev(node)
2743 2751 if rev is not None:
2744 2752 # this can happen if two branches make the same change
2745 2753 self._nodeduplicatecallback(transaction, rev)
2746 2754 if duplicaterevisioncb:
2747 2755 duplicaterevisioncb(self, rev)
2748 2756 empty = False
2749 2757 continue
2750 2758
2751 2759 for p in (p1, p2):
2752 2760 if not self.index.has_node(p):
2753 2761 raise error.LookupError(
2754 2762 p, self.radix, _(b'unknown parent')
2755 2763 )
2756 2764
2757 2765 if not self.index.has_node(deltabase):
2758 2766 raise error.LookupError(
2759 2767 deltabase, self.display_id, _(b'unknown delta base')
2760 2768 )
2761 2769
2762 2770 baserev = self.rev(deltabase)
2763 2771
2764 2772 if baserev != nullrev and self.iscensored(baserev):
2765 2773 # if base is censored, delta must be full replacement in a
2766 2774 # single patch operation
2767 2775 hlen = struct.calcsize(b">lll")
2768 2776 oldlen = self.rawsize(baserev)
2769 2777 newlen = len(delta) - hlen
2770 2778 if delta[:hlen] != mdiff.replacediffheader(
2771 2779 oldlen, newlen
2772 2780 ):
2773 2781 raise error.CensoredBaseError(
2774 2782 self.display_id, self.node(baserev)
2775 2783 )
2776 2784
2777 2785 if not flags and self._peek_iscensored(baserev, delta):
2778 2786 flags |= REVIDX_ISCENSORED
2779 2787
2780 2788 # We assume consumers of addrevisioncb will want to retrieve
2781 2789 # the added revision, which will require a call to
2782 2790 # revision(). revision() will fast path if there is a cache
2783 2791 # hit. So, we tell _addrevision() to always cache in this case.
2784 2792 # We're only using addgroup() in the context of changegroup
2785 2793 # generation so the revision data can always be handled as raw
2786 2794 # by the flagprocessor.
2787 2795 rev = self._addrevision(
2788 2796 node,
2789 2797 None,
2790 2798 transaction,
2791 2799 link,
2792 2800 p1,
2793 2801 p2,
2794 2802 flags,
2795 2803 (baserev, delta, delta_base_reuse_policy),
2796 2804 alwayscache=alwayscache,
2797 2805 deltacomputer=deltacomputer,
2798 2806 sidedata=sidedata,
2799 2807 )
2800 2808
2801 2809 if addrevisioncb:
2802 2810 addrevisioncb(self, rev)
2803 2811 empty = False
2804 2812 finally:
2805 2813 self._adding_group = False
2806 2814 return not empty
2807 2815
2808 2816 def iscensored(self, rev):
2809 2817 """Check if a file revision is censored."""
2810 2818 if not self._censorable:
2811 2819 return False
2812 2820
2813 2821 return self.flags(rev) & REVIDX_ISCENSORED
2814 2822
2815 2823 def _peek_iscensored(self, baserev, delta):
2816 2824 """Quickly check if a delta produces a censored revision."""
2817 2825 if not self._censorable:
2818 2826 return False
2819 2827
2820 2828 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2821 2829
2822 2830 def getstrippoint(self, minlink):
2823 2831 """find the minimum rev that must be stripped to strip the linkrev
2824 2832
2825 2833 Returns a tuple containing the minimum rev and a set of all revs that
2826 2834 have linkrevs that will be broken by this strip.
2827 2835 """
2828 2836 return storageutil.resolvestripinfo(
2829 2837 minlink,
2830 2838 len(self) - 1,
2831 2839 self.headrevs(),
2832 2840 self.linkrev,
2833 2841 self.parentrevs,
2834 2842 )
2835 2843
2836 2844 def strip(self, minlink, transaction):
2837 2845 """truncate the revlog on the first revision with a linkrev >= minlink
2838 2846
2839 2847 This function is called when we're stripping revision minlink and
2840 2848 its descendants from the repository.
2841 2849
2842 2850 We have to remove all revisions with linkrev >= minlink, because
2843 2851 the equivalent changelog revisions will be renumbered after the
2844 2852 strip.
2845 2853
2846 2854 So we truncate the revlog on the first of these revisions, and
2847 2855 trust that the caller has saved the revisions that shouldn't be
2848 2856 removed and that it'll re-add them after this truncation.
2849 2857 """
2850 2858 if len(self) == 0:
2851 2859 return
2852 2860
2853 2861 rev, _ = self.getstrippoint(minlink)
2854 2862 if rev == len(self):
2855 2863 return
2856 2864
2857 2865 # first truncate the files on disk
2858 2866 data_end = self.start(rev)
2859 2867 if not self._inline:
2860 2868 transaction.add(self._datafile, data_end)
2861 2869 end = rev * self.index.entry_size
2862 2870 else:
2863 2871 end = data_end + (rev * self.index.entry_size)
2864 2872
2865 2873 if self._sidedatafile:
2866 2874 sidedata_end = self.sidedata_cut_off(rev)
2867 2875 transaction.add(self._sidedatafile, sidedata_end)
2868 2876
2869 2877 transaction.add(self._indexfile, end)
2870 2878 if self._docket is not None:
2871 2879 # XXX we could leverage the docket while stripping. However, it is
2872 2880 # not powerful enough at the time of this comment
2873 2881 self._docket.index_end = end
2874 2882 self._docket.data_end = data_end
2875 2883 self._docket.sidedata_end = sidedata_end
2876 2884 self._docket.write(transaction, stripping=True)
2877 2885
2878 2886 # then reset internal state in memory to forget those revisions
2879 2887 self._revisioncache = None
2880 2888 self._chaininfocache = util.lrucachedict(500)
2881 2889 self._segmentfile.clear_cache()
2882 2890 self._segmentfile_sidedata.clear_cache()
2883 2891
2884 2892 del self.index[rev:-1]
2885 2893
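# A sketch of the truncation offsets computed above for the first stripped
# revision (names loosely mirror the method's locals; ``rl`` is
# hypothetical):
def _example_strip_offsets(rl, rev):
    data_end = rl.start(rev)  # first data byte to discard
    if not rl._inline:
        index_end = rev * rl.index.entry_size
    else:
        # inline revlogs mix index entries and data in a single file
        index_end = data_end + rev * rl.index.entry_size
    return data_end, index_end
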
2886 2894 def checksize(self):
2887 2895 """Check size of index and data files
2888 2896
2889 2897 return a (dd, di) tuple.
2890 2898 - dd: extra bytes for the "data" file
2891 2899 - di: extra bytes for the "index" file
2892 2900
2893 2901 A healthy revlog will return (0, 0).
2894 2902 """
2895 2903 expected = 0
2896 2904 if len(self):
2897 2905 expected = max(0, self.end(len(self) - 1))
2898 2906
2899 2907 try:
2900 2908 with self._datafp() as f:
2901 2909 f.seek(0, io.SEEK_END)
2902 2910 actual = f.tell()
2903 2911 dd = actual - expected
2904 2912 except FileNotFoundError:
2905 2913 dd = 0
2906 2914
2907 2915 try:
2908 2916 f = self.opener(self._indexfile)
2909 2917 f.seek(0, io.SEEK_END)
2910 2918 actual = f.tell()
2911 2919 f.close()
2912 2920 s = self.index.entry_size
2913 2921 i = max(0, actual // s)
2914 2922 di = actual - (i * s)
2915 2923 if self._inline:
2916 2924 databytes = 0
2917 2925 for r in self:
2918 2926 databytes += max(0, self.length(r))
2919 2927 dd = 0
2920 2928 di = actual - len(self) * s - databytes
2921 2929 except FileNotFoundError:
2922 2930 di = 0
2923 2931
2924 2932 return (dd, di)
2925 2933
2926 2934 def files(self):
2927 2935 res = [self._indexfile]
2928 2936 if self._docket_file is None:
2929 2937 if not self._inline:
2930 2938 res.append(self._datafile)
2931 2939 else:
2932 2940 res.append(self._docket_file)
2933 2941 res.extend(self._docket.old_index_filepaths(include_empty=False))
2934 2942 if self._docket.data_end:
2935 2943 res.append(self._datafile)
2936 2944 res.extend(self._docket.old_data_filepaths(include_empty=False))
2937 2945 if self._docket.sidedata_end:
2938 2946 res.append(self._sidedatafile)
2939 2947 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2940 2948 return res
2941 2949
2942 2950 def emitrevisions(
2943 2951 self,
2944 2952 nodes,
2945 2953 nodesorder=None,
2946 2954 revisiondata=False,
2947 2955 assumehaveparentrevisions=False,
2948 2956 deltamode=repository.CG_DELTAMODE_STD,
2949 2957 sidedata_helpers=None,
2950 2958 debug_info=None,
2951 2959 ):
2952 2960 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2953 2961 raise error.ProgrammingError(
2954 2962 b'unhandled value for nodesorder: %s' % nodesorder
2955 2963 )
2956 2964
2957 2965 if nodesorder is None and not self._generaldelta:
2958 2966 nodesorder = b'storage'
2959 2967
2960 2968 if (
2961 2969 not self._storedeltachains
2962 2970 and deltamode != repository.CG_DELTAMODE_PREV
2963 2971 ):
2964 2972 deltamode = repository.CG_DELTAMODE_FULL
2965 2973
2966 2974 return storageutil.emitrevisions(
2967 2975 self,
2968 2976 nodes,
2969 2977 nodesorder,
2970 2978 revlogrevisiondelta,
2971 2979 deltaparentfn=self.deltaparent,
2972 2980 candeltafn=self.candelta,
2973 2981 rawsizefn=self.rawsize,
2974 2982 revdifffn=self.revdiff,
2975 2983 flagsfn=self.flags,
2976 2984 deltamode=deltamode,
2977 2985 revisiondata=revisiondata,
2978 2986 assumehaveparentrevisions=assumehaveparentrevisions,
2979 2987 sidedata_helpers=sidedata_helpers,
2980 2988 debug_info=debug_info,
2981 2989 )
2982 2990
2983 2991 DELTAREUSEALWAYS = b'always'
2984 2992 DELTAREUSESAMEREVS = b'samerevs'
2985 2993 DELTAREUSENEVER = b'never'
2986 2994
2987 2995 DELTAREUSEFULLADD = b'fulladd'
2988 2996
2989 2997 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2990 2998
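# Illustrative usage (hypothetical variables): cloning into an empty
# destination while recomputing every delta, e.g. after a delta algorithm
# change.
#
# with repo.transaction(b'example-clone') as tr:
#     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
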
2991 2999 def clone(
2992 3000 self,
2993 3001 tr,
2994 3002 destrevlog,
2995 3003 addrevisioncb=None,
2996 3004 deltareuse=DELTAREUSESAMEREVS,
2997 3005 forcedeltabothparents=None,
2998 3006 sidedata_helpers=None,
2999 3007 ):
3000 3008 """Copy this revlog to another, possibly with format changes.
3001 3009
3002 3010 The destination revlog will contain the same revisions and nodes.
3003 3011 However, it may not be bit-for-bit identical due to e.g. delta encoding
3004 3012 differences.
3005 3013
3006 3014 The ``deltareuse`` argument controls how deltas from the existing revlog
3007 3015 are preserved in the destination revlog. The argument can have the
3008 3016 following values:
3009 3017
3010 3018 DELTAREUSEALWAYS
3011 3019 Deltas will always be reused (if possible), even if the destination
3012 3020 revlog would not select the same revisions for the delta. This is the
3013 3021 fastest mode of operation.
3014 3022 DELTAREUSESAMEREVS
3015 3023 Deltas will be reused if the destination revlog would pick the same
3016 3024 revisions for the delta. This mode strikes a balance between speed
3017 3025 and optimization.
3018 3026 DELTAREUSENEVER
3019 3027 Deltas will never be reused. This is the slowest mode of execution.
3020 3028 This mode can be used to recompute deltas (e.g. if the diff/delta
3021 3029 algorithm changes).
3022 3030 DELTAREUSEFULLADD
3023 3031 Revisions will be re-added as if they were new content. This is
3024 3032 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3025 3033 e.g. large file detection and handling.
3026 3034
3027 3035 Delta computation can be slow, so the choice of delta reuse policy can
3028 3036 significantly affect run time.
3029 3037
3030 3038 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3031 3039 two extremes. Deltas will be reused if they are appropriate. But if the
3032 3040 delta could choose a better revision, it will do so. This means if you
3033 3041 are converting a non-generaldelta revlog to a generaldelta revlog,
3034 3042 deltas will be recomputed if the delta's parent isn't a parent of the
3035 3043 revision.
3036 3044
3037 3045 In addition to the delta policy, the ``forcedeltabothparents``
3038 3046 argument controls whether to force computing deltas against both parents
3039 3047 for merges. When None, the destination revlog's current setting is used.
3040 3048
3041 3049 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3042 3050 `sidedata_helpers`.
3043 3051 """
3044 3052 if deltareuse not in self.DELTAREUSEALL:
3045 3053 raise ValueError(
3046 3054 _(b'value for deltareuse invalid: %s') % deltareuse
3047 3055 )
3048 3056
3049 3057 if len(destrevlog):
3050 3058 raise ValueError(_(b'destination revlog is not empty'))
3051 3059
3052 3060 if getattr(self, 'filteredrevs', None):
3053 3061 raise ValueError(_(b'source revlog has filtered revisions'))
3054 3062 if getattr(destrevlog, 'filteredrevs', None):
3055 3063 raise ValueError(_(b'destination revlog has filtered revisions'))
3056 3064
3057 3065 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3058 3066 # if possible.
3059 3067 oldlazydelta = destrevlog._lazydelta
3060 3068 oldlazydeltabase = destrevlog._lazydeltabase
3061 3069 oldamd = destrevlog._deltabothparents
3062 3070
3063 3071 try:
3064 3072 if deltareuse == self.DELTAREUSEALWAYS:
3065 3073 destrevlog._lazydeltabase = True
3066 3074 destrevlog._lazydelta = True
3067 3075 elif deltareuse == self.DELTAREUSESAMEREVS:
3068 3076 destrevlog._lazydeltabase = False
3069 3077 destrevlog._lazydelta = True
3070 3078 elif deltareuse == self.DELTAREUSENEVER:
3071 3079 destrevlog._lazydeltabase = False
3072 3080 destrevlog._lazydelta = False
3073 3081
3074 3082 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3075 3083
3076 3084 self._clone(
3077 3085 tr,
3078 3086 destrevlog,
3079 3087 addrevisioncb,
3080 3088 deltareuse,
3081 3089 forcedeltabothparents,
3082 3090 sidedata_helpers,
3083 3091 )
3084 3092
3085 3093 finally:
3086 3094 destrevlog._lazydelta = oldlazydelta
3087 3095 destrevlog._lazydeltabase = oldlazydeltabase
3088 3096 destrevlog._deltabothparents = oldamd
3089 3097
3090 3098 def _clone(
3091 3099 self,
3092 3100 tr,
3093 3101 destrevlog,
3094 3102 addrevisioncb,
3095 3103 deltareuse,
3096 3104 forcedeltabothparents,
3097 3105 sidedata_helpers,
3098 3106 ):
3099 3107 """perform the core duty of `revlog.clone` after parameter processing"""
3100 3108 write_debug = None
3101 3109 if self._debug_delta:
3102 3110 write_debug = tr._report
3103 3111 deltacomputer = deltautil.deltacomputer(
3104 3112 destrevlog,
3105 3113 write_debug=write_debug,
3106 3114 )
3107 3115 index = self.index
3108 3116 for rev in self:
3109 3117 entry = index[rev]
3110 3118
3111 3119 # Some classes override linkrev to take filtered revs into
3112 3120 # account. Use raw entry from index.
3113 3121 flags = entry[0] & 0xFFFF
3114 3122 linkrev = entry[4]
3115 3123 p1 = index[entry[5]][7]
3116 3124 p2 = index[entry[6]][7]
3117 3125 node = entry[7]
3118 3126
3119 3127 # (Possibly) reuse the delta from the revlog if allowed and
3120 3128 # the revlog chunk is a delta.
3121 3129 cachedelta = None
3122 3130 rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
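                    # new_flags is a (flags to add, flags to remove) pair;
                    # apply the additions and clear the removals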
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

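                # the full text is only needed when no delta could be
                # reused; sidedata is fetched in either case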
                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
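        """replace the content of ``censornode`` with ``tombstone``

        The rewrite is delegated to a helper specific to the revlog
        format version.
        """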
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on the flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
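                # only skip the flags this revision actually has set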
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
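        """Return a dict describing the storage used by this revlog.

        Each boolean argument requests the matching key in the returned
        dict; unrequested information is not computed.
        """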
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
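            # handles for the index, data and sidedata files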
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

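                # Decide how to store the sidedata: start from PLAIN
                # (uncompressed) when sidedata is present, then switch to
                # compressed storage when compression actually shrinks it:
                # COMP_MODE_DEFAULT if the compression header matches the
                # docket's default engine (so the header can be omitted),
                # COMP_MODE_INLINE otherwise.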
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
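                # (new sidedata offset, sidedata length, updated offset/flags
                # field, sidedata compression mode)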
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
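                # revlogs without a docket embed the format header in the
                # first index entry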
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)