revlog: fix misleading comment about _maxinline
Arseniy Alekseyev
r50723:9854a9ad default
@@ -1,3385 +1,3385 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullrev,
29 29 sha1nodeconstants,
30 30 short,
31 31 wdirrev,
32 32 )
33 33 from .i18n import _
34 34 from .pycompat import getattr
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # no-op usage of all the names to satisfy pyflakes' unused-import check.
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 # max size of revlog with inline data
131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
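# Editor's sketch (not part of the upstream change): _maxinline caps the
# revision data embedded in the index (``.i``) file, not the size of the
# revlog as a whole. A hypothetical helper illustrating the policy:
def _sketch_needs_inline_split(inline_data_size, cap=_maxinline):
    # once the embedded data outgrows the cap, the revlog is rewritten
    # with a separate ``.d`` data file
    return inline_data_size > cap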
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class revlog:
245 245 """
246 246 the underlying revision storage object
247 247
248 248 A revlog consists of two parts, an index and the revision data.
249 249
250 250 The index is a file with a fixed record size containing
251 251 information on each revision, including its nodeid (hash), the
252 252 nodeids of its parents, the position and offset of its data within
253 253 the data file, and the revision it's based on. Finally, each entry
254 254 contains a linkrev entry that can serve as a pointer to external
255 255 data.
256 256
257 257 The revision data itself is a linear collection of data chunks.
258 258 Each chunk represents a revision and is usually represented as a
259 259 delta against the previous chunk. To bound lookup time, runs of
260 260 deltas are limited to about 2 times the length of the original
261 261 version data. This makes retrieval of a version proportional to
262 262 its size, or O(1) relative to the number of revisions.
263 263
264 264 Both pieces of the revlog are written to in an append-only
265 265 fashion, which means we never need to rewrite a file to insert or
266 266 remove data, and can use some simple techniques to avoid the need
267 267 for locking while reading.
268 268
269 269 If checkambig, indexfile is opened with checkambig=True at
270 270 writing, to avoid file stat ambiguity.
271 271
272 272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 273 index will be mmapped rather than read if it is larger than the
274 274 configured threshold.
275 275
276 276 If censorable is True, the revlog can have censored revisions.
277 277
278 278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 279 compression for the data content.
280 280
281 281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 282 file handle, a filename, and an expected position. It should check whether
283 283 the current position in the file handle is valid, and log/warn/fail (by
284 284 raising).
285 285
286 286 See mercurial/revlogutils/constants.py for details about the content of an
287 287 index entry.
288 288 """
289 289
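# Editor's note on the ~2x delta-run bound in the docstring above (assumed
# sizes): for a revision whose full text is 1 MB, a run of deltas is cut
# off once it accumulates roughly 2 MB, so rebuilding the text reads an
# amount of data proportional to the text size, however many revisions
# the revlog holds.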
290 290 _flagserrorclass = error.RevlogError
291 291
292 292 def __init__(
293 293 self,
294 294 opener,
295 295 target,
296 296 radix,
297 297 postfix=None, # only exist for `tmpcensored` now
298 298 checkambig=False,
299 299 mmaplargeindex=False,
300 300 censorable=False,
301 301 upperboundcomp=None,
302 302 persistentnodemap=False,
303 303 concurrencychecker=None,
304 304 trypending=False,
305 305 canonical_parent_order=True,
306 306 ):
307 307 """
308 308 create a revlog object
309 309
310 310 opener is a function that abstracts the file opening operation
311 311 and can be used to implement COW semantics or the like.
312 312
313 313 `target`: a (KIND, ID) tuple that identifies the content stored in
314 314 this revlog. It helps the rest of the code to understand what the revlog
315 315 is about without having to resort to heuristics and index filename
316 316 analysis. Note that this must reliably be set by normal code, but
317 317 test, debug, or performance measurement code might not set this to an
318 318 accurate value.
319 319 """
320 320 self.upperboundcomp = upperboundcomp
321 321
322 322 self.radix = radix
323 323
324 324 self._docket_file = None
325 325 self._indexfile = None
326 326 self._datafile = None
327 327 self._sidedatafile = None
328 328 self._nodemap_file = None
329 329 self.postfix = postfix
330 330 self._trypending = trypending
331 331 self.opener = opener
332 332 if persistentnodemap:
333 333 self._nodemap_file = nodemaputil.get_nodemap_file(self)
334 334
335 335 assert target[0] in ALL_KINDS
336 336 assert len(target) == 2
337 337 self.target = target
338 338 # When True, indexfile is opened with checkambig=True at writing, to
339 339 # avoid file stat ambiguity.
340 340 self._checkambig = checkambig
341 341 self._mmaplargeindex = mmaplargeindex
342 342 self._censorable = censorable
343 343 # 3-tuple of (node, rev, text) for a raw revision.
344 344 self._revisioncache = None
345 345 # Maps rev to chain base rev.
346 346 self._chainbasecache = util.lrucachedict(100)
347 347 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 348 self._chunkcache = (0, b'')
349 349 # How much data to read and cache into the raw revlog data cache.
350 350 self._chunkcachesize = 65536
351 351 self._maxchainlen = None
352 352 self._deltabothparents = True
353 353 self._candidate_group_chunk_size = 0
354 354 self._debug_delta = False
355 355 self.index = None
356 356 self._docket = None
357 357 self._nodemap_docket = None
358 358 # Mapping of partial identifiers to full nodes.
359 359 self._pcache = {}
360 360 # Mapping of revision integer to full node.
361 361 self._compengine = b'zlib'
362 362 self._compengineopts = {}
363 363 self._maxdeltachainspan = -1
364 364 self._withsparseread = False
365 365 self._sparserevlog = False
366 366 self.hassidedata = False
367 367 self._srdensitythreshold = 0.50
368 368 self._srmingapsize = 262144
369 369
370 370 # other optional features
371 371
372 372 # might remove rank configuration once the computation has no impact
373 373 self._compute_rank = False
374 374
375 375 # Make copy of flag processors so each revlog instance can support
376 376 # custom flags.
377 377 self._flagprocessors = dict(flagutil.flagprocessors)
378 378
379 379 # 3-tuple of file handles being used for active writing.
380 380 self._writinghandles = None
381 381 # prevent nesting of addgroup
382 382 self._adding_group = None
383 383
384 384 self._loadindex()
385 385
386 386 self._concurrencychecker = concurrencychecker
387 387
388 388 # parent order is supposed to be semantically irrelevant, so we
389 389 # normally resort parents to ensure that the first parent is non-null,
390 390 # if there is a non-null parent at all.
391 391 # filelog abuses the parent order as a flag to mark some instances of
392 392 # meta-encoded files, so allow it to disable this behavior.
393 393 self.canonical_parent_order = canonical_parent_order
394 394
395 395 def _init_opts(self):
396 396 """process options (from above/config) to set up the associated default revlog mode
397 397
398 398 These values might be affected when actually reading on-disk information.
399 399
400 400 The relevant values are returned for use in _loadindex().
401 401
402 402 * newversionflags:
403 403 version header to use if we need to create a new revlog
404 404
405 405 * mmapindexthreshold:
406 406 minimal index size at which to start using mmap
407 407
408 408 * force_nodemap:
409 409 force the usage of a "development" version of the nodemap code
410 410 """
411 411 mmapindexthreshold = None
412 412 opts = self.opener.options
413 413
414 414 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
415 415 new_header = CHANGELOGV2
416 416 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
417 417 elif b'revlogv2' in opts:
418 418 new_header = REVLOGV2
419 419 elif b'revlogv1' in opts:
420 420 new_header = REVLOGV1 | FLAG_INLINE_DATA
421 421 if b'generaldelta' in opts:
422 422 new_header |= FLAG_GENERALDELTA
423 423 elif b'revlogv0' in self.opener.options:
424 424 new_header = REVLOGV0
425 425 else:
426 426 new_header = REVLOG_DEFAULT_VERSION
427 427
428 428 if b'chunkcachesize' in opts:
429 429 self._chunkcachesize = opts[b'chunkcachesize']
430 430 if b'maxchainlen' in opts:
431 431 self._maxchainlen = opts[b'maxchainlen']
432 432 if b'deltabothparents' in opts:
433 433 self._deltabothparents = opts[b'deltabothparents']
434 434 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
435 435 if dps_cgds:
436 436 self._candidate_group_chunk_size = dps_cgds
437 437 self._lazydelta = bool(opts.get(b'lazydelta', True))
438 438 self._lazydeltabase = False
439 439 if self._lazydelta:
440 440 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
441 441 if b'debug-delta' in opts:
442 442 self._debug_delta = opts[b'debug-delta']
443 443 if b'compengine' in opts:
444 444 self._compengine = opts[b'compengine']
445 445 if b'zlib.level' in opts:
446 446 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
447 447 if b'zstd.level' in opts:
448 448 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
449 449 if b'maxdeltachainspan' in opts:
450 450 self._maxdeltachainspan = opts[b'maxdeltachainspan']
451 451 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
452 452 mmapindexthreshold = opts[b'mmapindexthreshold']
453 453 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
454 454 withsparseread = bool(opts.get(b'with-sparse-read', False))
455 455 # sparse-revlog forces sparse-read
456 456 self._withsparseread = self._sparserevlog or withsparseread
457 457 if b'sparse-read-density-threshold' in opts:
458 458 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
459 459 if b'sparse-read-min-gap-size' in opts:
460 460 self._srmingapsize = opts[b'sparse-read-min-gap-size']
461 461 if opts.get(b'enableellipsis'):
462 462 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
463 463
464 464 # revlog v0 doesn't have flag processors
465 465 for flag, processor in opts.get(b'flagprocessors', {}).items():
466 466 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
467 467
468 468 if self._chunkcachesize <= 0:
469 469 raise error.RevlogError(
470 470 _(b'revlog chunk cache size %r is not greater than 0')
471 471 % self._chunkcachesize
472 472 )
473 473 elif self._chunkcachesize & (self._chunkcachesize - 1):
474 474 raise error.RevlogError(
475 475 _(b'revlog chunk cache size %r is not a power of 2')
476 476 % self._chunkcachesize
477 477 )
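# Editor's note: ``n & (n - 1)`` clears the lowest set bit of ``n``, so the
# check above is non-zero exactly for non-powers-of-two; e.g.
# 65536 & 65535 == 0 (accepted) while 65537 & 65536 == 65536 (rejected).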
478 478 force_nodemap = opts.get(b'devel-force-nodemap', False)
479 479 return new_header, mmapindexthreshold, force_nodemap
480 480
481 481 def _get_data(self, filepath, mmap_threshold, size=None):
482 482 """return a file content with or without mmap
483 483
484 484 If the file is missing return the empty string"""
485 485 try:
486 486 with self.opener(filepath) as fp:
487 487 if mmap_threshold is not None:
488 488 file_size = self.opener.fstat(fp).st_size
489 489 if file_size >= mmap_threshold:
490 490 if size is not None:
491 491 # avoid potential mmap crash
492 492 size = min(file_size, size)
493 493 # TODO: should .close() to release resources without
494 494 # relying on Python GC
495 495 if size is None:
496 496 return util.buffer(util.mmapread(fp))
497 497 else:
498 498 return util.buffer(util.mmapread(fp, size))
499 499 if size is None:
500 500 return fp.read()
501 501 else:
502 502 return fp.read(size)
503 503 except FileNotFoundError:
504 504 return b''
505 505
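# Editor's worked example (hypothetical sizes): with mmap_threshold set to
# 1 MiB, a 4 MiB index is mmapped (bounded by ``size`` when given, to avoid
# mapping past the expected end of the file), while a 64 KiB index is
# simply read into memory.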
506 506 def _loadindex(self, docket=None):
507 507
508 508 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
509 509
510 510 if self.postfix is not None:
511 511 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
512 512 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
513 513 entry_point = b'%s.i.a' % self.radix
514 514 else:
515 515 entry_point = b'%s.i' % self.radix
516 516
517 517 if docket is not None:
518 518 self._docket = docket
519 519 self._docket_file = entry_point
520 520 else:
521 521 self._initempty = True
522 522 entry_data = self._get_data(entry_point, mmapindexthreshold)
523 523 if len(entry_data) > 0:
524 524 header = INDEX_HEADER.unpack(entry_data[:4])[0]
525 525 self._initempty = False
526 526 else:
527 527 header = new_header
528 528
529 529 self._format_flags = header & ~0xFFFF
530 530 self._format_version = header & 0xFFFF
531 531
532 532 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
533 533 if supported_flags is None:
534 534 msg = _(b'unknown version (%d) in revlog %s')
535 535 msg %= (self._format_version, self.display_id)
536 536 raise error.RevlogError(msg)
537 537 elif self._format_flags & ~supported_flags:
538 538 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
539 539 display_flag = self._format_flags >> 16
540 540 msg %= (display_flag, self._format_version, self.display_id)
541 541 raise error.RevlogError(msg)
542 542
543 543 features = FEATURES_BY_VERSION[self._format_version]
544 544 self._inline = features[b'inline'](self._format_flags)
545 545 self._generaldelta = features[b'generaldelta'](self._format_flags)
546 546 self.hassidedata = features[b'sidedata']
547 547
548 548 if not features[b'docket']:
549 549 self._indexfile = entry_point
550 550 index_data = entry_data
551 551 else:
552 552 self._docket_file = entry_point
553 553 if self._initempty:
554 554 self._docket = docketutil.default_docket(self, header)
555 555 else:
556 556 self._docket = docketutil.parse_docket(
557 557 self, entry_data, use_pending=self._trypending
558 558 )
559 559
560 560 if self._docket is not None:
561 561 self._indexfile = self._docket.index_filepath()
562 562 index_data = b''
563 563 index_size = self._docket.index_end
564 564 if index_size > 0:
565 565 index_data = self._get_data(
566 566 self._indexfile, mmapindexthreshold, size=index_size
567 567 )
568 568 if len(index_data) < index_size:
569 569 msg = _(b'too few index data for %s: got %d, expected %d')
570 570 msg %= (self.display_id, len(index_data), index_size)
571 571 raise error.RevlogError(msg)
572 572
573 573 self._inline = False
574 574 # generaldelta is implied by version 2 revlogs.
575 575 self._generaldelta = True
576 576 # the logic for persistent nodemap will be dealt with within the
577 577 # main docket, so disable it for now.
578 578 self._nodemap_file = None
579 579
580 580 if self._docket is not None:
581 581 self._datafile = self._docket.data_filepath()
582 582 self._sidedatafile = self._docket.sidedata_filepath()
583 583 elif self.postfix is None:
584 584 self._datafile = b'%s.d' % self.radix
585 585 else:
586 586 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
587 587
588 588 self.nodeconstants = sha1nodeconstants
589 589 self.nullid = self.nodeconstants.nullid
590 590
591 591 # sparse-revlog can't be on without general-delta (issue6056)
592 592 if not self._generaldelta:
593 593 self._sparserevlog = False
594 594
595 595 self._storedeltachains = True
596 596
597 597 devel_nodemap = (
598 598 self._nodemap_file
599 599 and force_nodemap
600 600 and parse_index_v1_nodemap is not None
601 601 )
602 602
603 603 use_rust_index = False
604 604 if rustrevlog is not None:
605 605 if self._nodemap_file is not None:
606 606 use_rust_index = True
607 607 else:
608 608 use_rust_index = self.opener.options.get(b'rust.index')
609 609
610 610 self._parse_index = parse_index_v1
611 611 if self._format_version == REVLOGV0:
612 612 self._parse_index = revlogv0.parse_index_v0
613 613 elif self._format_version == REVLOGV2:
614 614 self._parse_index = parse_index_v2
615 615 elif self._format_version == CHANGELOGV2:
616 616 self._parse_index = parse_index_cl_v2
617 617 elif devel_nodemap:
618 618 self._parse_index = parse_index_v1_nodemap
619 619 elif use_rust_index:
620 620 self._parse_index = parse_index_v1_mixed
621 621 try:
622 622 d = self._parse_index(index_data, self._inline)
623 623 index, chunkcache = d
624 624 use_nodemap = (
625 625 not self._inline
626 626 and self._nodemap_file is not None
627 627 and util.safehasattr(index, 'update_nodemap_data')
628 628 )
629 629 if use_nodemap:
630 630 nodemap_data = nodemaputil.persisted_data(self)
631 631 if nodemap_data is not None:
632 632 docket = nodemap_data[0]
633 633 if (
634 634 len(d[0]) > docket.tip_rev
635 635 and d[0][docket.tip_rev][7] == docket.tip_node
636 636 ):
637 637 # no changelog tampering
638 638 self._nodemap_docket = docket
639 639 index.update_nodemap_data(*nodemap_data)
640 640 except (ValueError, IndexError):
641 641 raise error.RevlogError(
642 642 _(b"index %s is corrupted") % self.display_id
643 643 )
644 644 self.index = index
645 645 self._segmentfile = randomaccessfile.randomaccessfile(
646 646 self.opener,
647 647 (self._indexfile if self._inline else self._datafile),
648 648 self._chunkcachesize,
649 649 chunkcache,
650 650 )
651 651 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
652 652 self.opener,
653 653 self._sidedatafile,
654 654 self._chunkcachesize,
655 655 )
656 656 # revnum -> (chain-length, sum-delta-length)
657 657 self._chaininfocache = util.lrucachedict(500)
658 658 # revlog header -> revlog compressor
659 659 self._decompressors = {}
660 660
661 661 @util.propertycache
662 662 def revlog_kind(self):
663 663 return self.target[0]
664 664
665 665 @util.propertycache
666 666 def display_id(self):
667 667 """The public facing "ID" of the revlog that we use in messages"""
668 668 if self.revlog_kind == KIND_FILELOG:
669 669 # Reference the file without the "data/" prefix, so it is familiar
670 670 # to the user.
671 671 return self.target[1]
672 672 else:
673 673 return self.radix
674 674
675 675 def _get_decompressor(self, t):
676 676 try:
677 677 compressor = self._decompressors[t]
678 678 except KeyError:
679 679 try:
680 680 engine = util.compengines.forrevlogheader(t)
681 681 compressor = engine.revlogcompressor(self._compengineopts)
682 682 self._decompressors[t] = compressor
683 683 except KeyError:
684 684 raise error.RevlogError(
685 685 _(b'unknown compression type %s') % binascii.hexlify(t)
686 686 )
687 687 return compressor
688 688
689 689 @util.propertycache
690 690 def _compressor(self):
691 691 engine = util.compengines[self._compengine]
692 692 return engine.revlogcompressor(self._compengineopts)
693 693
694 694 @util.propertycache
695 695 def _decompressor(self):
696 696 """the default decompressor"""
697 697 if self._docket is None:
698 698 return None
699 699 t = self._docket.default_compression_header
700 700 c = self._get_decompressor(t)
701 701 return c.decompress
702 702
703 703 def _indexfp(self):
704 704 """file object for the revlog's index file"""
705 705 return self.opener(self._indexfile, mode=b"r")
706 706
707 707 def __index_write_fp(self):
708 708 # You should not use this directly; use `_writing` instead
709 709 try:
710 710 f = self.opener(
711 711 self._indexfile, mode=b"r+", checkambig=self._checkambig
712 712 )
713 713 if self._docket is None:
714 714 f.seek(0, os.SEEK_END)
715 715 else:
716 716 f.seek(self._docket.index_end, os.SEEK_SET)
717 717 return f
718 718 except FileNotFoundError:
719 719 return self.opener(
720 720 self._indexfile, mode=b"w+", checkambig=self._checkambig
721 721 )
722 722
723 723 def __index_new_fp(self):
724 724 # You should not use this unless you are upgrading from an inline revlog
725 725 return self.opener(
726 726 self._indexfile,
727 727 mode=b"w",
728 728 checkambig=self._checkambig,
729 729 atomictemp=True,
730 730 )
731 731
732 732 def _datafp(self, mode=b'r'):
733 733 """file object for the revlog's data file"""
734 734 return self.opener(self._datafile, mode=mode)
735 735
736 736 @contextlib.contextmanager
737 737 def _sidedatareadfp(self):
738 738 """file object suitable to read sidedata"""
739 739 if self._writinghandles:
740 740 yield self._writinghandles[2]
741 741 else:
742 742 with self.opener(self._sidedatafile) as fp:
743 743 yield fp
744 744
745 745 def tiprev(self):
746 746 return len(self.index) - 1
747 747
748 748 def tip(self):
749 749 return self.node(self.tiprev())
750 750
751 751 def __contains__(self, rev):
752 752 return 0 <= rev < len(self)
753 753
754 754 def __len__(self):
755 755 return len(self.index)
756 756
757 757 def __iter__(self):
758 758 return iter(range(len(self)))
759 759
760 760 def revs(self, start=0, stop=None):
761 761 """iterate over all revs in this revlog (from start to stop)"""
762 762 return storageutil.iterrevs(len(self), start=start, stop=stop)
763 763
764 764 def hasnode(self, node):
765 765 try:
766 766 self.rev(node)
767 767 return True
768 768 except KeyError:
769 769 return False
770 770
771 771 def candelta(self, baserev, rev):
772 772 """whether two revisions (baserev, rev) can be delta-ed or not"""
773 773 # Disable delta if either rev requires a content-changing flag
774 774 # processor (ex. LFS). This is because such flag processor can alter
775 775 # the rawtext content that the delta will be based on, and two clients
776 776 # could have a same revlog node with different flags (i.e. different
777 777 # rawtext contents) and the delta could be incompatible.
778 778 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
779 779 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
780 780 ):
781 781 return False
782 782 return True
783 783
784 784 def update_caches(self, transaction):
785 785 if self._nodemap_file is not None:
786 786 if transaction is None:
787 787 nodemaputil.update_persistent_nodemap(self)
788 788 else:
789 789 nodemaputil.setup_persistent_nodemap(transaction, self)
790 790
791 791 def clearcaches(self):
792 792 self._revisioncache = None
793 793 self._chainbasecache.clear()
794 794 self._segmentfile.clear_cache()
795 795 self._segmentfile_sidedata.clear_cache()
796 796 self._pcache = {}
797 797 self._nodemap_docket = None
798 798 self.index.clearcaches()
799 799 # The python code is the one responsible for validating the docket, so we
800 800 # end up having to refresh it here.
801 801 use_nodemap = (
802 802 not self._inline
803 803 and self._nodemap_file is not None
804 804 and util.safehasattr(self.index, 'update_nodemap_data')
805 805 )
806 806 if use_nodemap:
807 807 nodemap_data = nodemaputil.persisted_data(self)
808 808 if nodemap_data is not None:
809 809 self._nodemap_docket = nodemap_data[0]
810 810 self.index.update_nodemap_data(*nodemap_data)
811 811
812 812 def rev(self, node):
813 813 try:
814 814 return self.index.rev(node)
815 815 except TypeError:
816 816 raise
817 817 except error.RevlogError:
818 818 # parsers.c radix tree lookup failed
819 819 if (
820 820 node == self.nodeconstants.wdirid
821 821 or node in self.nodeconstants.wdirfilenodeids
822 822 ):
823 823 raise error.WdirUnsupported
824 824 raise error.LookupError(node, self.display_id, _(b'no node'))
825 825
826 826 # Accessors for index entries.
827 827
828 828 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
829 829 # are flags.
830 830 def start(self, rev):
831 831 return int(self.index[rev][0] >> 16)
832 832
833 833 def sidedata_cut_off(self, rev):
834 834 sd_cut_off = self.index[rev][8]
835 835 if sd_cut_off != 0:
836 836 return sd_cut_off
837 837 # This is some annoying dance, because entries without sidedata
838 838 # currently use 0 as their offset (instead of previous-offset +
839 839 # previous-size).
840 840 #
841 841 # We should reconsider this sidedata → 0 sidedata_offset policy.
842 842 # In the meantime, we need this.
843 843 while 0 <= rev:
844 844 e = self.index[rev]
845 845 if e[9] != 0:
846 846 return e[8] + e[9]
847 847 rev -= 1
848 848 return 0
849 849
850 850 def flags(self, rev):
851 851 return self.index[rev][0] & 0xFFFF
852 852
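# Editor's illustration (made-up values) of the packing that start() and
# flags() above unpack:
#
#     packed = (offset << 16) | flags
#     packed >> 16 == offset      # start()
#     packed & 0xFFFF == flags    # flags()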
853 853 def length(self, rev):
854 854 return self.index[rev][1]
855 855
856 856 def sidedata_length(self, rev):
857 857 if not self.hassidedata:
858 858 return 0
859 859 return self.index[rev][9]
860 860
861 861 def rawsize(self, rev):
862 862 """return the length of the uncompressed text for a given revision"""
863 863 l = self.index[rev][2]
864 864 if l >= 0:
865 865 return l
866 866
867 867 t = self.rawdata(rev)
868 868 return len(t)
869 869
870 870 def size(self, rev):
871 871 """length of non-raw text (processed by a "read" flag processor)"""
872 872 # fast path: if no "read" flag processor could change the content,
873 873 # size is rawsize. note: ELLIPSIS is known to not change the content.
874 874 flags = self.flags(rev)
875 875 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
876 876 return self.rawsize(rev)
877 877
878 878 return len(self.revision(rev))
879 879
880 880 def fast_rank(self, rev):
881 881 """Return the rank of a revision if already known, or None otherwise.
882 882
883 883 The rank of a revision is the size of the sub-graph it defines as a
884 884 head. Equivalently, the rank of a revision `r` is the size of the set
885 885 `ancestors(r)`, `r` included.
886 886
887 887 This method returns the rank retrieved from the revlog in constant
888 888 time. It makes no attempt at computing unknown values for versions of
889 889 the revlog which do not persist the rank.
890 890 """
891 891 rank = self.index[rev][ENTRY_RANK]
892 892 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
893 893 return None
894 894 if rev == nullrev:
895 895 return 0 # convention
896 896 return rank
897 897
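# Editor's worked example for fast_rank (hypothetical linear history
# 0 <- 1 <- 2): the rank of rev 2 is 3, since ancestors(2) = {0, 1, 2},
# rev 2 included.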
898 898 def chainbase(self, rev):
899 899 base = self._chainbasecache.get(rev)
900 900 if base is not None:
901 901 return base
902 902
903 903 index = self.index
904 904 iterrev = rev
905 905 base = index[iterrev][3]
906 906 while base != iterrev:
907 907 iterrev = base
908 908 base = index[iterrev][3]
909 909
910 910 self._chainbasecache[rev] = base
911 911 return base
912 912
913 913 def linkrev(self, rev):
914 914 return self.index[rev][4]
915 915
916 916 def parentrevs(self, rev):
917 917 try:
918 918 entry = self.index[rev]
919 919 except IndexError:
920 920 if rev == wdirrev:
921 921 raise error.WdirUnsupported
922 922 raise
923 923
924 924 if self.canonical_parent_order and entry[5] == nullrev:
925 925 return entry[6], entry[5]
926 926 else:
927 927 return entry[5], entry[6]
928 928
929 929 # fast parentrevs(rev) where rev isn't filtered
930 930 _uncheckedparentrevs = parentrevs
931 931
932 932 def node(self, rev):
933 933 try:
934 934 return self.index[rev][7]
935 935 except IndexError:
936 936 if rev == wdirrev:
937 937 raise error.WdirUnsupported
938 938 raise
939 939
940 940 # Derived from index values.
941 941
942 942 def end(self, rev):
943 943 return self.start(rev) + self.length(rev)
944 944
945 945 def parents(self, node):
946 946 i = self.index
947 947 d = i[self.rev(node)]
948 948 # inline node() to avoid function call overhead
949 949 if self.canonical_parent_order and d[5] == self.nullid:
950 950 return i[d[6]][7], i[d[5]][7]
951 951 else:
952 952 return i[d[5]][7], i[d[6]][7]
953 953
954 954 def chainlen(self, rev):
955 955 return self._chaininfo(rev)[0]
956 956
957 957 def _chaininfo(self, rev):
958 958 chaininfocache = self._chaininfocache
959 959 if rev in chaininfocache:
960 960 return chaininfocache[rev]
961 961 index = self.index
962 962 generaldelta = self._generaldelta
963 963 iterrev = rev
964 964 e = index[iterrev]
965 965 clen = 0
966 966 compresseddeltalen = 0
967 967 while iterrev != e[3]:
968 968 clen += 1
969 969 compresseddeltalen += e[1]
970 970 if generaldelta:
971 971 iterrev = e[3]
972 972 else:
973 973 iterrev -= 1
974 974 if iterrev in chaininfocache:
975 975 t = chaininfocache[iterrev]
976 976 clen += t[0]
977 977 compresseddeltalen += t[1]
978 978 break
979 979 e = index[iterrev]
980 980 else:
981 981 # Add text length of base since decompressing that also takes
982 982 # work. For cache hits the length is already included.
983 983 compresseddeltalen += e[1]
984 984 r = (clen, compresseddeltalen)
985 985 chaininfocache[rev] = r
986 986 return r
987 987
988 988 def _deltachain(self, rev, stoprev=None):
989 989 """Obtain the delta chain for a revision.
990 990
991 991 ``stoprev`` specifies a revision to stop at. If not specified, we
992 992 stop at the base of the chain.
993 993
994 994 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
995 995 revs in ascending order and ``stopped`` is a bool indicating whether
996 996 ``stoprev`` was hit.
997 997 """
998 998 # Try C implementation.
999 999 try:
1000 1000 return self.index.deltachain(rev, stoprev, self._generaldelta)
1001 1001 except AttributeError:
1002 1002 pass
1003 1003
1004 1004 chain = []
1005 1005
1006 1006 # Alias to prevent attribute lookup in tight loop.
1007 1007 index = self.index
1008 1008 generaldelta = self._generaldelta
1009 1009
1010 1010 iterrev = rev
1011 1011 e = index[iterrev]
1012 1012 while iterrev != e[3] and iterrev != stoprev:
1013 1013 chain.append(iterrev)
1014 1014 if generaldelta:
1015 1015 iterrev = e[3]
1016 1016 else:
1017 1017 iterrev -= 1
1018 1018 e = index[iterrev]
1019 1019
1020 1020 if iterrev == stoprev:
1021 1021 stopped = True
1022 1022 else:
1023 1023 chain.append(iterrev)
1024 1024 stopped = False
1025 1025
1026 1026 chain.reverse()
1027 1027 return chain, stopped
1028 1028
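# Editor's sketch of how a delta chain is consumed (simplified from the
# real read path, using the module's mdiff helper):
#
#     chain, _stopped = rl._deltachain(rev)
#     text = rl._chunk(chain[0])                  # full text of the base
#     for r in chain[1:]:
#         text = mdiff.patch(text, rl._chunk(r))  # apply each delta in order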
1029 1029 def ancestors(self, revs, stoprev=0, inclusive=False):
1030 1030 """Generate the ancestors of 'revs' in reverse revision order.
1031 1031 Does not generate revs lower than stoprev.
1032 1032
1033 1033 See the documentation for ancestor.lazyancestors for more details."""
1034 1034
1035 1035 # first, make sure start revisions aren't filtered
1036 1036 revs = list(revs)
1037 1037 checkrev = self.node
1038 1038 for r in revs:
1039 1039 checkrev(r)
1040 1040 # and we're sure ancestors aren't filtered as well
1041 1041
1042 1042 if rustancestor is not None and self.index.rust_ext_compat:
1043 1043 lazyancestors = rustancestor.LazyAncestors
1044 1044 arg = self.index
1045 1045 else:
1046 1046 lazyancestors = ancestor.lazyancestors
1047 1047 arg = self._uncheckedparentrevs
1048 1048 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1049 1049
1050 1050 def descendants(self, revs):
1051 1051 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1052 1052
1053 1053 def findcommonmissing(self, common=None, heads=None):
1054 1054 """Return a tuple of the ancestors of common and the ancestors of heads
1055 1055 that are not ancestors of common. In revset terminology, we return the
1056 1056 tuple:
1057 1057
1058 1058 ::common, (::heads) - (::common)
1059 1059
1060 1060 The list is sorted by revision number, meaning it is
1061 1061 topologically sorted.
1062 1062
1063 1063 'heads' and 'common' are both lists of node IDs. If heads is
1064 1064 not supplied, uses all of the revlog's heads. If common is not
1065 1065 supplied, uses nullid."""
1066 1066 if common is None:
1067 1067 common = [self.nullid]
1068 1068 if heads is None:
1069 1069 heads = self.heads()
1070 1070
1071 1071 common = [self.rev(n) for n in common]
1072 1072 heads = [self.rev(n) for n in heads]
1073 1073
1074 1074 # we want the ancestors, but inclusive
1075 1075 class lazyset:
1076 1076 def __init__(self, lazyvalues):
1077 1077 self.addedvalues = set()
1078 1078 self.lazyvalues = lazyvalues
1079 1079
1080 1080 def __contains__(self, value):
1081 1081 return value in self.addedvalues or value in self.lazyvalues
1082 1082
1083 1083 def __iter__(self):
1084 1084 added = self.addedvalues
1085 1085 for r in added:
1086 1086 yield r
1087 1087 for r in self.lazyvalues:
1088 1088 if r not in added:
1089 1089 yield r
1090 1090
1091 1091 def add(self, value):
1092 1092 self.addedvalues.add(value)
1093 1093
1094 1094 def update(self, values):
1095 1095 self.addedvalues.update(values)
1096 1096
1097 1097 has = lazyset(self.ancestors(common))
1098 1098 has.add(nullrev)
1099 1099 has.update(common)
1100 1100
1101 1101 # take all ancestors from heads that aren't in has
1102 1102 missing = set()
1103 1103 visit = collections.deque(r for r in heads if r not in has)
1104 1104 while visit:
1105 1105 r = visit.popleft()
1106 1106 if r in missing:
1107 1107 continue
1108 1108 else:
1109 1109 missing.add(r)
1110 1110 for p in self.parentrevs(r):
1111 1111 if p not in has:
1112 1112 visit.append(p)
1113 1113 missing = list(missing)
1114 1114 missing.sort()
1115 1115 return has, [self.node(miss) for miss in missing]
1116 1116
1117 1117 def incrementalmissingrevs(self, common=None):
1118 1118 """Return an object that can be used to incrementally compute the
1119 1119 revision numbers of the ancestors of arbitrary sets that are not
1120 1120 ancestors of common. This is an ancestor.incrementalmissingancestors
1121 1121 object.
1122 1122
1123 1123 'common' is a list of revision numbers. If common is not supplied, uses
1124 1124 nullrev.
1125 1125 """
1126 1126 if common is None:
1127 1127 common = [nullrev]
1128 1128
1129 1129 if rustancestor is not None and self.index.rust_ext_compat:
1130 1130 return rustancestor.MissingAncestors(self.index, common)
1131 1131 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1132 1132
1133 1133 def findmissingrevs(self, common=None, heads=None):
1134 1134 """Return the revision numbers of the ancestors of heads that
1135 1135 are not ancestors of common.
1136 1136
1137 1137 More specifically, return a list of revision numbers corresponding to
1138 1138 nodes N such that every N satisfies the following constraints:
1139 1139
1140 1140 1. N is an ancestor of some node in 'heads'
1141 1141 2. N is not an ancestor of any node in 'common'
1142 1142
1143 1143 The list is sorted by revision number, meaning it is
1144 1144 topologically sorted.
1145 1145
1146 1146 'heads' and 'common' are both lists of revision numbers. If heads is
1147 1147 not supplied, uses all of the revlog's heads. If common is not
1148 1148 supplied, uses nullid."""
1149 1149 if common is None:
1150 1150 common = [nullrev]
1151 1151 if heads is None:
1152 1152 heads = self.headrevs()
1153 1153
1154 1154 inc = self.incrementalmissingrevs(common=common)
1155 1155 return inc.missingancestors(heads)
1156 1156
1157 1157 def findmissing(self, common=None, heads=None):
1158 1158 """Return the ancestors of heads that are not ancestors of common.
1159 1159
1160 1160 More specifically, return a list of nodes N such that every N
1161 1161 satisfies the following constraints:
1162 1162
1163 1163 1. N is an ancestor of some node in 'heads'
1164 1164 2. N is not an ancestor of any node in 'common'
1165 1165
1166 1166 The list is sorted by revision number, meaning it is
1167 1167 topologically sorted.
1168 1168
1169 1169 'heads' and 'common' are both lists of node IDs. If heads is
1170 1170 not supplied, uses all of the revlog's heads. If common is not
1171 1171 supplied, uses nullid."""
1172 1172 if common is None:
1173 1173 common = [self.nullid]
1174 1174 if heads is None:
1175 1175 heads = self.heads()
1176 1176
1177 1177 common = [self.rev(n) for n in common]
1178 1178 heads = [self.rev(n) for n in heads]
1179 1179
1180 1180 inc = self.incrementalmissingrevs(common=common)
1181 1181 return [self.node(r) for r in inc.missingancestors(heads)]
1182 1182
1183 1183 def nodesbetween(self, roots=None, heads=None):
1184 1184 """Return a topological path from 'roots' to 'heads'.
1185 1185
1186 1186 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1187 1187 topologically sorted list of all nodes N that satisfy both of
1188 1188 these constraints:
1189 1189
1190 1190 1. N is a descendant of some node in 'roots'
1191 1191 2. N is an ancestor of some node in 'heads'
1192 1192
1193 1193 Every node is considered to be both a descendant and an ancestor
1194 1194 of itself, so every reachable node in 'roots' and 'heads' will be
1195 1195 included in 'nodes'.
1196 1196
1197 1197 'outroots' is the list of reachable nodes in 'roots', i.e., the
1198 1198 subset of 'roots' that is returned in 'nodes'. Likewise,
1199 1199 'outheads' is the subset of 'heads' that is also in 'nodes'.
1200 1200
1201 1201 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1202 1202 unspecified, uses nullid as the only root. If 'heads' is
1203 1203 unspecified, uses list of all of the revlog's heads."""
1204 1204 nonodes = ([], [], [])
1205 1205 if roots is not None:
1206 1206 roots = list(roots)
1207 1207 if not roots:
1208 1208 return nonodes
1209 1209 lowestrev = min([self.rev(n) for n in roots])
1210 1210 else:
1211 1211 roots = [self.nullid] # Everybody's a descendant of nullid
1212 1212 lowestrev = nullrev
1213 1213 if (lowestrev == nullrev) and (heads is None):
1214 1214 # We want _all_ the nodes!
1215 1215 return (
1216 1216 [self.node(r) for r in self],
1217 1217 [self.nullid],
1218 1218 list(self.heads()),
1219 1219 )
1220 1220 if heads is None:
1221 1221 # All nodes are ancestors, so the latest ancestor is the last
1222 1222 # node.
1223 1223 highestrev = len(self) - 1
1224 1224 # Set ancestors to None to signal that every node is an ancestor.
1225 1225 ancestors = None
1226 1226 # Set heads to an empty dictionary for later discovery of heads
1227 1227 heads = {}
1228 1228 else:
1229 1229 heads = list(heads)
1230 1230 if not heads:
1231 1231 return nonodes
1232 1232 ancestors = set()
1233 1233 # Turn heads into a dictionary so we can remove 'fake' heads.
1234 1234 # Also, later we will be using it to filter out the heads we can't
1235 1235 # find from roots.
1236 1236 heads = dict.fromkeys(heads, False)
1237 1237 # Start at the top and keep marking parents until we're done.
1238 1238 nodestotag = set(heads)
1239 1239 # Remember where the top was so we can use it as a limit later.
1240 1240 highestrev = max([self.rev(n) for n in nodestotag])
1241 1241 while nodestotag:
1242 1242 # grab a node to tag
1243 1243 n = nodestotag.pop()
1244 1244 # Never tag nullid
1245 1245 if n == self.nullid:
1246 1246 continue
1247 1247 # A node's revision number represents its place in a
1248 1248 # topologically sorted list of nodes.
1249 1249 r = self.rev(n)
1250 1250 if r >= lowestrev:
1251 1251 if n not in ancestors:
1252 1252 # If we are possibly a descendant of one of the roots
1253 1253 # and we haven't already been marked as an ancestor
1254 1254 ancestors.add(n) # Mark as ancestor
1255 1255 # Add non-nullid parents to list of nodes to tag.
1256 1256 nodestotag.update(
1257 1257 [p for p in self.parents(n) if p != self.nullid]
1258 1258 )
1259 1259 elif n in heads: # We've seen it before, is it a fake head?
1260 1260 # So it is, real heads should not be the ancestors of
1261 1261 # any other heads.
1262 1262 heads.pop(n)
1263 1263 if not ancestors:
1264 1264 return nonodes
1265 1265 # Now that we have our set of ancestors, we want to remove any
1266 1266 # roots that are not ancestors.
1267 1267
1268 1268 # If one of the roots was nullid, everything is included anyway.
1269 1269 if lowestrev > nullrev:
1270 1270 # But, since we weren't, let's recompute the lowest rev to not
1271 1271 # include roots that aren't ancestors.
1272 1272
1273 1273 # Filter out roots that aren't ancestors of heads
1274 1274 roots = [root for root in roots if root in ancestors]
1275 1275 # Recompute the lowest revision
1276 1276 if roots:
1277 1277 lowestrev = min([self.rev(root) for root in roots])
1278 1278 else:
1279 1279 # No more roots? Return empty list
1280 1280 return nonodes
1281 1281 else:
1282 1282 # We are descending from nullid, and don't need to care about
1283 1283 # any other roots.
1284 1284 lowestrev = nullrev
1285 1285 roots = [self.nullid]
1286 1286 # Transform our roots list into a set.
1287 1287 descendants = set(roots)
1288 1288 # Also, keep the original roots so we can filter out roots that aren't
1289 1289 # 'real' roots (i.e. are descended from other roots).
1290 1290 roots = descendants.copy()
1291 1291 # Our topologically sorted list of output nodes.
1292 1292 orderedout = []
1293 1293 # Don't start at nullid since we don't want nullid in our output list,
1294 1294 # and if nullid shows up in descendants, empty parents will look like
1295 1295 # they're descendants.
1296 1296 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1297 1297 n = self.node(r)
1298 1298 isdescendant = False
1299 1299 if lowestrev == nullrev: # Everybody is a descendant of nullid
1300 1300 isdescendant = True
1301 1301 elif n in descendants:
1302 1302 # n is already a descendant
1303 1303 isdescendant = True
1304 1304 # This check only needs to be done here because all the roots
1305 1305 # will start being marked as descendants before the loop.
1306 1306 if n in roots:
1307 1307 # If n was a root, check if it's a 'real' root.
1308 1308 p = tuple(self.parents(n))
1309 1309 # If any of its parents are descendants, it's not a root.
1310 1310 if (p[0] in descendants) or (p[1] in descendants):
1311 1311 roots.remove(n)
1312 1312 else:
1313 1313 p = tuple(self.parents(n))
1314 1314 # A node is a descendant if either of its parents is a
1315 1315 # descendant. (We seeded the descendants set with the roots
1316 1316 # up there, remember?)
1317 1317 if (p[0] in descendants) or (p[1] in descendants):
1318 1318 descendants.add(n)
1319 1319 isdescendant = True
1320 1320 if isdescendant and ((ancestors is None) or (n in ancestors)):
1321 1321 # Only include nodes that are both descendants and ancestors.
1322 1322 orderedout.append(n)
1323 1323 if (ancestors is not None) and (n in heads):
1324 1324 # We're trying to figure out which heads are reachable
1325 1325 # from roots.
1326 1326 # Mark this head as having been reached
1327 1327 heads[n] = True
1328 1328 elif ancestors is None:
1329 1329 # Otherwise, we're trying to discover the heads.
1330 1330 # Assume this is a head because if it isn't, the next step
1331 1331 # will eventually remove it.
1332 1332 heads[n] = True
1333 1333 # But, obviously its parents aren't.
1334 1334 for p in self.parents(n):
1335 1335 heads.pop(p, None)
1336 1336 heads = [head for head, flag in heads.items() if flag]
1337 1337 roots = list(roots)
1338 1338 assert orderedout
1339 1339 assert roots
1340 1340 assert heads
1341 1341 return (orderedout, roots, heads)
1342 1342
1343 1343 def headrevs(self, revs=None):
1344 1344 if revs is None:
1345 1345 try:
1346 1346 return self.index.headrevs()
1347 1347 except AttributeError:
1348 1348 return self._headrevs()
1349 1349 if rustdagop is not None and self.index.rust_ext_compat:
1350 1350 return rustdagop.headrevs(self.index, revs)
1351 1351 return dagop.headrevs(revs, self._uncheckedparentrevs)
1352 1352
1353 1353 def computephases(self, roots):
1354 1354 return self.index.computephasesmapsets(roots)
1355 1355
1356 1356 def _headrevs(self):
1357 1357 count = len(self)
1358 1358 if not count:
1359 1359 return [nullrev]
1360 1360 # we won't iterate over filtered revs so nobody is a head at start
1361 1361 ishead = [0] * (count + 1)
1362 1362 index = self.index
1363 1363 for r in self:
1364 1364 ishead[r] = 1 # I may be a head
1365 1365 e = index[r]
1366 1366 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1367 1367 return [r for r, val in enumerate(ishead) if val]
1368 1368
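# Editor's worked example for _headrevs (made-up parent table, nullrev is
# -1): with parents {0: (-1, -1), 1: (0, -1), 2: (0, -1)}, each rev is
# first marked as a candidate head and cleared when seen as a parent,
# leaving revs 1 and 2 as the heads.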
1369 1369 def heads(self, start=None, stop=None):
1370 1370 """return the list of all nodes that have no children
1371 1371
1372 1372 if start is specified, only heads that are descendants of
1373 1373 start will be returned
1374 1374 if stop is specified, it will consider all the revs from stop
1375 1375 as if they had no children
1376 1376 """
1377 1377 if start is None and stop is None:
1378 1378 if not len(self):
1379 1379 return [self.nullid]
1380 1380 return [self.node(r) for r in self.headrevs()]
1381 1381
1382 1382 if start is None:
1383 1383 start = nullrev
1384 1384 else:
1385 1385 start = self.rev(start)
1386 1386
1387 1387 stoprevs = {self.rev(n) for n in stop or []}
1388 1388
1389 1389 revs = dagop.headrevssubset(
1390 1390 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1391 1391 )
1392 1392
1393 1393 return [self.node(rev) for rev in revs]
1394 1394
1395 1395 def children(self, node):
1396 1396 """find the children of a given node"""
1397 1397 c = []
1398 1398 p = self.rev(node)
1399 1399 for r in self.revs(start=p + 1):
1400 1400 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1401 1401 if prevs:
1402 1402 for pr in prevs:
1403 1403 if pr == p:
1404 1404 c.append(self.node(r))
1405 1405 elif p == nullrev:
1406 1406 c.append(self.node(r))
1407 1407 return c
1408 1408
1409 1409 def commonancestorsheads(self, a, b):
1410 1410 """calculate all the heads of the common ancestors of nodes a and b"""
1411 1411 a, b = self.rev(a), self.rev(b)
1412 1412 ancs = self._commonancestorsheads(a, b)
1413 1413 return pycompat.maplist(self.node, ancs)
1414 1414
1415 1415 def _commonancestorsheads(self, *revs):
1416 1416 """calculate all the heads of the common ancestors of revs"""
1417 1417 try:
1418 1418 ancs = self.index.commonancestorsheads(*revs)
1419 1419 except (AttributeError, OverflowError): # C implementation failed
1420 1420 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1421 1421 return ancs
1422 1422
1423 1423 def isancestor(self, a, b):
1424 1424 """return True if node a is an ancestor of node b
1425 1425
1426 1426 A revision is considered an ancestor of itself."""
1427 1427 a, b = self.rev(a), self.rev(b)
1428 1428 return self.isancestorrev(a, b)
1429 1429
1430 1430 def isancestorrev(self, a, b):
1431 1431 """return True if revision a is an ancestor of revision b
1432 1432
1433 1433 A revision is considered an ancestor of itself.
1434 1434
1435 1435 The implementation of this is trivial but the use of
1436 1436 reachableroots is not."""
1437 1437 if a == nullrev:
1438 1438 return True
1439 1439 elif a == b:
1440 1440 return True
1441 1441 elif a > b:
1442 1442 return False
1443 1443 return bool(self.reachableroots(a, [b], [a], includepath=False))
1444 1444
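# Editor's note: revision numbers are assigned in topological order, so an
# ancestor's rev is never greater than its descendant's; that ordering is
# what makes the ``a > b`` early return above safe.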
1445 1445 def reachableroots(self, minroot, heads, roots, includepath=False):
1446 1446 """return (heads(::(<roots> and <roots>::<heads>)))
1447 1447
1448 1448 If includepath is True, return (<roots>::<heads>)."""
1449 1449 try:
1450 1450 return self.index.reachableroots2(
1451 1451 minroot, heads, roots, includepath
1452 1452 )
1453 1453 except AttributeError:
1454 1454 return dagop._reachablerootspure(
1455 1455 self.parentrevs, minroot, roots, heads, includepath
1456 1456 )
1457 1457
1458 1458 def ancestor(self, a, b):
1459 1459 """calculate the "best" common ancestor of nodes a and b"""
1460 1460
1461 1461 a, b = self.rev(a), self.rev(b)
1462 1462 try:
1463 1463 ancs = self.index.ancestors(a, b)
1464 1464 except (AttributeError, OverflowError):
1465 1465 ancs = ancestor.ancestors(self.parentrevs, a, b)
1466 1466 if ancs:
1467 1467 # choose a consistent winner when there's a tie
1468 1468 return min(map(self.node, ancs))
1469 1469 return self.nullid
1470 1470
1471 1471 def _match(self, id):
1472 1472 if isinstance(id, int):
1473 1473 # rev
1474 1474 return self.node(id)
1475 1475 if len(id) == self.nodeconstants.nodelen:
1476 1476 # possibly a binary node
1477 1477 # odds of a binary node being all hex in ASCII are 1 in 10**25
1478 1478 try:
1479 1479 node = id
1480 1480 self.rev(node) # quick search the index
1481 1481 return node
1482 1482 except error.LookupError:
1483 1483 pass # may be partial hex id
1484 1484 try:
1485 1485 # str(rev)
1486 1486 rev = int(id)
1487 1487 if b"%d" % rev != id:
1488 1488 raise ValueError
1489 1489 if rev < 0:
1490 1490 rev = len(self) + rev
1491 1491 if rev < 0 or rev >= len(self):
1492 1492 raise ValueError
1493 1493 return self.node(rev)
1494 1494 except (ValueError, OverflowError):
1495 1495 pass
1496 1496 if len(id) == 2 * self.nodeconstants.nodelen:
1497 1497 try:
1498 1498 # a full hex nodeid?
1499 1499 node = bin(id)
1500 1500 self.rev(node)
1501 1501 return node
1502 1502 except (binascii.Error, error.LookupError):
1503 1503 pass
1504 1504
1505 1505 def _partialmatch(self, id):
1506 1506 # we don't care about wdirfilenodeids as they should always be full hashes
1507 1507 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1508 1508 ambiguous = False
1509 1509 try:
1510 1510 partial = self.index.partialmatch(id)
1511 1511 if partial and self.hasnode(partial):
1512 1512 if maybewdir:
1513 1513 # single 'ff...' match in radix tree, ambiguous with wdir
1514 1514 ambiguous = True
1515 1515 else:
1516 1516 return partial
1517 1517 elif maybewdir:
1518 1518 # no 'ff...' match in radix tree, wdir identified
1519 1519 raise error.WdirUnsupported
1520 1520 else:
1521 1521 return None
1522 1522 except error.RevlogError:
1523 1523 # parsers.c radix tree lookup gave multiple matches
1524 1524 # fast path: for unfiltered changelog, radix tree is accurate
1525 1525 if not getattr(self, 'filteredrevs', None):
1526 1526 ambiguous = True
1527 1527 # fall through to slow path that filters hidden revisions
1528 1528 except (AttributeError, ValueError):
1529 1529 # we are pure python, or key is not hex
1530 1530 pass
1531 1531 if ambiguous:
1532 1532 raise error.AmbiguousPrefixLookupError(
1533 1533 id, self.display_id, _(b'ambiguous identifier')
1534 1534 )
1535 1535
1536 1536 if id in self._pcache:
1537 1537 return self._pcache[id]
1538 1538
1539 1539 if len(id) <= 40:
1540 1540 # hex(node)[:...]
1541 1541 l = len(id) // 2 * 2 # grab an even number of digits
1542 1542 try:
1543 1543 # we're dropping the last digit, so let's check that it's hex,
1544 1544 # to avoid the expensive computation below if it's not
1545 1545 if len(id) % 2 > 0:
1546 1546 if not (id[-1] in hexdigits):
1547 1547 return None
1548 1548 prefix = bin(id[:l])
1549 1549 except binascii.Error:
1550 1550 pass
1551 1551 else:
1552 1552 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1553 1553 nl = [
1554 1554 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1555 1555 ]
1556 1556 if self.nodeconstants.nullhex.startswith(id):
1557 1557 nl.append(self.nullid)
1558 1558 if len(nl) > 0:
1559 1559 if len(nl) == 1 and not maybewdir:
1560 1560 self._pcache[id] = nl[0]
1561 1561 return nl[0]
1562 1562 raise error.AmbiguousPrefixLookupError(
1563 1563 id, self.display_id, _(b'ambiguous identifier')
1564 1564 )
1565 1565 if maybewdir:
1566 1566 raise error.WdirUnsupported
1567 1567 return None
1568 1568
1569 1569 def lookup(self, id):
1570 1570 """locate a node based on:
1571 1571 - revision number or str(revision number)
1572 1572 - nodeid or subset of hex nodeid
1573 1573 """
1574 1574 n = self._match(id)
1575 1575 if n is not None:
1576 1576 return n
1577 1577 n = self._partialmatch(id)
1578 1578 if n:
1579 1579 return n
1580 1580
1581 1581 raise error.LookupError(id, self.display_id, _(b'no match found'))
1582 1582
1583 1583 def shortest(self, node, minlength=1):
1584 1584 """Find the shortest unambiguous prefix that matches node."""
1585 1585
1586 1586 def isvalid(prefix):
1587 1587 try:
1588 1588 matchednode = self._partialmatch(prefix)
1589 1589 except error.AmbiguousPrefixLookupError:
1590 1590 return False
1591 1591 except error.WdirUnsupported:
1592 1592 # single 'ff...' match
1593 1593 return True
1594 1594 if matchednode is None:
1595 1595 raise error.LookupError(node, self.display_id, _(b'no node'))
1596 1596 return True
1597 1597
1598 1598 def maybewdir(prefix):
1599 1599 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1600 1600
1601 1601 hexnode = hex(node)
1602 1602
1603 1603 def disambiguate(hexnode, minlength):
1604 1604 """Disambiguate against wdirid."""
1605 1605 for length in range(minlength, len(hexnode) + 1):
1606 1606 prefix = hexnode[:length]
1607 1607 if not maybewdir(prefix):
1608 1608 return prefix
1609 1609
1610 1610 if not getattr(self, 'filteredrevs', None):
1611 1611 try:
1612 1612 length = max(self.index.shortest(node), minlength)
1613 1613 return disambiguate(hexnode, length)
1614 1614 except error.RevlogError:
1615 1615 if node != self.nodeconstants.wdirid:
1616 1616 raise error.LookupError(
1617 1617 node, self.display_id, _(b'no node')
1618 1618 )
1619 1619 except AttributeError:
1620 1620 # Fall through to pure code
1621 1621 pass
1622 1622
1623 1623 if node == self.nodeconstants.wdirid:
1624 1624 for length in range(minlength, len(hexnode) + 1):
1625 1625 prefix = hexnode[:length]
1626 1626 if isvalid(prefix):
1627 1627 return prefix
1628 1628
1629 1629 for length in range(minlength, len(hexnode) + 1):
1630 1630 prefix = hexnode[:length]
1631 1631 if isvalid(prefix):
1632 1632 return disambiguate(hexnode, length)
1633 1633
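# Hedged sketch: the prefix returned by ``shortest`` is meant to resolve
# back to the same node through ``lookup`` (``rl`` and ``node`` are
# illustrative values):
#
#     prefix = rl.shortest(node, minlength=4)
#     assert rl.lookup(prefix) == node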
1634 1634 def cmp(self, node, text):
1635 1635 """compare text with a given file revision
1636 1636
1637 1637 returns True if text is different than what is stored.
1638 1638 """
1639 1639 p1, p2 = self.parents(node)
1640 1640 return storageutil.hashrevisionsha1(text, p1, p2) != node
1641 1641
1642 1642 def _getsegmentforrevs(self, startrev, endrev, df=None):
1643 1643 """Obtain a segment of raw data corresponding to a range of revisions.
1644 1644
1645 1645 Accepts the start and end revisions and an optional already-open
1646 1646 file handle to be used for reading. If the file handle is read, its
1647 1647 seek position will not be preserved.
1648 1648
1649 1649 Requests for data may be satisfied by a cache.
1650 1650
1651 1651 Returns a 2-tuple of (offset, data) for the requested range of
1652 1652 revisions. Offset is the integer offset from the beginning of the
1653 1653 revlog and data is a str or buffer of the raw byte data.
1654 1654
1655 1655 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1656 1656 to determine where each revision's data begins and ends.
1657 1657 """
1658 1658 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1659 1659 # (functions are expensive).
1660 1660 index = self.index
1661 1661 istart = index[startrev]
1662 1662 start = int(istart[0] >> 16)
1663 1663 if startrev == endrev:
1664 1664 end = start + istart[1]
1665 1665 else:
1666 1666 iend = index[endrev]
1667 1667 end = int(iend[0] >> 16) + iend[1]
1668 1668
1669 1669 if self._inline:
1670 1670 start += (startrev + 1) * self.index.entry_size
1671 1671 end += (endrev + 1) * self.index.entry_size
1672 1672 length = end - start
1673 1673
1674 1674 return start, self._segmentfile.read_chunk(start, length, df)
1675 1675
1676 1676 def _chunk(self, rev, df=None):
1677 1677 """Obtain a single decompressed chunk for a revision.
1678 1678
1679 1679 Accepts an integer revision and an optional already-open file handle
1680 1680 to be used for reading. If used, the seek position of the file will not
1681 1681 be preserved.
1682 1682
1683 1683 Returns a str holding uncompressed data for the requested revision.
1684 1684 """
1685 1685 compression_mode = self.index[rev][10]
1686 1686 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1687 1687 if compression_mode == COMP_MODE_PLAIN:
1688 1688 return data
1689 1689 elif compression_mode == COMP_MODE_DEFAULT:
1690 1690 return self._decompressor(data)
1691 1691 elif compression_mode == COMP_MODE_INLINE:
1692 1692 return self.decompress(data)
1693 1693 else:
1694 1694 msg = b'unknown compression mode %d'
1695 1695 msg %= compression_mode
1696 1696 raise error.RevlogError(msg)
1697 1697
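# Hedged illustration of the dispatch above: COMP_MODE_PLAIN stores the
# chunk as raw bytes, COMP_MODE_DEFAULT uses the docket-level default
# compressor, and COMP_MODE_INLINE relies on the per-chunk header that
# ``decompress`` routes on. For an inline-mode chunk, ``_chunk`` is
# therefore equivalent to the following (illustrative, assuming such a
# revision exists):
#
#     data = rl._getsegmentforrevs(rev, rev)[1]
#     if rl.index[rev][10] == COMP_MODE_INLINE:
#         assert bytes(rl._chunk(rev)) == bytes(rl.decompress(data))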
1698 1698 def _chunks(self, revs, df=None, targetsize=None):
1699 1699 """Obtain decompressed chunks for the specified revisions.
1700 1700
1701 1701 Accepts an iterable of numeric revisions that are assumed to be in
1702 1702 ascending order. Also accepts an optional already-open file handle
1703 1703 to be used for reading. If used, the seek position of the file will
1704 1704 not be preserved.
1705 1705
1706 1706 This function is similar to calling ``self._chunk()`` multiple times,
1707 1707 but is faster.
1708 1708
1709 1709 Returns a list with decompressed data for each requested revision.
1710 1710 """
1711 1711 if not revs:
1712 1712 return []
1713 1713 start = self.start
1714 1714 length = self.length
1715 1715 inline = self._inline
1716 1716 iosize = self.index.entry_size
1717 1717 buffer = util.buffer
1718 1718
1719 1719 l = []
1720 1720 ladd = l.append
1721 1721
1722 1722 if not self._withsparseread:
1723 1723 slicedchunks = (revs,)
1724 1724 else:
1725 1725 slicedchunks = deltautil.slicechunk(
1726 1726 self, revs, targetsize=targetsize
1727 1727 )
1728 1728
1729 1729 for revschunk in slicedchunks:
1730 1730 firstrev = revschunk[0]
1731 1731 # Skip trailing revisions with empty diff
1732 1732 for lastrev in revschunk[::-1]:
1733 1733 if length(lastrev) != 0:
1734 1734 break
1735 1735
1736 1736 try:
1737 1737 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1738 1738 except OverflowError:
1739 1739 # issue4215 - we can't cache a run of chunks greater than
1740 1740 # 2G on Windows
1741 1741 return [self._chunk(rev, df=df) for rev in revschunk]
1742 1742
1743 1743 decomp = self.decompress
1744 1744 # self._decompressor might be None, but will not be used in that case
1745 1745 def_decomp = self._decompressor
1746 1746 for rev in revschunk:
1747 1747 chunkstart = start(rev)
1748 1748 if inline:
1749 1749 chunkstart += (rev + 1) * iosize
1750 1750 chunklength = length(rev)
1751 1751 comp_mode = self.index[rev][10]
1752 1752 c = buffer(data, chunkstart - offset, chunklength)
1753 1753 if comp_mode == COMP_MODE_PLAIN:
1754 1754 ladd(c)
1755 1755 elif comp_mode == COMP_MODE_INLINE:
1756 1756 ladd(decomp(c))
1757 1757 elif comp_mode == COMP_MODE_DEFAULT:
1758 1758 ladd(def_decomp(c))
1759 1759 else:
1760 1760 msg = b'unknown compression mode %d'
1761 1761 msg %= comp_mode
1762 1762 raise error.RevlogError(msg)
1763 1763
1764 1764 return l
1765 1765
1766 1766 def deltaparent(self, rev):
1767 1767 """return deltaparent of the given revision"""
1768 1768 base = self.index[rev][3]
1769 1769 if base == rev:
1770 1770 return nullrev
1771 1771 elif self._generaldelta:
1772 1772 return base
1773 1773 else:
1774 1774 return rev - 1
1775 1775
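# Hedged restatement of the rules above (``rl`` and ``rev`` are
# illustrative): with general delta the base recorded in the index entry
# *is* the delta parent; without it, deltas implicitly chain to the
# previous revision.
#
#     base = rl.index[rev][3]
#     if base == rev:                          # stored as a full text
#         assert rl.deltaparent(rev) == nullrev
#     elif rl._generaldelta:
#         assert rl.deltaparent(rev) == base
#     else:
#         assert rl.deltaparent(rev) == rev - 1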
1776 1776 def issnapshot(self, rev):
1777 1777 """tells whether rev is a snapshot"""
1778 1778 if not self._sparserevlog:
1779 1779 return self.deltaparent(rev) == nullrev
1780 1780 elif util.safehasattr(self.index, b'issnapshot'):
1781 1781 # directly assign the method to cache the testing and access
1782 1782 self.issnapshot = self.index.issnapshot
1783 1783 return self.issnapshot(rev)
1784 1784 if rev == nullrev:
1785 1785 return True
1786 1786 entry = self.index[rev]
1787 1787 base = entry[3]
1788 1788 if base == rev:
1789 1789 return True
1790 1790 if base == nullrev:
1791 1791 return True
1792 1792 p1 = entry[5]
1793 1793 while self.length(p1) == 0:
1794 1794 b = self.deltaparent(p1)
1795 1795 if b == p1:
1796 1796 break
1797 1797 p1 = b
1798 1798 p2 = entry[6]
1799 1799 while self.length(p2) == 0:
1800 1800 b = self.deltaparent(p2)
1801 1801 if b == p2:
1802 1802 break
1803 1803 p2 = b
1804 1804 if base == p1 or base == p2:
1805 1805 return False
1806 1806 return self.issnapshot(base)
1807 1807
1808 1808 def snapshotdepth(self, rev):
1809 1809 """number of snapshot in the chain before this one"""
1810 1810 if not self.issnapshot(rev):
1811 1811 raise error.ProgrammingError(b'revision %d not a snapshot')
1812 1812 return len(self._deltachain(rev)[0]) - 1
1813 1813
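# Hedged sketch relating the two methods above (illustrative values): a
# full snapshot has depth 0, and each intermediate snapshot in the delta
# chain adds one level.
#
#     if rl.issnapshot(rev):
#         depth = rl.snapshotdepth(rev)   # 0 for a full-text snapshot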
1814 1814 def revdiff(self, rev1, rev2):
1815 1815 """return or calculate a delta between two revisions
1816 1816
1817 1817 The delta calculated is in binary form and is intended to be written to
1818 1818 revlog data directly. So this function needs raw revision data.
1819 1819 """
1820 1820 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1821 1821 return bytes(self._chunk(rev2))
1822 1822
1823 1823 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1824 1824
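# Hedged round-trip for ``revdiff`` (illustrative revisions): the result
# is a binary mdiff patch, so applying it to rev1's raw data should
# reproduce rev2's raw data.
#
#     delta = rl.revdiff(rev1, rev2)
#     assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)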
1825 1825 def revision(self, nodeorrev, _df=None):
1826 1826 """return an uncompressed revision of a given node or revision
1827 1827 number.
1828 1828
1829 1829 _df - an existing file handle to read from. (internal-only)
1830 1830 """
1831 1831 return self._revisiondata(nodeorrev, _df)
1832 1832
1833 1833 def sidedata(self, nodeorrev, _df=None):
1834 1834 """a map of extra data related to the changeset but not part of the hash
1835 1835
1836 1836 This function currently returns a dictionary. However, a more advanced
1837 1837 mapping object will likely be used in the future for more
1838 1838 efficient/lazy code.
1839 1839 """
1840 1840 # deal with <nodeorrev> argument type
1841 1841 if isinstance(nodeorrev, int):
1842 1842 rev = nodeorrev
1843 1843 else:
1844 1844 rev = self.rev(nodeorrev)
1845 1845 return self._sidedata(rev)
1846 1846
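# Hedged example (``rl`` and ``rev`` are illustrative): sidedata lookup
# degrades to an empty mapping when the revision carries none.
#
#     extra = rl.sidedata(rev)   # {} when the revision has no sidedata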
1847 1847 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1848 1848 # deal with <nodeorrev> argument type
1849 1849 if isinstance(nodeorrev, int):
1850 1850 rev = nodeorrev
1851 1851 node = self.node(rev)
1852 1852 else:
1853 1853 node = nodeorrev
1854 1854 rev = None
1855 1855
1856 1856 # fast path the special `nullid` rev
1857 1857 if node == self.nullid:
1858 1858 return b""
1859 1859
1860 1860 # ``rawtext`` is the text as stored inside the revlog. Might be the
1861 1861 # revision or might need to be processed to retrieve the revision.
1862 1862 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1863 1863
1864 1864 if raw and validated:
1865 1865 # if we don't want to process the raw text and that raw
1866 1866 # text is cached, we can exit early.
1867 1867 return rawtext
1868 1868 if rev is None:
1869 1869 rev = self.rev(node)
1870 1870 # the revlog's flags for this revision
1871 1871 # (they usually alter its state or content)
1872 1872 flags = self.flags(rev)
1873 1873
1874 1874 if validated and flags == REVIDX_DEFAULT_FLAGS:
1875 1875 # no extra flags set, no flag processor runs, text = rawtext
1876 1876 return rawtext
1877 1877
1878 1878 if raw:
1879 1879 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1880 1880 text = rawtext
1881 1881 else:
1882 1882 r = flagutil.processflagsread(self, rawtext, flags)
1883 1883 text, validatehash = r
1884 1884 if validatehash:
1885 1885 self.checkhash(text, node, rev=rev)
1886 1886 if not validated:
1887 1887 self._revisioncache = (node, rev, rawtext)
1888 1888
1889 1889 return text
1890 1890
1891 1891 def _rawtext(self, node, rev, _df=None):
1892 1892 """return the possibly unvalidated rawtext for a revision
1893 1893
1894 1894 returns (rev, rawtext, validated)
1895 1895 """
1896 1896
1897 1897 # revision in the cache (could be useful to apply delta)
1898 1898 cachedrev = None
1899 1899 # An intermediate text to apply deltas to
1900 1900 basetext = None
1901 1901
1902 1902 # Check if we have the entry in cache
1903 1903 # The cache entry looks like (node, rev, rawtext)
1904 1904 if self._revisioncache:
1905 1905 if self._revisioncache[0] == node:
1906 1906 return (rev, self._revisioncache[2], True)
1907 1907 cachedrev = self._revisioncache[1]
1908 1908
1909 1909 if rev is None:
1910 1910 rev = self.rev(node)
1911 1911
1912 1912 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1913 1913 if stopped:
1914 1914 basetext = self._revisioncache[2]
1915 1915
1916 1916 # drop cache to save memory, the caller is expected to
1917 1917 # update self._revisioncache after validating the text
1918 1918 self._revisioncache = None
1919 1919
1920 1920 targetsize = None
1921 1921 rawsize = self.index[rev][2]
1922 1922 if 0 <= rawsize:
1923 1923 targetsize = 4 * rawsize
1924 1924
1925 1925 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1926 1926 if basetext is None:
1927 1927 basetext = bytes(bins[0])
1928 1928 bins = bins[1:]
1929 1929
1930 1930 rawtext = mdiff.patches(basetext, bins)
1931 1931 del basetext # let us have a chance to free memory early
1932 1932 return (rev, rawtext, False)
1933 1933
1934 1934 def _sidedata(self, rev):
1935 1935 """Return the sidedata for a given revision number."""
1936 1936 index_entry = self.index[rev]
1937 1937 sidedata_offset = index_entry[8]
1938 1938 sidedata_size = index_entry[9]
1939 1939
1940 1940 if self._inline:
1941 1941 sidedata_offset += self.index.entry_size * (1 + rev)
1942 1942 if sidedata_size == 0:
1943 1943 return {}
1944 1944
1945 1945 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1946 1946 filename = self._sidedatafile
1947 1947 end = self._docket.sidedata_end
1948 1948 offset = sidedata_offset
1949 1949 length = sidedata_size
1950 1950 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1951 1951 raise error.RevlogError(m)
1952 1952
1953 1953 comp_segment = self._segmentfile_sidedata.read_chunk(
1954 1954 sidedata_offset, sidedata_size
1955 1955 )
1956 1956
1957 1957 comp = self.index[rev][11]
1958 1958 if comp == COMP_MODE_PLAIN:
1959 1959 segment = comp_segment
1960 1960 elif comp == COMP_MODE_DEFAULT:
1961 1961 segment = self._decompressor(comp_segment)
1962 1962 elif comp == COMP_MODE_INLINE:
1963 1963 segment = self.decompress(comp_segment)
1964 1964 else:
1965 1965 msg = b'unknown compression mode %d'
1966 1966 msg %= comp
1967 1967 raise error.RevlogError(msg)
1968 1968
1969 1969 sidedata = sidedatautil.deserialize_sidedata(segment)
1970 1970 return sidedata
1971 1971
1972 1972 def rawdata(self, nodeorrev, _df=None):
1973 1973 """return an uncompressed raw data of a given node or revision number.
1974 1974
1975 1975 _df - an existing file handle to read from. (internal-only)
1976 1976 """
1977 1977 return self._revisiondata(nodeorrev, _df, raw=True)
1978 1978
1979 1979 def hash(self, text, p1, p2):
1980 1980 """Compute a node hash.
1981 1981
1982 1982 Available as a function so that subclasses can replace the hash
1983 1983 as needed.
1984 1984 """
1985 1985 return storageutil.hashrevisionsha1(text, p1, p2)
1986 1986
1987 1987 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1988 1988 """Check node hash integrity.
1989 1989
1990 1990 Available as a function so that subclasses can extend hash mismatch
1991 1991 behaviors as needed.
1992 1992 """
1993 1993 try:
1994 1994 if p1 is None and p2 is None:
1995 1995 p1, p2 = self.parents(node)
1996 1996 if node != self.hash(text, p1, p2):
1997 1997 # Clear the revision cache on hash failure. The revision cache
1998 1998 # only stores the raw revision and clearing the cache does have
1999 1999 # the side-effect that we won't have a cache hit when the raw
2000 2000 # revision data is accessed. But this case should be rare and
2001 2001 # it is extra work to teach the cache about the hash
2002 2002 # verification state.
2003 2003 if self._revisioncache and self._revisioncache[0] == node:
2004 2004 self._revisioncache = None
2005 2005
2006 2006 revornode = rev
2007 2007 if revornode is None:
2008 2008 revornode = templatefilters.short(hex(node))
2009 2009 raise error.RevlogError(
2010 2010 _(b"integrity check failed on %s:%s")
2011 2011 % (self.display_id, pycompat.bytestr(revornode))
2012 2012 )
2013 2013 except error.RevlogError:
2014 2014 if self._censorable and storageutil.iscensoredtext(text):
2015 2015 raise error.CensoredNodeError(self.display_id, node, text)
2016 2016 raise
2017 2017
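# Hedged sketch of the hash/checkhash contract (illustrative values):
# the node is a SHA-1 over the sorted parent nodes followed by the text,
# so a recomputed hash must match the stored node.
#
#     node = rl.hash(text, p1node, p2node)
#     rl.checkhash(text, node, p1=p1node, p2=p2node)  # raises on mismatch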
2018 2018 def _enforceinlinesize(self, tr):
2019 2019 """Check if the revlog is too big for inline and convert if so.
2020 2020
2021 2021 This should be called after revisions are added to the revlog. If the
2022 2022 revlog has grown too large to be an inline revlog, it will be converted
2023 2023 to use separate index and data files.
2024 2024 """
2025 2025 tiprev = len(self) - 1
2026 2026 total_size = self.start(tiprev) + self.length(tiprev)
2027 2027 if not self._inline or total_size < _maxinline:
2028 2028 return
2029 2029
2030 2030 troffset = tr.findoffset(self._indexfile)
2031 2031 if troffset is None:
2032 2032 raise error.RevlogError(
2033 2033 _(b"%s not found in the transaction") % self._indexfile
2034 2034 )
2035 2035 trindex = None
2036 2036 tr.add(self._datafile, 0)
2037 2037
2038 2038 existing_handles = False
2039 2039 if self._writinghandles is not None:
2040 2040 existing_handles = True
2041 2041 fp = self._writinghandles[0]
2042 2042 fp.flush()
2043 2043 fp.close()
2044 2044 # We can't use the cached file handle after close(). So prevent
2045 2045 # its usage.
2046 2046 self._writinghandles = None
2047 2047 self._segmentfile.writing_handle = None
2048 2048 # No need to deal with the sidedata writing handle as it is only
2049 2049 # relevant for revlog-v2, which is never inline and therefore never
2050 2050 # reaches this code
2051 2051
2052 2052 new_dfh = self._datafp(b'w+')
2053 2053 new_dfh.truncate(0) # drop any potentially existing data
2054 2054 try:
2055 2055 with self._indexfp() as read_ifh:
2056 2056 for r in self:
2057 2057 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2058 2058 if (
2059 2059 trindex is None
2060 2060 and troffset
2061 2061 <= self.start(r) + r * self.index.entry_size
2062 2062 ):
2063 2063 trindex = r
2064 2064 new_dfh.flush()
2065 2065
2066 2066 if trindex is None:
2067 2067 trindex = 0
2068 2068
2069 2069 with self.__index_new_fp() as fp:
2070 2070 self._format_flags &= ~FLAG_INLINE_DATA
2071 2071 self._inline = False
2072 2072 for i in self:
2073 2073 e = self.index.entry_binary(i)
2074 2074 if i == 0 and self._docket is None:
2075 2075 header = self._format_flags | self._format_version
2076 2076 header = self.index.pack_header(header)
2077 2077 e = header + e
2078 2078 fp.write(e)
2079 2079 if self._docket is not None:
2080 2080 self._docket.index_end = fp.tell()
2081 2081
2082 2082 # There is a small transactional race here. If the rename of
2083 2083 # the index fails, we should remove the datafile. It is more
2084 2084 # important to ensure that the data file is not truncated
2085 2085 # when the index is replaced as otherwise data is lost.
2086 2086 tr.replace(self._datafile, self.start(trindex))
2087 2087
2088 2088 # the temp file replaces the real index when we exit the context
2089 2089 # manager
2090 2090
2091 2091 tr.replace(self._indexfile, trindex * self.index.entry_size)
2092 2092 nodemaputil.setup_persistent_nodemap(tr, self)
2093 2093 self._segmentfile = randomaccessfile.randomaccessfile(
2094 2094 self.opener,
2095 2095 self._datafile,
2096 2096 self._chunkcachesize,
2097 2097 )
2098 2098
2099 2099 if existing_handles:
2100 2100 # switched from inline to conventional; reopen the index
2101 2101 ifh = self.__index_write_fp()
2102 2102 self._writinghandles = (ifh, new_dfh, None)
2103 2103 self._segmentfile.writing_handle = new_dfh
2104 2104 new_dfh = None
2105 2105 # No need to deal with the sidedata writing handle as it is only
2106 2106 # relevant for revlog-v2, which is never inline and therefore never
2107 2107 # reaches this code
2108 2108 finally:
2109 2109 if new_dfh is not None:
2110 2110 new_dfh.close()
2111 2111
2112 2112 def _nodeduplicatecallback(self, transaction, node):
2113 2113 """called when trying to add a node already stored."""
2114 2114
2115 2115 @contextlib.contextmanager
2116 2116 def reading(self):
2117 2117 """Context manager that keeps data and sidedata files open for reading"""
2118 2118 with self._segmentfile.reading():
2119 2119 with self._segmentfile_sidedata.reading():
2120 2120 yield
2121 2121
2122 2122 @contextlib.contextmanager
2123 2123 def _writing(self, transaction):
2124 2124 if self._trypending:
2125 2125 msg = b'try to write in a `trypending` revlog: %s'
2126 2126 msg %= self.display_id
2127 2127 raise error.ProgrammingError(msg)
2128 2128 if self._writinghandles is not None:
2129 2129 yield
2130 2130 else:
2131 2131 ifh = dfh = sdfh = None
2132 2132 try:
2133 2133 r = len(self)
2134 2134 # opening the data file.
2135 2135 dsize = 0
2136 2136 if r:
2137 2137 dsize = self.end(r - 1)
2138 2138 dfh = None
2139 2139 if not self._inline:
2140 2140 try:
2141 2141 dfh = self._datafp(b"r+")
2142 2142 if self._docket is None:
2143 2143 dfh.seek(0, os.SEEK_END)
2144 2144 else:
2145 2145 dfh.seek(self._docket.data_end, os.SEEK_SET)
2146 2146 except FileNotFoundError:
2147 2147 dfh = self._datafp(b"w+")
2148 2148 transaction.add(self._datafile, dsize)
2149 2149 if self._sidedatafile is not None:
2150 2150 # revlog-v2 is never inline; help Pytype
2151 2151 assert dfh is not None
2152 2152 try:
2153 2153 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2154 2154 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2155 2155 except FileNotFoundError:
2156 2156 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2157 2157 transaction.add(
2158 2158 self._sidedatafile, self._docket.sidedata_end
2159 2159 )
2160 2160
2161 2161 # opening the index file.
2162 2162 isize = r * self.index.entry_size
2163 2163 ifh = self.__index_write_fp()
2164 2164 if self._inline:
2165 2165 transaction.add(self._indexfile, dsize + isize)
2166 2166 else:
2167 2167 transaction.add(self._indexfile, isize)
2168 2168 # exposing all file handle for writing.
2169 2169 self._writinghandles = (ifh, dfh, sdfh)
2170 2170 self._segmentfile.writing_handle = ifh if self._inline else dfh
2171 2171 self._segmentfile_sidedata.writing_handle = sdfh
2172 2172 yield
2173 2173 if self._docket is not None:
2174 2174 self._write_docket(transaction)
2175 2175 finally:
2176 2176 self._writinghandles = None
2177 2177 self._segmentfile.writing_handle = None
2178 2178 self._segmentfile_sidedata.writing_handle = None
2179 2179 if dfh is not None:
2180 2180 dfh.close()
2181 2181 if sdfh is not None:
2182 2182 sdfh.close()
2183 2183 # close the index file last, to avoid exposing references to
2184 2184 # potentially unflushed data content.
2185 2185 if ifh is not None:
2186 2186 ifh.close()
2187 2187
2188 2188 def _write_docket(self, transaction):
2189 2189 """write the current docket on disk
2190 2190
2191 2191 Exists as a method to help the changelog implement transaction logic.
2192 2192 
2193 2193 We could also imagine using the same transaction logic for all revlogs
2194 2194 since dockets are cheap."""
2195 2195 self._docket.write(transaction)
2196 2196
2197 2197 def addrevision(
2198 2198 self,
2199 2199 text,
2200 2200 transaction,
2201 2201 link,
2202 2202 p1,
2203 2203 p2,
2204 2204 cachedelta=None,
2205 2205 node=None,
2206 2206 flags=REVIDX_DEFAULT_FLAGS,
2207 2207 deltacomputer=None,
2208 2208 sidedata=None,
2209 2209 ):
2210 2210 """add a revision to the log
2211 2211
2212 2212 text - the revision data to add
2213 2213 transaction - the transaction object used for rollback
2214 2214 link - the linkrev data to add
2215 2215 p1, p2 - the parent nodeids of the revision
2216 2216 cachedelta - an optional precomputed delta
2217 2217 node - nodeid of revision; typically node is not specified, and it is
2218 2218 computed by default as hash(text, p1, p2), however subclasses might
2219 2219 use different hashing method (and override checkhash() in such case)
2220 2220 flags - the known flags to set on the revision
2221 2221 deltacomputer - an optional deltacomputer instance shared between
2222 2222 multiple calls
2223 2223 """
2224 2224 if link == nullrev:
2225 2225 raise error.RevlogError(
2226 2226 _(b"attempted to add linkrev -1 to %s") % self.display_id
2227 2227 )
2228 2228
2229 2229 if sidedata is None:
2230 2230 sidedata = {}
2231 2231 elif sidedata and not self.hassidedata:
2232 2232 raise error.ProgrammingError(
2233 2233 _(b"trying to add sidedata to a revlog who don't support them")
2234 2234 )
2235 2235
2236 2236 if flags:
2237 2237 node = node or self.hash(text, p1, p2)
2238 2238
2239 2239 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2240 2240
2241 2241 # If the flag processor modifies the revision data, ignore any provided
2242 2242 # cachedelta.
2243 2243 if rawtext != text:
2244 2244 cachedelta = None
2245 2245
2246 2246 if len(rawtext) > _maxentrysize:
2247 2247 raise error.RevlogError(
2248 2248 _(
2249 2249 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2250 2250 )
2251 2251 % (self.display_id, len(rawtext))
2252 2252 )
2253 2253
2254 2254 node = node or self.hash(rawtext, p1, p2)
2255 2255 rev = self.index.get_rev(node)
2256 2256 if rev is not None:
2257 2257 return rev
2258 2258
2259 2259 if validatehash:
2260 2260 self.checkhash(rawtext, node, p1=p1, p2=p2)
2261 2261
2262 2262 return self.addrawrevision(
2263 2263 rawtext,
2264 2264 transaction,
2265 2265 link,
2266 2266 p1,
2267 2267 p2,
2268 2268 node,
2269 2269 flags,
2270 2270 cachedelta=cachedelta,
2271 2271 deltacomputer=deltacomputer,
2272 2272 sidedata=sidedata,
2273 2273 )
2274 2274
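# Hedged usage sketch for ``addrevision`` (all names illustrative): new
# revisions are added inside a transaction, and the node is normally
# left for the revlog to compute.
#
#     with repo.transaction(b'example') as tr:
#         rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
#         node = rl.node(rev)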
2275 2275 def addrawrevision(
2276 2276 self,
2277 2277 rawtext,
2278 2278 transaction,
2279 2279 link,
2280 2280 p1,
2281 2281 p2,
2282 2282 node,
2283 2283 flags,
2284 2284 cachedelta=None,
2285 2285 deltacomputer=None,
2286 2286 sidedata=None,
2287 2287 ):
2288 2288 """add a raw revision with known flags, node and parents
2289 2289 useful when reusing a revision not stored in this revlog (ex: received
2290 2290 over wire, or read from an external bundle).
2291 2291 """
2292 2292 with self._writing(transaction):
2293 2293 return self._addrevision(
2294 2294 node,
2295 2295 rawtext,
2296 2296 transaction,
2297 2297 link,
2298 2298 p1,
2299 2299 p2,
2300 2300 flags,
2301 2301 cachedelta,
2302 2302 deltacomputer=deltacomputer,
2303 2303 sidedata=sidedata,
2304 2304 )
2305 2305
2306 2306 def compress(self, data):
2307 2307 """Generate a possibly-compressed representation of data."""
2308 2308 if not data:
2309 2309 return b'', data
2310 2310
2311 2311 compressed = self._compressor.compress(data)
2312 2312
2313 2313 if compressed:
2314 2314 # The revlog compressor added the header in the returned data.
2315 2315 return b'', compressed
2316 2316
2317 2317 if data[0:1] == b'\0':
2318 2318 return b'', data
2319 2319 return b'u', data
2320 2320
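# Hedged round-trip for ``compress``/``decompress`` (``payload`` is an
# illustrative byte string): the return value is a (header, data) pair
# whose concatenation is exactly what ``decompress`` expects.
#
#     header, packed = rl.compress(payload)
#     assert bytes(rl.decompress(header + packed)) == payload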
2321 2321 def decompress(self, data):
2322 2322 """Decompress a revlog chunk.
2323 2323
2324 2324 The chunk is expected to begin with a header identifying the
2325 2325 format type so it can be routed to an appropriate decompressor.
2326 2326 """
2327 2327 if not data:
2328 2328 return data
2329 2329
2330 2330 # Revlogs are read much more frequently than they are written and many
2331 2331 # chunks only take microseconds to decompress, so performance is
2332 2332 # important here.
2333 2333 #
2334 2334 # We can make a few assumptions about revlogs:
2335 2335 #
2336 2336 # 1) the majority of chunks will be compressed (as opposed to inline
2337 2337 # raw data).
2338 2338 # 2) decompressing *any* data will likely be at least 10x slower than
2339 2339 # returning raw inline data.
2340 2340 # 3) we want to prioritize common and officially supported compression
2341 2341 # engines
2342 2342 #
2343 2343 # It follows that we want to optimize for "decompress compressed data
2344 2344 # when encoded with common and officially supported compression engines"
2345 2345 # case over "raw data" and "data encoded by less common or non-official
2346 2346 # compression engines." That is why we have the inline lookup first
2347 2347 # followed by the compengines lookup.
2348 2348 #
2349 2349 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2350 2350 # compressed chunks. And this matters for changelog and manifest reads.
2351 2351 t = data[0:1]
2352 2352
2353 2353 if t == b'x':
2354 2354 try:
2355 2355 return _zlibdecompress(data)
2356 2356 except zlib.error as e:
2357 2357 raise error.RevlogError(
2358 2358 _(b'revlog decompress error: %s')
2359 2359 % stringutil.forcebytestr(e)
2360 2360 )
2361 2361 # '\0' is more common than 'u' so it goes first.
2362 2362 elif t == b'\0':
2363 2363 return data
2364 2364 elif t == b'u':
2365 2365 return util.buffer(data, 1)
2366 2366
2367 2367 compressor = self._get_decompressor(t)
2368 2368
2369 2369 return compressor.decompress(data)
2370 2370
2371 2371 def _addrevision(
2372 2372 self,
2373 2373 node,
2374 2374 rawtext,
2375 2375 transaction,
2376 2376 link,
2377 2377 p1,
2378 2378 p2,
2379 2379 flags,
2380 2380 cachedelta,
2381 2381 alwayscache=False,
2382 2382 deltacomputer=None,
2383 2383 sidedata=None,
2384 2384 ):
2385 2385 """internal function to add revisions to the log
2386 2386
2387 2387 see addrevision for argument descriptions.
2388 2388
2389 2389 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2390 2390
2391 2391 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2392 2392 be used.
2393 2393
2394 2394 invariants:
2395 2395 - rawtext is optional (can be None); if not set, cachedelta must be set.
2396 2396 if both are set, they must correspond to each other.
2397 2397 """
2398 2398 if node == self.nullid:
2399 2399 raise error.RevlogError(
2400 2400 _(b"%s: attempt to add null revision") % self.display_id
2401 2401 )
2402 2402 if (
2403 2403 node == self.nodeconstants.wdirid
2404 2404 or node in self.nodeconstants.wdirfilenodeids
2405 2405 ):
2406 2406 raise error.RevlogError(
2407 2407 _(b"%s: attempt to add wdir revision") % self.display_id
2408 2408 )
2409 2409 if self._writinghandles is None:
2410 2410 msg = b'adding revision outside `revlog._writing` context'
2411 2411 raise error.ProgrammingError(msg)
2412 2412
2413 2413 if self._inline:
2414 2414 fh = self._writinghandles[0]
2415 2415 else:
2416 2416 fh = self._writinghandles[1]
2417 2417
2418 2418 btext = [rawtext]
2419 2419
2420 2420 curr = len(self)
2421 2421 prev = curr - 1
2422 2422
2423 2423 offset = self._get_data_offset(prev)
2424 2424
2425 2425 if self._concurrencychecker:
2426 2426 ifh, dfh, sdfh = self._writinghandles
2427 2427 # XXX no checking for the sidedata file
2428 2428 if self._inline:
2429 2429 # offset is "as if" it were in the .d file, so we need to add on
2430 2430 # the size of the entry metadata.
2431 2431 self._concurrencychecker(
2432 2432 ifh, self._indexfile, offset + curr * self.index.entry_size
2433 2433 )
2434 2434 else:
2435 2435 # Entries in the .i are a consistent size.
2436 2436 self._concurrencychecker(
2437 2437 ifh, self._indexfile, curr * self.index.entry_size
2438 2438 )
2439 2439 self._concurrencychecker(dfh, self._datafile, offset)
2440 2440
2441 2441 p1r, p2r = self.rev(p1), self.rev(p2)
2442 2442
2443 2443 # full versions are inserted when the needed deltas
2444 2444 # become comparable to the uncompressed text
2445 2445 if rawtext is None:
2446 2446 # we need the rawtext size before it is changed by flag processors,
2447 2447 # which is the non-raw size. use revlog explicitly to avoid filelog's
2448 2448 # extra logic that might remove metadata size.
2449 2449 textlen = mdiff.patchedsize(
2450 2450 revlog.size(self, cachedelta[0]), cachedelta[1]
2451 2451 )
2452 2452 else:
2453 2453 textlen = len(rawtext)
2454 2454
2455 2455 if deltacomputer is None:
2456 2456 write_debug = None
2457 2457 if self._debug_delta:
2458 2458 write_debug = transaction._report
2459 2459 deltacomputer = deltautil.deltacomputer(
2460 2460 self, write_debug=write_debug
2461 2461 )
2462 2462
2463 2463 if cachedelta is not None and len(cachedelta) == 2:
2464 2464 # If the cached delta has no information about how it should be
2465 2465 # reused, add the default reuse instruction according to the
2466 2466 # revlog's configuration.
2467 2467 if self._generaldelta and self._lazydeltabase:
2468 2468 delta_base_reuse = DELTA_BASE_REUSE_TRY
2469 2469 else:
2470 2470 delta_base_reuse = DELTA_BASE_REUSE_NO
2471 2471 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2472 2472
2473 2473 revinfo = revlogutils.revisioninfo(
2474 2474 node,
2475 2475 p1,
2476 2476 p2,
2477 2477 btext,
2478 2478 textlen,
2479 2479 cachedelta,
2480 2480 flags,
2481 2481 )
2482 2482
2483 2483 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2484 2484
2485 2485 compression_mode = COMP_MODE_INLINE
2486 2486 if self._docket is not None:
2487 2487 default_comp = self._docket.default_compression_header
2488 2488 r = deltautil.delta_compression(default_comp, deltainfo)
2489 2489 compression_mode, deltainfo = r
2490 2490
2491 2491 sidedata_compression_mode = COMP_MODE_INLINE
2492 2492 if sidedata and self.hassidedata:
2493 2493 sidedata_compression_mode = COMP_MODE_PLAIN
2494 2494 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2495 2495 sidedata_offset = self._docket.sidedata_end
2496 2496 h, comp_sidedata = self.compress(serialized_sidedata)
2497 2497 if (
2498 2498 h != b'u'
2499 2499 and comp_sidedata[0:1] != b'\0'
2500 2500 and len(comp_sidedata) < len(serialized_sidedata)
2501 2501 ):
2502 2502 assert not h
2503 2503 if (
2504 2504 comp_sidedata[0:1]
2505 2505 == self._docket.default_compression_header
2506 2506 ):
2507 2507 sidedata_compression_mode = COMP_MODE_DEFAULT
2508 2508 serialized_sidedata = comp_sidedata
2509 2509 else:
2510 2510 sidedata_compression_mode = COMP_MODE_INLINE
2511 2511 serialized_sidedata = comp_sidedata
2512 2512 else:
2513 2513 serialized_sidedata = b""
2514 2514 # Don't store the offset if the sidedata is empty; that way
2515 2515 # we can easily detect empty sidedata, and it will be no different
2516 2516 # from sidedata we add manually.
2517 2517 sidedata_offset = 0
2518 2518
2519 2519 rank = RANK_UNKNOWN
2520 2520 if self._compute_rank:
2521 2521 if (p1r, p2r) == (nullrev, nullrev):
2522 2522 rank = 1
2523 2523 elif p1r != nullrev and p2r == nullrev:
2524 2524 rank = 1 + self.fast_rank(p1r)
2525 2525 elif p1r == nullrev and p2r != nullrev:
2526 2526 rank = 1 + self.fast_rank(p2r)
2527 2527 else: # merge node
2528 2528 if rustdagop is not None and self.index.rust_ext_compat:
2529 2529 rank = rustdagop.rank(self.index, p1r, p2r)
2530 2530 else:
2531 2531 pmin, pmax = sorted((p1r, p2r))
2532 2532 rank = 1 + self.fast_rank(pmax)
2533 2533 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2534 2534
2535 2535 e = revlogutils.entry(
2536 2536 flags=flags,
2537 2537 data_offset=offset,
2538 2538 data_compressed_length=deltainfo.deltalen,
2539 2539 data_uncompressed_length=textlen,
2540 2540 data_compression_mode=compression_mode,
2541 2541 data_delta_base=deltainfo.base,
2542 2542 link_rev=link,
2543 2543 parent_rev_1=p1r,
2544 2544 parent_rev_2=p2r,
2545 2545 node_id=node,
2546 2546 sidedata_offset=sidedata_offset,
2547 2547 sidedata_compressed_length=len(serialized_sidedata),
2548 2548 sidedata_compression_mode=sidedata_compression_mode,
2549 2549 rank=rank,
2550 2550 )
2551 2551
2552 2552 self.index.append(e)
2553 2553 entry = self.index.entry_binary(curr)
2554 2554 if curr == 0 and self._docket is None:
2555 2555 header = self._format_flags | self._format_version
2556 2556 header = self.index.pack_header(header)
2557 2557 entry = header + entry
2558 2558 self._writeentry(
2559 2559 transaction,
2560 2560 entry,
2561 2561 deltainfo.data,
2562 2562 link,
2563 2563 offset,
2564 2564 serialized_sidedata,
2565 2565 sidedata_offset,
2566 2566 )
2567 2567
2568 2568 rawtext = btext[0]
2569 2569
2570 2570 if alwayscache and rawtext is None:
2571 2571 rawtext = deltacomputer.buildtext(revinfo, fh)
2572 2572
2573 2573 if type(rawtext) == bytes: # only accept immutable objects
2574 2574 self._revisioncache = (node, curr, rawtext)
2575 2575 self._chainbasecache[curr] = deltainfo.chainbase
2576 2576 return curr
2577 2577
2578 2578 def _get_data_offset(self, prev):
2579 2579 """Returns the current offset in the (in-transaction) data file.
2580 2580 Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
2581 2581 file to store that information: since sidedata can be rewritten to the
2582 2582 end of the data file within a transaction, you can have cases where, for
2583 2583 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2584 2584 to `n - 1`'s sidedata being written after `n`'s data.
2585 2585
2586 2586 TODO cache this in a docket file before getting out of experimental."""
2587 2587 if self._docket is None:
2588 2588 return self.end(prev)
2589 2589 else:
2590 2590 return self._docket.data_end
2591 2591
2592 2592 def _writeentry(
2593 2593 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2594 2594 ):
2595 2595 # Files opened in a+ mode have inconsistent behavior on various
2596 2596 # platforms. Windows requires that a file positioning call be made
2597 2597 # when the file handle transitions between reads and writes. See
2598 2598 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2599 2599 # platforms, Python or the platform itself can be buggy. Some versions
2600 2600 # of Solaris have been observed to not append at the end of the file
2601 2601 # if the file was seeked to before the end. See issue4943 for more.
2602 2602 #
2603 2603 # We work around this issue by inserting a seek() before writing.
2604 2604 # Note: This is likely not necessary on Python 3. However, because
2605 2605 # the file handle is reused for reads and may be seeked there, we need
2606 2606 # to be careful before changing this.
2607 2607 if self._writinghandles is None:
2608 2608 msg = b'adding revision outside `revlog._writing` context'
2609 2609 raise error.ProgrammingError(msg)
2610 2610 ifh, dfh, sdfh = self._writinghandles
2611 2611 if self._docket is None:
2612 2612 ifh.seek(0, os.SEEK_END)
2613 2613 else:
2614 2614 ifh.seek(self._docket.index_end, os.SEEK_SET)
2615 2615 if dfh:
2616 2616 if self._docket is None:
2617 2617 dfh.seek(0, os.SEEK_END)
2618 2618 else:
2619 2619 dfh.seek(self._docket.data_end, os.SEEK_SET)
2620 2620 if sdfh:
2621 2621 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2622 2622
2623 2623 curr = len(self) - 1
2624 2624 if not self._inline:
2625 2625 transaction.add(self._datafile, offset)
2626 2626 if self._sidedatafile:
2627 2627 transaction.add(self._sidedatafile, sidedata_offset)
2628 2628 transaction.add(self._indexfile, curr * len(entry))
2629 2629 if data[0]:
2630 2630 dfh.write(data[0])
2631 2631 dfh.write(data[1])
2632 2632 if sidedata:
2633 2633 sdfh.write(sidedata)
2634 2634 ifh.write(entry)
2635 2635 else:
2636 2636 offset += curr * self.index.entry_size
2637 2637 transaction.add(self._indexfile, offset)
2638 2638 ifh.write(entry)
2639 2639 ifh.write(data[0])
2640 2640 ifh.write(data[1])
2641 2641 assert not sidedata
2642 2642 self._enforceinlinesize(transaction)
2643 2643 if self._docket is not None:
2644 2644 # revlog-v2 always has 3 writing handles, help Pytype
2645 2645 wh1 = self._writinghandles[0]
2646 2646 wh2 = self._writinghandles[1]
2647 2647 wh3 = self._writinghandles[2]
2648 2648 assert wh1 is not None
2649 2649 assert wh2 is not None
2650 2650 assert wh3 is not None
2651 2651 self._docket.index_end = wh1.tell()
2652 2652 self._docket.data_end = wh2.tell()
2653 2653 self._docket.sidedata_end = wh3.tell()
2654 2654
2655 2655 nodemaputil.setup_persistent_nodemap(transaction, self)
2656 2656
2657 2657 def addgroup(
2658 2658 self,
2659 2659 deltas,
2660 2660 linkmapper,
2661 2661 transaction,
2662 2662 alwayscache=False,
2663 2663 addrevisioncb=None,
2664 2664 duplicaterevisioncb=None,
2665 2665 debug_info=None,
2666 2666 delta_base_reuse_policy=None,
2667 2667 ):
2668 2668 """
2669 2669 add a delta group
2670 2670
2671 2671 given a set of deltas, add them to the revision log. The
2672 2672 first delta is against its parent, which should already be in our
2673 2673 log; the rest are against the previous delta.
2674 2674
2675 2675 If ``addrevisioncb`` is defined, it will be called with arguments of
2676 2676 this revlog and the revision number that was added.
2677 2677 """
2678 2678
2679 2679 if self._adding_group:
2680 2680 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2681 2681
2682 2682 # read the default delta-base reuse policy from revlog config if the
2683 2683 # group did not specify one.
2684 2684 if delta_base_reuse_policy is None:
2685 2685 if self._generaldelta and self._lazydeltabase:
2686 2686 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2687 2687 else:
2688 2688 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2689 2689
2690 2690 self._adding_group = True
2691 2691 empty = True
2692 2692 try:
2693 2693 with self._writing(transaction):
2694 2694 write_debug = None
2695 2695 if self._debug_delta:
2696 2696 write_debug = transaction._report
2697 2697 deltacomputer = deltautil.deltacomputer(
2698 2698 self,
2699 2699 write_debug=write_debug,
2700 2700 debug_info=debug_info,
2701 2701 )
2702 2702 # loop through our set of deltas
2703 2703 for data in deltas:
2704 2704 (
2705 2705 node,
2706 2706 p1,
2707 2707 p2,
2708 2708 linknode,
2709 2709 deltabase,
2710 2710 delta,
2711 2711 flags,
2712 2712 sidedata,
2713 2713 ) = data
2714 2714 link = linkmapper(linknode)
2715 2715 flags = flags or REVIDX_DEFAULT_FLAGS
2716 2716
2717 2717 rev = self.index.get_rev(node)
2718 2718 if rev is not None:
2719 2719 # this can happen if two branches make the same change
2720 2720 self._nodeduplicatecallback(transaction, rev)
2721 2721 if duplicaterevisioncb:
2722 2722 duplicaterevisioncb(self, rev)
2723 2723 empty = False
2724 2724 continue
2725 2725
2726 2726 for p in (p1, p2):
2727 2727 if not self.index.has_node(p):
2728 2728 raise error.LookupError(
2729 2729 p, self.radix, _(b'unknown parent')
2730 2730 )
2731 2731
2732 2732 if not self.index.has_node(deltabase):
2733 2733 raise error.LookupError(
2734 2734 deltabase, self.display_id, _(b'unknown delta base')
2735 2735 )
2736 2736
2737 2737 baserev = self.rev(deltabase)
2738 2738
2739 2739 if baserev != nullrev and self.iscensored(baserev):
2740 2740 # if the base is censored, the delta must be a full replacement in a
2741 2741 # single patch operation
2742 2742 hlen = struct.calcsize(b">lll")
2743 2743 oldlen = self.rawsize(baserev)
2744 2744 newlen = len(delta) - hlen
2745 2745 if delta[:hlen] != mdiff.replacediffheader(
2746 2746 oldlen, newlen
2747 2747 ):
2748 2748 raise error.CensoredBaseError(
2749 2749 self.display_id, self.node(baserev)
2750 2750 )
2751 2751
2752 2752 if not flags and self._peek_iscensored(baserev, delta):
2753 2753 flags |= REVIDX_ISCENSORED
2754 2754
2755 2755 # We assume consumers of addrevisioncb will want to retrieve
2756 2756 # the added revision, which will require a call to
2757 2757 # revision(). revision() will fast path if there is a cache
2758 2758 # hit. So, we tell _addrevision() to always cache in this case.
2759 2759 # We're only using addgroup() in the context of changegroup
2760 2760 # generation so the revision data can always be handled as raw
2761 2761 # by the flagprocessor.
2762 2762 rev = self._addrevision(
2763 2763 node,
2764 2764 None,
2765 2765 transaction,
2766 2766 link,
2767 2767 p1,
2768 2768 p2,
2769 2769 flags,
2770 2770 (baserev, delta, delta_base_reuse_policy),
2771 2771 alwayscache=alwayscache,
2772 2772 deltacomputer=deltacomputer,
2773 2773 sidedata=sidedata,
2774 2774 )
2775 2775
2776 2776 if addrevisioncb:
2777 2777 addrevisioncb(self, rev)
2778 2778 empty = False
2779 2779 finally:
2780 2780 self._adding_group = False
2781 2781 return not empty
2782 2782
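# Hedged sketch of the shape each ``deltas`` entry must have, mirroring
# the unpacking in the loop above (all names illustrative):
#
#     deltas = [
#         (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
#     ]
#     rl.addgroup(deltas, linkmapper, tr)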
2783 2783 def iscensored(self, rev):
2784 2784 """Check if a file revision is censored."""
2785 2785 if not self._censorable:
2786 2786 return False
2787 2787
2788 2788 return self.flags(rev) & REVIDX_ISCENSORED
2789 2789
2790 2790 def _peek_iscensored(self, baserev, delta):
2791 2791 """Quickly check if a delta produces a censored revision."""
2792 2792 if not self._censorable:
2793 2793 return False
2794 2794
2795 2795 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2796 2796
2797 2797 def getstrippoint(self, minlink):
2798 2798 """find the minimum rev that must be stripped to strip the linkrev
2799 2799
2800 2800 Returns a tuple containing the minimum rev and a set of all revs that
2801 2801 have linkrevs that will be broken by this strip.
2802 2802 """
2803 2803 return storageutil.resolvestripinfo(
2804 2804 minlink,
2805 2805 len(self) - 1,
2806 2806 self.headrevs(),
2807 2807 self.linkrev,
2808 2808 self.parentrevs,
2809 2809 )
2810 2810
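# Hedged sketch tying ``getstrippoint`` to ``strip`` (``minlink`` and
# ``tr`` are illustrative): the returned rev is the truncation point,
# and the set lists revisions whose linkrevs the strip would break.
#
#     rev, broken = rl.getstrippoint(minlink)
#     if rev < len(rl):
#         rl.strip(minlink, tr)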
2811 2811 def strip(self, minlink, transaction):
2812 2812 """truncate the revlog on the first revision with a linkrev >= minlink
2813 2813
2814 2814 This function is called when we're stripping revision minlink and
2815 2815 its descendants from the repository.
2816 2816
2817 2817 We have to remove all revisions with linkrev >= minlink, because
2818 2818 the equivalent changelog revisions will be renumbered after the
2819 2819 strip.
2820 2820
2821 2821 So we truncate the revlog on the first of these revisions, and
2822 2822 trust that the caller has saved the revisions that shouldn't be
2823 2823 removed and that it'll re-add them after this truncation.
2824 2824 """
2825 2825 if len(self) == 0:
2826 2826 return
2827 2827
2828 2828 rev, _ = self.getstrippoint(minlink)
2829 2829 if rev == len(self):
2830 2830 return
2831 2831
2832 2832 # first truncate the files on disk
2833 2833 data_end = self.start(rev)
2834 2834 if not self._inline:
2835 2835 transaction.add(self._datafile, data_end)
2836 2836 end = rev * self.index.entry_size
2837 2837 else:
2838 2838 end = data_end + (rev * self.index.entry_size)
2839 2839
2840 2840 if self._sidedatafile:
2841 2841 sidedata_end = self.sidedata_cut_off(rev)
2842 2842 transaction.add(self._sidedatafile, sidedata_end)
2843 2843
2844 2844 transaction.add(self._indexfile, end)
2845 2845 if self._docket is not None:
2846 2846 # XXX we could leverage the docket while stripping. However, it is
2847 2847 # not powerful enough at the time of this comment
2848 2848 self._docket.index_end = end
2849 2849 self._docket.data_end = data_end
2850 2850 self._docket.sidedata_end = sidedata_end
2851 2851 self._docket.write(transaction, stripping=True)
2852 2852
2853 2853 # then reset internal state in memory to forget those revisions
2854 2854 self._revisioncache = None
2855 2855 self._chaininfocache = util.lrucachedict(500)
2856 2856 self._segmentfile.clear_cache()
2857 2857 self._segmentfile_sidedata.clear_cache()
2858 2858
2859 2859 del self.index[rev:-1]
2860 2860
2861 2861 def checksize(self):
2862 2862 """Check size of index and data files
2863 2863
2864 2864 return a (dd, di) tuple.
2865 2865 - dd: extra bytes for the "data" file
2866 2866 - di: extra bytes for the "index" file
2867 2867
2868 2868 A healthy revlog will return (0, 0).
2869 2869 """
2870 2870 expected = 0
2871 2871 if len(self):
2872 2872 expected = max(0, self.end(len(self) - 1))
2873 2873
2874 2874 try:
2875 2875 with self._datafp() as f:
2876 2876 f.seek(0, io.SEEK_END)
2877 2877 actual = f.tell()
2878 2878 dd = actual - expected
2879 2879 except FileNotFoundError:
2880 2880 dd = 0
2881 2881
2882 2882 try:
2883 2883 f = self.opener(self._indexfile)
2884 2884 f.seek(0, io.SEEK_END)
2885 2885 actual = f.tell()
2886 2886 f.close()
2887 2887 s = self.index.entry_size
2888 2888 i = max(0, actual // s)
2889 2889 di = actual - (i * s)
2890 2890 if self._inline:
2891 2891 databytes = 0
2892 2892 for r in self:
2893 2893 databytes += max(0, self.length(r))
2894 2894 dd = 0
2895 2895 di = actual - len(self) * s - databytes
2896 2896 except FileNotFoundError:
2897 2897 di = 0
2898 2898
2899 2899 return (dd, di)
2900 2900
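# Hedged interpretation of ``checksize`` (``rl`` is illustrative): a
# clean revlog reports no stray bytes in either file.
#
#     dd, di = rl.checksize()
#     healthy = (dd, di) == (0, 0)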
2901 2901 def files(self):
2902 2902 res = [self._indexfile]
2903 2903 if self._docket_file is None:
2904 2904 if not self._inline:
2905 2905 res.append(self._datafile)
2906 2906 else:
2907 2907 res.append(self._docket_file)
2908 2908 res.extend(self._docket.old_index_filepaths(include_empty=False))
2909 2909 if self._docket.data_end:
2910 2910 res.append(self._datafile)
2911 2911 res.extend(self._docket.old_data_filepaths(include_empty=False))
2912 2912 if self._docket.sidedata_end:
2913 2913 res.append(self._sidedatafile)
2914 2914 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2915 2915 return res
2916 2916
2917 2917 def emitrevisions(
2918 2918 self,
2919 2919 nodes,
2920 2920 nodesorder=None,
2921 2921 revisiondata=False,
2922 2922 assumehaveparentrevisions=False,
2923 2923 deltamode=repository.CG_DELTAMODE_STD,
2924 2924 sidedata_helpers=None,
2925 2925 debug_info=None,
2926 2926 ):
2927 2927 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2928 2928 raise error.ProgrammingError(
2929 2929 b'unhandled value for nodesorder: %s' % nodesorder
2930 2930 )
2931 2931
2932 2932 if nodesorder is None and not self._generaldelta:
2933 2933 nodesorder = b'storage'
2934 2934
2935 2935 if (
2936 2936 not self._storedeltachains
2937 2937 and deltamode != repository.CG_DELTAMODE_PREV
2938 2938 ):
2939 2939 deltamode = repository.CG_DELTAMODE_FULL
2940 2940
2941 2941 return storageutil.emitrevisions(
2942 2942 self,
2943 2943 nodes,
2944 2944 nodesorder,
2945 2945 revlogrevisiondelta,
2946 2946 deltaparentfn=self.deltaparent,
2947 2947 candeltafn=self.candelta,
2948 2948 rawsizefn=self.rawsize,
2949 2949 revdifffn=self.revdiff,
2950 2950 flagsfn=self.flags,
2951 2951 deltamode=deltamode,
2952 2952 revisiondata=revisiondata,
2953 2953 assumehaveparentrevisions=assumehaveparentrevisions,
2954 2954 sidedata_helpers=sidedata_helpers,
2955 2955 debug_info=debug_info,
2956 2956 )
2957 2957
2958 2958 DELTAREUSEALWAYS = b'always'
2959 2959 DELTAREUSESAMEREVS = b'samerevs'
2960 2960 DELTAREUSENEVER = b'never'
2961 2961
2962 2962 DELTAREUSEFULLADD = b'fulladd'
2963 2963
2964 2964 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2965 2965
2966 2966 def clone(
2967 2967 self,
2968 2968 tr,
2969 2969 destrevlog,
2970 2970 addrevisioncb=None,
2971 2971 deltareuse=DELTAREUSESAMEREVS,
2972 2972 forcedeltabothparents=None,
2973 2973 sidedata_helpers=None,
2974 2974 ):
2975 2975 """Copy this revlog to another, possibly with format changes.
2976 2976
2977 2977 The destination revlog will contain the same revisions and nodes.
2978 2978 However, it may not be bit-for-bit identical due to e.g. delta encoding
2979 2979 differences.
2980 2980
2981 2981 The ``deltareuse`` argument controls how deltas from the existing revlog
2982 2982 are preserved in the destination revlog. The argument can have the
2983 2983 following values:
2984 2984
2985 2985 DELTAREUSEALWAYS
2986 2986 Deltas will always be reused (if possible), even if the destination
2987 2987 revlog would not select the same revisions for the delta. This is the
2988 2988 fastest mode of operation.
2989 2989 DELTAREUSESAMEREVS
2990 2990 Deltas will be reused if the destination revlog would pick the same
2991 2991 revisions for the delta. This mode strikes a balance between speed
2992 2992 and optimization.
2993 2993 DELTAREUSENEVER
2994 2994 Deltas will never be reused. This is the slowest mode of execution.
2995 2995 This mode can be used to recompute deltas (e.g. if the diff/delta
2996 2996 algorithm changes).
2997 2997 DELTAREUSEFULLADD
2998 2998 Revisions will be re-added as if they were new content. This is
2999 2999 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3000 3000 e.g. large file detection and handling.
3001 3001
3002 3002 Delta computation can be slow, so the choice of delta reuse policy can
3003 3003 significantly affect run time.
3004 3004
3005 3005 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3006 3006 two extremes. Deltas will be reused if they are appropriate. But if the
3007 3007 destination could choose a better delta base, it will do so. This means if you
3008 3008 are converting a non-generaldelta revlog to a generaldelta revlog,
3009 3009 deltas will be recomputed if the delta's parent isn't a parent of the
3010 3010 revision.
3011 3011
3012 3012 In addition to the delta policy, the ``forcedeltabothparents``
3013 3013 argument controls whether to force computing deltas against both parents
3014 3014 for merges. When unset, the destination revlog's current setting is used.
3015 3015
3016 3016 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3017 3017 `sidedata_helpers`.
3018 3018 """
3019 3019 if deltareuse not in self.DELTAREUSEALL:
3020 3020 raise ValueError(
3021 3021 _(b'value for deltareuse invalid: %s') % deltareuse
3022 3022 )
3023 3023
3024 3024 if len(destrevlog):
3025 3025 raise ValueError(_(b'destination revlog is not empty'))
3026 3026
3027 3027 if getattr(self, 'filteredrevs', None):
3028 3028 raise ValueError(_(b'source revlog has filtered revisions'))
3029 3029 if getattr(destrevlog, 'filteredrevs', None):
3030 3030 raise ValueError(_(b'destination revlog has filtered revisions'))
3031 3031
3032 3032 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3033 3033 # if possible.
3034 3034 oldlazydelta = destrevlog._lazydelta
3035 3035 oldlazydeltabase = destrevlog._lazydeltabase
3036 3036 oldamd = destrevlog._deltabothparents
3037 3037
3038 3038 try:
3039 3039 if deltareuse == self.DELTAREUSEALWAYS:
3040 3040 destrevlog._lazydeltabase = True
3041 3041 destrevlog._lazydelta = True
3042 3042 elif deltareuse == self.DELTAREUSESAMEREVS:
3043 3043 destrevlog._lazydeltabase = False
3044 3044 destrevlog._lazydelta = True
3045 3045 elif deltareuse == self.DELTAREUSENEVER:
3046 3046 destrevlog._lazydeltabase = False
3047 3047 destrevlog._lazydelta = False
3048 3048
3049 3049 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3050 3050
3051 3051 self._clone(
3052 3052 tr,
3053 3053 destrevlog,
3054 3054 addrevisioncb,
3055 3055 deltareuse,
3056 3056 forcedeltabothparents,
3057 3057 sidedata_helpers,
3058 3058 )
3059 3059
3060 3060 finally:
3061 3061 destrevlog._lazydelta = oldlazydelta
3062 3062 destrevlog._lazydeltabase = oldlazydeltabase
3063 3063 destrevlog._deltabothparents = oldamd
3064 3064
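# Hedged usage sketch for ``clone`` (``src``, ``dst`` and the
# transaction are illustrative): recomputing every delta while copying,
# e.g. during a format upgrade.
#
#     with repo.transaction(b'clone') as tr:
#         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)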
3065 3065 def _clone(
3066 3066 self,
3067 3067 tr,
3068 3068 destrevlog,
3069 3069 addrevisioncb,
3070 3070 deltareuse,
3071 3071 forcedeltabothparents,
3072 3072 sidedata_helpers,
3073 3073 ):
3074 3074 """perform the core duty of `revlog.clone` after parameter processing"""
3075 3075 write_debug = None
3076 3076 if self._debug_delta:
3077 3077 write_debug = tr._report
3078 3078 deltacomputer = deltautil.deltacomputer(
3079 3079 destrevlog,
3080 3080 write_debug=write_debug,
3081 3081 )
3082 3082 index = self.index
3083 3083 for rev in self:
3084 3084 entry = index[rev]
3085 3085
3086 3086 # Some classes override linkrev to take filtered revs into
3087 3087 # account. Use raw entry from index.
3088 3088 flags = entry[0] & 0xFFFF
3089 3089 linkrev = entry[4]
3090 3090 p1 = index[entry[5]][7]
3091 3091 p2 = index[entry[6]][7]
3092 3092 node = entry[7]
3093 3093
3094 3094 # (Possibly) reuse the delta from the revlog if allowed and
3095 3095 # the revlog chunk is a delta.
3096 3096 cachedelta = None
3097 3097 rawtext = None
3098 3098 if deltareuse == self.DELTAREUSEFULLADD:
3099 3099 text = self._revisiondata(rev)
3100 3100 sidedata = self.sidedata(rev)
3101 3101
3102 3102 if sidedata_helpers is not None:
3103 3103 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3104 3104 self, sidedata_helpers, sidedata, rev
3105 3105 )
3106 3106 flags = flags | new_flags[0] & ~new_flags[1]
3107 3107
3108 3108 destrevlog.addrevision(
3109 3109 text,
3110 3110 tr,
3111 3111 linkrev,
3112 3112 p1,
3113 3113 p2,
3114 3114 cachedelta=cachedelta,
3115 3115 node=node,
3116 3116 flags=flags,
3117 3117 deltacomputer=deltacomputer,
3118 3118 sidedata=sidedata,
3119 3119 )
3120 3120 else:
3121 3121 if destrevlog._lazydelta:
3122 3122 dp = self.deltaparent(rev)
3123 3123 if dp != nullrev:
3124 3124 cachedelta = (dp, bytes(self._chunk(rev)))
3125 3125
3126 3126 sidedata = None
3127 3127 if not cachedelta:
3128 3128 rawtext = self._revisiondata(rev)
3129 3129 sidedata = self.sidedata(rev)
3130 3130 if sidedata is None:
3131 3131 sidedata = self.sidedata(rev)
3132 3132
3133 3133 if sidedata_helpers is not None:
3134 3134 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3135 3135 self, sidedata_helpers, sidedata, rev
3136 3136 )
3137 3137 flags = flags | new_flags[0] & ~new_flags[1]
3138 3138
3139 3139 with destrevlog._writing(tr):
3140 3140 destrevlog._addrevision(
3141 3141 node,
3142 3142 rawtext,
3143 3143 tr,
3144 3144 linkrev,
3145 3145 p1,
3146 3146 p2,
3147 3147 flags,
3148 3148 cachedelta,
3149 3149 deltacomputer=deltacomputer,
3150 3150 sidedata=sidedata,
3151 3151 )
3152 3152
3153 3153 if addrevisioncb:
3154 3154 addrevisioncb(self, rev, node)
3155 3155
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

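    # Hedged example: the dispatch above keys off the on-disk format, so a
    # caller only needs this public entry point. `rl`, `tr` and `node` are
    # hypothetical stand-ins for a revlog, a transaction and a censored node.
    #
    #   rl.censorrevision(tr, node, tombstone=b'scrubbed by administrator')
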
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  --------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

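            # Illustration (hedged; filelog.py is the authoritative source
            # for this format): a "rename"/"meta" rawtext carries a metadata
            # block delimited by two b'\x01\n' markers, roughly
            #
            #   b'\x01\ncopy: old/path\ncopyrev: <40 hex chars>\n\x01\n<text>'
            #
            # so LM is the length of everything up to and including the
            # second b'\x01\n', which read() strips from rawtext.
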
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

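    # Hedged usage sketch: draining the generator. The `state` keys used
    # above (b'expectedversion', b'erroroncensored', b'skipflags') are the
    # contract; `rl` and `ui` are hypothetical stand-ins.
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn((problem.error or problem.warning) + b'\n')
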
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

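    # Hedged example: request only the fields you need, since trackedsize
    # walks every revision and storedsize stats every file. `rl` is a
    # hypothetical revlog instance; the result keys mirror the flags.
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # e.g. {b'revisionscount': 42, b'trackedsize': 123456}
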
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    # compare one-byte slices, not ints: comp_sidedata[0]
                    # would be an int on Python 3 and never equal bytes
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
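                # (flags[0] holds the flags to add, flags[1] the flags to
                # remove; since `&` binds tighter than `|`, this computes
                # entry[0] | (flags[0] & ~flags[1]).)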
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
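
    # Hedged recap of the compression-mode choice above (a sketch of the
    # logic for non-empty sidedata, not an alternative implementation):
    # PLAIN stores the sidedata uncompressed, DEFAULT stores it compressed
    # with the docket's default engine (header implied), and INLINE keeps
    # the engine header inside the stored bytes.
    #
    #   def _pick_sidedata_mode(raw, header, comp, default_header):
    #       # fall back to the raw bytes when compression does not help
    #       if header == b'u' or comp[0:1] == b'\0' or len(comp) >= len(raw):
    #           return COMP_MODE_PLAIN, raw
    #       if comp[0:1] == default_header:
    #           return COMP_MODE_DEFAULT, comp
    #       return COMP_MODE_INLINE, comp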