##// END OF EJS Templates
revlog: use the new `entry` function in revlog.py...
marmoute -
r48188:8230f020 default
parent child Browse files
Show More
@@ -1,3400 +1,3401
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 censor,
85 85 deltas as deltautil,
86 86 docket as docketutil,
87 87 flagutil,
88 88 nodemap as nodemaputil,
89 89 revlogv0,
90 90 sidedata as sidedatautil,
91 91 )
92 92 from .utils import (
93 93 storageutil,
94 94 stringutil,
95 95 )
96 96
# Bare references to all the names below, so that pyflakes does not report
# the corresponding imports as unused.
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
117 117
# Implementation modules obtained through the module policy.
# ``rustrevlog`` is compared against None before being used in this module,
# so it may be unavailable.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')
122 122
# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data (131072 bytes == 128 KiB)
_maxinline = 131072
# 1048576 bytes == 1 MiB
_chunksize = 1048576
129 129
130 130 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS.

    The text is passed through untouched; ``rl`` is not used.

    Returns a ``(text, False)`` pair.
    NOTE(review): the meaning of the boolean is defined by the flag processor
    protocol in ``flagutil`` -- confirm there.
    """
    return text, False
133 133
134 134
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for REVIDX_ELLIPSIS.

    The text is passed through untouched; ``rl`` is not used.

    Returns a ``(text, False)`` pair.
    NOTE(review): the meaning of the boolean is defined by the flag processor
    protocol in ``flagutil`` -- confirm there.
    """
    return text, False
137 137
138 138
def ellipsisrawprocessor(rl, text):
    """Raw flag processor for REVIDX_ELLIPSIS.

    Always returns False, whatever ``rl`` and ``text`` are.
    """
    return False
141 141
142 142
# (read, write, raw) processors, registered for REVIDX_ELLIPSIS by
# revlog._init_opts() when the `enableellipsis` option is set.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
148 148
149 149
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation.

    When ``skipflags`` is truthy the node is only recorded in
    ``state[b'skipread']``; otherwise ``rl.revision(node)`` is called for its
    side effect and its result is discarded.
    """
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)
158 158
159 159
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)
168 168
169 169
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext

    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    not described by the original documentation; presumably the
                length of the fulltext -- TODO confirm against callers
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
189 189
190 190
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Attribute container declared as implementing
    ``repository.irevisiondelta``.

    Every field is mandatory except ``linknode``, which defaults to None.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
205 205
206 206
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """Immutable attribute container declared as implementing
    ``repository.iverifyproblem``.

    All three fields are optional and default to None.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
213 213
214 214
def parse_index_v1(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2``.

    ``inline`` is forwarded as is. Returns the ``(index, cache)`` pair
    produced by the parser.
    """
    # call the implementation selected by the module policy (see `parsers`)
    index, cache = parsers.parse_index2(data, inline)
    return index, cache
219 219
220 220
def parse_index_v2(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2`` in revlogv2 mode.

    ``inline`` is forwarded as is. Returns the ``(index, cache)`` pair
    produced by the parser.
    """
    # call the implementation selected by the module policy (see `parsers`)
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache
225 225
226 226
def parse_index_cl_v2(data, inline):
    """Parse changelog-v2 index ``data``.

    ``inline`` must be falsy. Returns the ``(index, cache)`` pair produced by
    the parser.
    """
    assert not inline
    # The parser used here is always the one from the pure-Python module; it
    # is imported lazily, at call time.
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache
234 234
235 235
# `parse_index_v1_nodemap` is only a function when the selected `parsers`
# module provides `parse_index_devel_nodemap`; otherwise it is None and
# callers must check for that.
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None
245 245
246 246
def parse_index_v1_mixed(data, inline):
    """Parse a v1 index and wrap it in ``rustrevlog.MixedIndex``.

    Must only be used when ``rustrevlog`` is not None. Returns an
    ``(index, cache)`` pair.
    """
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache
250 250
251 251
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

# Placeholders: revlog name, expected byte count, offset, byte count obtained.
PARTIAL_READ_MSG = _(
    b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
)

# Placeholders: revlog name, expected byte count, offset, data size.
FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)
264 264
265 265
266 266 class revlog(object):
267 267 """
268 268 the underlying revision storage object
269 269
270 270 A revlog consists of two parts, an index and the revision data.
271 271
272 272 The index is a file with a fixed record size containing
273 273 information on each revision, including its nodeid (hash), the
274 274 nodeids of its parents, the position and offset of its data within
275 275 the data file, and the revision it's based on. Finally, each entry
276 276 contains a linkrev entry that can serve as a pointer to external
277 277 data.
278 278
279 279 The revision data itself is a linear collection of data chunks.
280 280 Each chunk represents a revision and is usually represented as a
281 281 delta against the previous chunk. To bound lookup time, runs of
282 282 deltas are limited to about 2 times the length of the original
283 283 version data. This makes retrieval of a version proportional to
284 284 its size, or O(1) relative to the number of revisions.
285 285
286 286 Both pieces of the revlog are written to in an append-only
287 287 fashion, which means we never need to rewrite a file to insert or
288 288 remove data, and can use some simple techniques to avoid the need
289 289 for locking while reading.
290 290
291 291 If checkambig, indexfile is opened with checkambig=True at
292 292 writing, to avoid file stat ambiguity.
293 293
294 294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
295 295 index will be mmapped rather than read if it is larger than the
296 296 configured threshold.
297 297
298 298 If censorable is True, the revlog can have censored revisions.
299 299
300 300 If `upperboundcomp` is not None, this is the expected maximal gain from
301 301 compression for the data content.
302 302
303 303 `concurrencychecker` is an optional function that receives 3 arguments: a
304 304 file handle, a filename, and an expected position. It should check whether
305 305 the current position in the file handle is valid, and log/warn/fail (by
306 306 raising).
307 307
308 308 See mercurial/revlogutils/constants.py for details about the content of an
309 309 index entry.
310 310 """
311 311
312 312 _flagserrorclass = error.RevlogError
313 313
def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,  # only exist for `tmpcensored` now
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
    trypending=False,
):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    `target`: a (KIND, ID) tuple that identifies the content stored in
    this revlog. It helps the rest of the code to understand what the revlog
    is about without having to resort to heuristics and index filename
    analysis. Note that this must reliably be set by normal code, but
    that test, debug, or performance measurement code might not set this to
    an accurate value.

    `radix`: common prefix of the file names used by this revlog (the
    index entry point is `<radix>.i`, see `_loadindex`).

    `postfix`: optional suffix appended to the index and data file names.

    `trypending`: when True, `<radix>.i.a` is used as entry point if it
    exists.

    `persistentnodemap`: when True, a nodemap file name is requested from
    `nodemaputil.get_nodemap_file`.

    The index is loaded as part of the construction.
    """
    self.upperboundcomp = upperboundcomp

    self.radix = radix

    self._docket_file = None
    self._indexfile = None
    self._datafile = None
    self._sidedatafile = None
    self._nodemap_file = None
    self.postfix = postfix
    self._trypending = trypending
    self.opener = opener
    if persistentnodemap:
        self._nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable
    # 3-tuple of (node, rev, text) for a raw revision.
    self._revisioncache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # How much data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._deltabothparents = True
    self.index = None
    self._docket = None
    self._nodemap_docket = None
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Default compression settings; `_init_opts` may override them from the
    # opener options.
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self.hassidedata = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # Make copy of flag processors so each revlog instance can support
    # custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 3-tuple of file handles being used for active writing.
    self._writinghandles = None
    # prevent nesting of addgroup
    self._adding_group = None

    self._loadindex()

    self._concurrencychecker = concurrencychecker
401 401
def _init_opts(self):
    """process options (from above/config) to setup associated default revlog mode

    These values might be affected when actually reading on disk information.

    The relevant values are returned for use in _loadindex(), as a
    `(new_header, mmapindexthreshold, force_nodemap)` tuple:

    * new_header:
        version header to use if we need to create a new revlog

    * mmapindexthreshold:
        minimal index size for start to use mmap

    * force_nodemap:
        force the usage of a "development" version of the nodemap code

    Raises error.RevlogError if the configured chunk cache size is not a
    strictly positive power of 2.
    """
    mmapindexthreshold = None
    opts = self.opener.options

    if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
        new_header = CHANGELOGV2
    elif b'revlogv2' in opts:
        new_header = REVLOGV2
    elif b'revlogv1' in opts:
        new_header = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in opts:
            new_header |= FLAG_GENERALDELTA
    elif b'revlogv0' in opts:
        new_header = REVLOGV0
    else:
        new_header = REVLOG_DEFAULT_VERSION

    if b'chunkcachesize' in opts:
        self._chunkcachesize = opts[b'chunkcachesize']
    if b'maxchainlen' in opts:
        self._maxchainlen = opts[b'maxchainlen']
    if b'deltabothparents' in opts:
        self._deltabothparents = opts[b'deltabothparents']
    self._lazydelta = bool(opts.get(b'lazydelta', True))
    # lazydeltabase is only honored when lazydelta is enabled
    self._lazydeltabase = False
    if self._lazydelta:
        self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
    if b'compengine' in opts:
        self._compengine = opts[b'compengine']
    if b'zlib.level' in opts:
        self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
    if b'zstd.level' in opts:
        self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
    if b'maxdeltachainspan' in opts:
        self._maxdeltachainspan = opts[b'maxdeltachainspan']
    if self._mmaplargeindex and b'mmapindexthreshold' in opts:
        mmapindexthreshold = opts[b'mmapindexthreshold']
    self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
    withsparseread = bool(opts.get(b'with-sparse-read', False))
    # sparse-revlog forces sparse-read
    self._withsparseread = self._sparserevlog or withsparseread
    if b'sparse-read-density-threshold' in opts:
        self._srdensitythreshold = opts[b'sparse-read-density-threshold']
    if b'sparse-read-min-gap-size' in opts:
        self._srmingapsize = opts[b'sparse-read-min-gap-size']
    if opts.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # revlog v0 doesn't have flag processors
    for flag, processor in pycompat.iteritems(
        opts.get(b'flagprocessors', {})
    ):
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    if self._chunkcachesize <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % self._chunkcachesize
        )
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        # `n & (n - 1)` is non-zero exactly when n has more than one bit set
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % self._chunkcachesize
        )
    force_nodemap = opts.get(b'devel-force-nodemap', False)
    return new_header, mmapindexthreshold, force_nodemap
483 483
def _get_data(self, filepath, mmap_threshold, size=None):
    """return a file content with or without mmap

    `mmap_threshold`: when not None, files whose size is at least this value
    are memory mapped instead of read.

    `size`: when not None, maximum amount of data to return.

    If the file is missing return the empty string"""
    try:
        with self.opener(filepath) as fp:
            if mmap_threshold is not None:
                file_size = self.opener.fstat(fp).st_size
                if file_size >= mmap_threshold:
                    if size is not None:
                        # avoid potential mmap crash
                        size = min(file_size, size)
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    if size is None:
                        return util.buffer(util.mmapread(fp))
                    else:
                        return util.buffer(util.mmapread(fp, size))
            if size is None:
                return fp.read()
            else:
                return fp.read(size)
    except IOError as inst:
        # only a missing file is tolerated
        if inst.errno != errno.ENOENT:
            raise
        return b''
510 510
def _loadindex(self):
    """Read the revlog entry point and set up the index related state.

    The steps are:

    * process the options (`_init_opts`) and pick the entry point file,
    * read the format header from it (or use the "new revlog" header when
      there is no data) and validate version and flags,
    * locate the index (directly, or through the docket for formats using
      one) and the data/sidedata files,
    * select the index parser and parse the index data,
    * load the persistent nodemap data when usable.

    Raises error.RevlogError on unknown version or flags, on truncated
    index data, and when the index cannot be parsed.
    """
    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix
    else:
        entry_point = b'%s.i' % self.radix

    self._initempty = True
    entry_data = self._get_data(entry_point, mmapindexthreshold)
    if len(entry_data) > 0:
        header = INDEX_HEADER.unpack(entry_data[:4])[0]
        self._initempty = False
    else:
        header = new_header

    # low 16 bits: format version, remaining bits: format flags
    self._format_flags = header & ~0xFFFF
    self._format_version = header & 0xFFFF

    supported_flags = SUPPORTED_FLAGS.get(self._format_version)
    if supported_flags is None:
        msg = _(b'unknown version (%d) in revlog %s')
        msg %= (self._format_version, self.display_id)
        raise error.RevlogError(msg)
    elif self._format_flags & ~supported_flags:
        msg = _(b'unknown flags (%#04x) in version %d revlog %s')
        display_flag = self._format_flags >> 16
        msg %= (display_flag, self._format_version, self.display_id)
        raise error.RevlogError(msg)

    features = FEATURES_BY_VERSION[self._format_version]
    self._inline = features[b'inline'](self._format_flags)
    self._generaldelta = features[b'generaldelta'](self._format_flags)
    self.hassidedata = features[b'sidedata']

    if not features[b'docket']:
        # the entry point is the index itself
        self._indexfile = entry_point
        index_data = entry_data
    else:
        # the entry point is a docket, pointing to the actual index
        self._docket_file = entry_point
        if self._initempty:
            self._docket = docketutil.default_docket(self, header)
        else:
            self._docket = docketutil.parse_docket(
                self, entry_data, use_pending=self._trypending
            )
        self._indexfile = self._docket.index_filepath()
        index_data = b''
        index_size = self._docket.index_end
        if index_size > 0:
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None

    if self._docket is not None:
        self._datafile = self._docket.data_filepath()
        self._sidedatafile = self._docket.sidedata_filepath()
    elif self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is not None:
            use_rust_index = True
        else:
            use_rust_index = self.opener.options.get(b'rust.index')

    # The format version takes precedence over the nodemap/rust variants.
    self._parse_index = parse_index_v1
    if self._format_version == REVLOGV0:
        self._parse_index = revlogv0.parse_index_v0
    elif self._format_version == REVLOGV2:
        self._parse_index = parse_index_v2
    elif self._format_version == CHANGELOGV2:
        self._parse_index = parse_index_cl_v2
    elif devel_nodemap:
        self._parse_index = parse_index_v1_nodemap
    elif use_rust_index:
        self._parse_index = parse_index_v1_mixed
    try:
        d = self._parse_index(index_data, self._inline)
        index, _chunkcache = d
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                docket = nodemap_data[0]
                # only trust the persisted data if its tip matches the index
                if (
                    len(d[0]) > docket.tip_rev
                    and d[0][docket.tip_rev][7] == docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index, self._chunkcache = d
    if not self._chunkcache:
        self._chunkclear()
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}
651 651
@util.propertycache
def revlog_kind(self):
    """The KIND part (first item) of `self.target`"""
    return self.target[0]
655 655
@util.propertycache
def display_id(self):
    """The public facing "ID" of the revlog that we use in message"""
    # Maybe we should build a user facing representation of
    # revlog.target instead of using `self.radix`
    return self.radix
662 662
def _get_decompressor(self, t):
    """Return the compressor object associated with revlog header `t`.

    Objects are created on first use and memoized in `self._decompressors`.

    Raises error.RevlogError if no compression engine matches `t`.
    """
    try:
        compressor = self._decompressors[t]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(t)
            compressor = engine.revlogcompressor(self._compengineopts)
            self._decompressors[t] = compressor
        except KeyError:
            raise error.RevlogError(
                _(b'unknown compression type %s') % binascii.hexlify(t)
            )
    return compressor
676 676
@util.propertycache
def _compressor(self):
    """compressor object built from the configured compression engine"""
    engine = util.compengines[self._compengine]
    return engine.revlogcompressor(self._compengineopts)
681 681
@util.propertycache
def _decompressor(self):
    """the default decompressor

    This is None for revlogs without a docket. Otherwise it is the
    `decompress` callable matching the docket default compression header.
    """
    if self._docket is None:
        return None
    t = self._docket.default_compression_header
    c = self._get_decompressor(t)
    return c.decompress
690 690
def _indexfp(self):
    """file object for the revlog's index file (opened with mode "r")"""
    return self.opener(self._indexfile, mode=b"r")
694 694
def __index_write_fp(self):
    """Return the index file opened for writing, positioned for appending.

    Without a docket the position is the end of the file; with a docket it
    is the docket's `index_end`. If the file does not exist it is created
    (mode "w+").
    """
    # You should not use this directly and use `_writing` instead
    try:
        f = self.opener(
            self._indexfile, mode=b"r+", checkambig=self._checkambig
        )
        if self._docket is None:
            f.seek(0, os.SEEK_END)
        else:
            f.seek(self._docket.index_end, os.SEEK_SET)
        return f
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        return self.opener(
            self._indexfile, mode=b"w+", checkambig=self._checkambig
        )
712 712
def __index_new_fp(self):
    """Return the index file opened in mode "w" with `atomictemp=True`."""
    # You should not use this unless you are upgrading from inline revlog
    return self.opener(
        self._indexfile,
        mode=b"w",
        checkambig=self._checkambig,
        atomictemp=True,
    )
721 721
def _datafp(self, mode=b'r'):
    """file object for the revlog's data file, opened with `mode`"""
    return self.opener(self._datafile, mode=mode)
725 725
@contextlib.contextmanager
def _datareadfp(self, existingfp=None):
    """file object suitable to read data

    In order of preference, this yields: `existingfp` if provided, the
    handle currently used for writing (index handle for inline revlogs, data
    handle otherwise), or a freshly opened handle that is closed when the
    context exits.
    """
    # Use explicit file handle, if given.
    if existingfp is not None:
        yield existingfp

    # Use a file handle being actively used for writes, if available.
    # There is some danger to doing this because reads will seek the
    # file. However, _writeentry() performs a SEEK_END before all writes,
    # so we should be safe.
    elif self._writinghandles:
        if self._inline:
            yield self._writinghandles[0]
        else:
            yield self._writinghandles[1]

    # Otherwise open a new file handle.
    else:
        if self._inline:
            func = self._indexfp
        else:
            func = self._datafp
        with func() as fp:
            yield fp
751 751
@contextlib.contextmanager
def _sidedatareadfp(self):
    """file object suitable to read sidedata

    The third writing handle is yielded when writing is in progress.
    Otherwise the sidedata file is opened, and closed when the context exits.
    """
    if self._writinghandles:
        yield self._writinghandles[2]
    else:
        with self.opener(self._sidedatafile) as fp:
            yield fp
760 760
def tiprev(self):
    """revision number of the last index entry (-1 for an empty index)"""
    return len(self.index) - 1
763 763
def tip(self):
    """node of the revision returned by `tiprev()`"""
    return self.node(self.tiprev())
766 766
def __contains__(self, rev):
    """whether `rev` is a valid revision number for this revlog"""
    return 0 <= rev < len(self)
769 769
def __len__(self):
    """number of entries in the index"""
    return len(self.index)
772 772
def __iter__(self):
    """iterate over the revision numbers, from 0 to len(self) - 1"""
    return iter(pycompat.xrange(len(self)))
775 775
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)

    The actual iteration is delegated to `storageutil.iterrevs`.
    """
    return storageutil.iterrevs(len(self), start=start, stop=stop)
779 779
@property
def nodemap(self):
    """deprecated accessor for `self.index.nodemap`

    A deprecation warning is emitted on each access.
    """
    msg = (
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]"
    )
    util.nouideprecwarn(msg, b'5.3', stacklevel=2)
    return self.index.nodemap
788 788
@property
def _nodecache(self):
    """deprecated accessor for `self.index.nodemap`

    A deprecation warning is emitted on each access.
    """
    msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
    util.nouideprecwarn(msg, b'5.3', stacklevel=2)
    return self.index.nodemap
794 794
def hasnode(self, node):
    """whether `node` can be resolved by `self.rev()`

    Only a KeyError (or subclass) raised by the lookup is turned into False;
    any other exception propagates.
    """
    try:
        self.rev(node)
        return True
    except KeyError:
        return False
801 801
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
        self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
    ):
        return False
    return True
814 814
def update_caches(self, transaction):
    """Update the persistent nodemap, if this revlog uses one.

    Without a transaction the nodemap is updated right away; with one, the
    update is set up through `nodemaputil.setup_persistent_nodemap`.
    Nothing happens when no nodemap file is configured.
    """
    if self._nodemap_file is not None:
        if transaction is None:
            nodemaputil.update_persistent_nodemap(self)
        else:
            nodemaputil.setup_persistent_nodemap(transaction, self)
821 821
def clearcaches(self):
    """Reset the in-memory caches of this revlog and of its index.

    When a persistent nodemap is usable (non-inline revlog, nodemap file
    configured, index supporting `update_nodemap_data`), the persisted
    nodemap data is loaded again afterward.
    """
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, b'')
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()
    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    use_nodemap = (
        not self._inline
        and self._nodemap_file is not None
        and util.safehasattr(self.index, 'update_nodemap_data')
    )
    if use_nodemap:
        nodemap_data = nodemaputil.persisted_data(self)
        if nodemap_data is not None:
            self._nodemap_docket = nodemap_data[0]
            self.index.update_nodemap_data(*nodemap_data)
841 841
def rev(self, node):
    """Return the revision number associated with `node`.

    Raises error.WdirUnsupported if the lookup fails and `node` is one of
    the working directory pseudo nodes, error.LookupError if it fails for
    any other node. A TypeError raised by the index is propagated untouched.
    """
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))
855 855
856 856 # Accessors for index entries.
857 857
# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
# are flags.
def start(self, rev):
    """offset of `rev` (first index field with the 16 flag bits dropped)"""
    return int(self.index[rev][0] >> 16)
862 862
def sidedata_cut_off(self, rev):
    """Return a sidedata offset for `rev`.

    This is the sidedata offset recorded for `rev` when it is non-zero.
    Otherwise it is the end (offset + length) of the sidedata of the closest
    revision, starting at `rev` itself and going backward, that has a
    non-zero sidedata length; 0 if there is none.
    """
    sd_cut_off = self.index[rev][8]
    if sd_cut_off != 0:
        return sd_cut_off
    # This is some annoying dance, because entries without sidedata
    # currently use 0 as their offset. (instead of previous-offset +
    # previous-size)
    #
    # We should reconsider this "no sidedata -> 0 sidedata_offset" policy.
    # In the meantime, we need this.
    while 0 <= rev:
        e = self.index[rev]
        if e[9] != 0:
            return e[8] + e[9]
        rev -= 1
    return 0
879 879
def flags(self, rev):
    """flags of `rev` (low 16 bits of the first index field)"""
    return self.index[rev][0] & 0xFFFF
882 882
def length(self, rev):
    """second index field of `rev`; `end()` adds it to `start()`"""
    return self.index[rev][1]
885 885
def sidedata_length(self, rev):
    """length of the sidedata of `rev` (0 for revlogs without sidedata)"""
    if not self.hassidedata:
        return 0
    return self.index[rev][9]
890 890
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    l = self.index[rev][2]
    if l >= 0:
        return l

    # a negative value means the size is not recorded: compute it
    t = self.rawdata(rev)
    return len(t)
899 899
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    flags = self.flags(rev)
    if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
        return self.rawsize(rev)

    return len(self.revision(rev, raw=False))
909 909
def chainbase(self, rev):
    """Return the revision at the base of the delta chain of `rev`.

    The base field of the index entries is followed until an entry that is
    its own base is reached. The result is memoized in
    `self._chainbasecache`.
    """
    base = self._chainbasecache.get(rev)
    if base is not None:
        return base

    index = self.index
    iterrev = rev
    base = index[iterrev][3]
    while base != iterrev:
        iterrev = base
        base = index[iterrev][3]

    self._chainbasecache[rev] = base
    return base
924 924
def linkrev(self, rev):
    """linkrev recorded in the index entry of `rev`"""
    return self.index[rev][4]
927 927
def parentrevs(self, rev):
    """Return the revision numbers of the two parents of `rev`.

    If the first recorded parent is `nullrev`, the two parents are swapped,
    so that a null parent is returned last.

    Raises error.WdirUnsupported if `rev` is `wdirrev` and is not in the
    index; the IndexError is propagated for any other unknown revision.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    if entry[5] == nullrev:
        return entry[6], entry[5]
    else:
        return entry[5], entry[6]

# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
942 942
def node(self, rev):
    """Return the node id (index field 7) of revision ``rev``.

    Raises ``error.WdirUnsupported`` when ``rev`` is the working
    directory pseudo-revision; any other ``IndexError`` is re-raised.
    """
    try:
        nodeid = self.index[rev][7]
    except IndexError:
        if rev != wdirrev:
            raise
        raise error.WdirUnsupported
    return nodeid
950 950
951 951 # Derived from index values.
952 952
def end(self, rev):
    """Return the offset just past the stored data of ``rev``."""
    begin = self.start(rev)
    return begin + self.length(rev)
955 955
def parents(self, node):
    """Return the parent node ids ``(p1, p2)`` of ``node``.

    When the first parent is null the two are swapped, so a null parent
    never precedes a non-null one. This mirrors ``parentrevs``.
    """
    index = self.index
    entry = index[self.rev(node)]
    # inline node() to avoid function call overhead
    p1node = index[entry[5]][7]
    p2node = index[entry[6]][7]
    # entry[5] is a revision number, not a node id, so comparing it with
    # ``self.nullid`` directly can never match; compare the resolved
    # node instead.
    if p1node == self.nullid:
        return p2node, p1node
    return p1node, p2node
964 964
def chainlen(self, rev):
    """Return the delta chain length of ``rev``.

    This is the first element of ``self._chaininfo(rev)``.
    """
    info = self._chaininfo(rev)
    return info[0]
967 967
968 968 def _chaininfo(self, rev):
969 969 chaininfocache = self._chaininfocache
970 970 if rev in chaininfocache:
971 971 return chaininfocache[rev]
972 972 index = self.index
973 973 generaldelta = self._generaldelta
974 974 iterrev = rev
975 975 e = index[iterrev]
976 976 clen = 0
977 977 compresseddeltalen = 0
978 978 while iterrev != e[3]:
979 979 clen += 1
980 980 compresseddeltalen += e[1]
981 981 if generaldelta:
982 982 iterrev = e[3]
983 983 else:
984 984 iterrev -= 1
985 985 if iterrev in chaininfocache:
986 986 t = chaininfocache[iterrev]
987 987 clen += t[0]
988 988 compresseddeltalen += t[1]
989 989 break
990 990 e = index[iterrev]
991 991 else:
992 992 # Add text length of base since decompressing that also takes
993 993 # work. For cache hits the length is already included.
994 994 compresseddeltalen += e[1]
995 995 r = (clen, compresseddeltalen)
996 996 chaininfocache[rev] = r
997 997 return r
998 998
999 999 def _deltachain(self, rev, stoprev=None):
1000 1000 """Obtain the delta chain for a revision.
1001 1001
1002 1002 ``stoprev`` specifies a revision to stop at. If not specified, we
1003 1003 stop at the base of the chain.
1004 1004
1005 1005 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1006 1006 revs in ascending order and ``stopped`` is a bool indicating whether
1007 1007 ``stoprev`` was hit.
1008 1008 """
1009 1009 # Try C implementation.
1010 1010 try:
1011 1011 return self.index.deltachain(rev, stoprev, self._generaldelta)
1012 1012 except AttributeError:
1013 1013 pass
1014 1014
1015 1015 chain = []
1016 1016
1017 1017 # Alias to prevent attribute lookup in tight loop.
1018 1018 index = self.index
1019 1019 generaldelta = self._generaldelta
1020 1020
1021 1021 iterrev = rev
1022 1022 e = index[iterrev]
1023 1023 while iterrev != e[3] and iterrev != stoprev:
1024 1024 chain.append(iterrev)
1025 1025 if generaldelta:
1026 1026 iterrev = e[3]
1027 1027 else:
1028 1028 iterrev -= 1
1029 1029 e = index[iterrev]
1030 1030
1031 1031 if iterrev == stoprev:
1032 1032 stopped = True
1033 1033 else:
1034 1034 chain.append(iterrev)
1035 1035 stopped = False
1036 1036
1037 1037 chain.reverse()
1038 1038 return chain, stopped
1039 1039
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of ``revs`` in reverse revision order.

    Revisions lower than ``stoprev`` are not generated.

    See the documentation for ancestor.lazyancestors for more details.
    """
    # first, make sure start revisions aren't filtered
    revs = list(revs)
    for r in revs:
        self.node(r)
    # and we're sure ancestors aren't filtered as well

    if rustancestor is not None and self.index.rust_ext_compat:
        return rustancestor.LazyAncestors(
            self.index, revs, stoprev=stoprev, inclusive=inclusive
        )
    return ancestor.lazyancestors(
        self._uncheckedparentrevs, revs, stoprev=stoprev, inclusive=inclusive
    )
1060 1060
def descendants(self, revs):
    """Return the descendant revisions of ``revs``.

    Delegates to ``dagop.descendantrevs``.
    """
    revsfn = self.revs
    parentsfn = self.parentrevs
    return dagop.descendantrevs(revs, revsfn, parentsfn)
1063 1063
def findcommonmissing(self, common=None, heads=None):
    """Return the ancestors of common, and what heads adds on top.

    In revset terminology the returned tuple is:

      ::common, (::heads) - (::common)

    The first element is a lazy set-like object of revision numbers.
    The second is a list of nodes sorted by revision number, meaning
    it is topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is not
    supplied, all of the revlog's heads are used. If common is not
    supplied, nullid is used."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            if value in self.addedvalues:
                return True
            return value in self.lazyvalues

        def __iter__(self):
            eager = self.addedvalues
            for r in eager:
                yield r
            for r in self.lazyvalues:
                if r not in eager:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(commonrevs))
    has.add(nullrev)
    has.update(commonrevs)

    # take all ancestors from heads that aren't in has
    missing = set()
    pending = collections.deque(r for r in headrevs if r not in has)
    while pending:
        r = pending.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                pending.append(p)
    return has, [self.node(r) for r in sorted(missing)]
1127 1127
def incrementalmissingrevs(self, common=None):
    """Return an incremental "missing ancestors" calculator.

    The returned object can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. Either ``rustancestor.MissingAncestors`` or
    ``ancestor.incrementalmissingancestors`` is returned.

    'common' is a list of revision numbers. If common is not supplied,
    nullrev is used.
    """
    if common is None:
        common = [nullrev]

    use_rust = rustancestor is not None and self.index.rust_ext_compat
    if not use_rust:
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
    return rustancestor.MissingAncestors(self.index, common)
1143 1143
def findmissingrevs(self, common=None, heads=None):
    """Return the revisions reachable from heads but not from common.

    More specifically, return a list of revision numbers corresponding
    to nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads
    is not supplied, all of the revlog's head revisions are used. If
    common is not supplied, nullrev is used."""
    if heads is None:
        heads = self.headrevs()
    if common is None:
        common = [nullrev]

    calculator = self.incrementalmissingrevs(common=common)
    return calculator.missingancestors(heads)
1167 1167
def findmissing(self, common=None, heads=None):
    """Return the nodes reachable from heads but not from common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is not
    supplied, all of the revlog's heads are used. If common is not
    supplied, nullid is used."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    calculator = self.incrementalmissingrevs(common=commonrevs)
    missingrevs = calculator.missingancestors(headrevs)
    return [self.node(r) for r in missingrevs]
1193 1193
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads.

    When nothing qualifies (empty 'roots' or 'heads', or no root is an
    ancestor of a head), three empty lists are returned."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [self.nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return (
            [self.node(r) for r in self],
            [self.nullid],
            list(self.heads()),
        )
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == self.nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n)  # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update(
                        [p for p in self.parents(n) if p != self.nullid]
                    )
                elif n in heads:  # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # were already marked as descendants before the loop started.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants.  (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # Keep only the heads that were actually reached (flag is True).
    heads = [head for head, flag in pycompat.iteritems(heads) if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1353 1353
def headrevs(self, revs=None):
    """Return the head revisions.

    With ``revs=None`` the heads of the whole revlog are returned,
    using the index's own ``headrevs`` when it has one and
    ``self._headrevs()`` otherwise. With an explicit ``revs`` the
    heads are computed among those revisions.
    """
    if revs is not None:
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    try:
        return self.index.headrevs()
    except AttributeError:
        return self._headrevs()
1363 1363
def computephases(self, roots):
    """Return ``self.index.computephasesmapsets(roots)``."""
    index = self.index
    return index.computephasesmapsets(roots)
1366 1366
1367 1367 def _headrevs(self):
1368 1368 count = len(self)
1369 1369 if not count:
1370 1370 return [nullrev]
1371 1371 # we won't iter over filtered rev so nobody is a head at start
1372 1372 ishead = [0] * (count + 1)
1373 1373 index = self.index
1374 1374 for r in self:
1375 1375 ishead[r] = 1 # I may be an head
1376 1376 e = index[r]
1377 1377 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1378 1378 return [r for r, val in enumerate(ishead) if val]
1379 1379
def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    If start is specified, only heads that are descendants of start
    will be returned. If stop is specified, all the revs from stop are
    considered as if they had no children.
    """
    if start is None and stop is None:
        if len(self):
            return [self.node(r) for r in self.headrevs()]
        return [self.nullid]

    startrev = nullrev if start is None else self.rev(start)
    stoprevs = {self.rev(n) for n in stop or []}

    headrevs = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )
    return [self.node(rev) for rev in headrevs]
1405 1405
def children(self, node):
    """Find the children of a given node.

    Only revisions after ``node`` are examined. A revision without any
    non-null parent counts as a child of the null revision.
    """
    found = []
    target = self.rev(node)
    for r in self.revs(start=target + 1):
        realparents = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if not realparents:
            if target == nullrev:
                found.append(self.node(r))
            continue
        for pr in realparents:
            if pr == target:
                found.append(self.node(r))
    return found
1419 1419
def commonancestorsheads(self, a, b):
    """Calculate all the heads of the common ancestors of nodes a and b.

    The result is a list of nodes.
    """
    reva = self.rev(a)
    revb = self.rev(b)
    headrevs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, headrevs)
1425 1425
1426 1426 def _commonancestorsheads(self, *revs):
1427 1427 """calculate all the heads of the common ancestors of revs"""
1428 1428 try:
1429 1429 ancs = self.index.commonancestorsheads(*revs)
1430 1430 except (AttributeError, OverflowError): # C implementation failed
1431 1431 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1432 1432 return ancs
1433 1433
def isancestor(self, a, b):
    """Return True if node a is an ancestor of node b.

    A revision is considered an ancestor of itself."""
    reva = self.rev(a)
    revb = self.rev(b)
    return self.isancestorrev(reva, revb)
1440 1440
def isancestorrev(self, a, b):
    """Return True if revision a is an ancestor of revision b.

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    if a == nullrev or a == b:
        return True
    if a > b:
        # an ancestor can never have a higher revision number
        return False
    reached = self.reachableroots(a, [b], [a], includepath=False)
    return bool(reached)
1455 1455
def reachableroots(self, minroot, heads, roots, includepath=False):
    """Return (heads(::(<roots> and <roots>::<heads>))).

    If includepath is True, return (<roots>::<heads>).

    The index's ``reachableroots2`` is used when available; otherwise
    the pure implementation in ``dagop`` is used."""
    try:
        native = self.index.reachableroots2
    except AttributeError:
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )
    try:
        return native(minroot, heads, roots, includepath)
    except AttributeError:
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )
1468 1468
def ancestor(self, a, b):
    """Calculate the "best" common ancestor of nodes a and b.

    Returns nullid when no common ancestor is found."""
    reva, revb = self.rev(a), self.rev(b)
    try:
        candidates = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        candidates = ancestor.ancestors(self.parentrevs, reva, revb)
    if not candidates:
        return self.nullid
    # choose a consistent winner when there's a tie
    return min(map(self.node, candidates))
1481 1481
1482 1482 def _match(self, id):
1483 1483 if isinstance(id, int):
1484 1484 # rev
1485 1485 return self.node(id)
1486 1486 if len(id) == self.nodeconstants.nodelen:
1487 1487 # possibly a binary node
1488 1488 # odds of a binary node being all hex in ASCII are 1 in 10**25
1489 1489 try:
1490 1490 node = id
1491 1491 self.rev(node) # quick search the index
1492 1492 return node
1493 1493 except error.LookupError:
1494 1494 pass # may be partial hex id
1495 1495 try:
1496 1496 # str(rev)
1497 1497 rev = int(id)
1498 1498 if b"%d" % rev != id:
1499 1499 raise ValueError
1500 1500 if rev < 0:
1501 1501 rev = len(self) + rev
1502 1502 if rev < 0 or rev >= len(self):
1503 1503 raise ValueError
1504 1504 return self.node(rev)
1505 1505 except (ValueError, OverflowError):
1506 1506 pass
1507 1507 if len(id) == 2 * self.nodeconstants.nodelen:
1508 1508 try:
1509 1509 # a full hex nodeid?
1510 1510 node = bin(id)
1511 1511 self.rev(node)
1512 1512 return node
1513 1513 except (TypeError, error.LookupError):
1514 1514 pass
1515 1515
def _partialmatch(self, id):
    """Resolve the hex prefix ``id`` to a node.

    The index's ``partialmatch`` is tried first; the fallback is a
    linear scan over the index, whose unambiguous hits are remembered
    in ``self._pcache``.

    Returns the matching node, or None when nothing matches.

    Raises ``error.AmbiguousPrefixLookupError`` when the prefix matches
    several nodes, or one node while also being a prefix of the working
    directory hex id. Raises ``error.WdirUnsupported`` when it only
    matches the working directory hex id.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = self.nodeconstants.wdirhex.startswith(id)
    ambiguous = False
    try:
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                ambiguous = True
            else:
                return partial
        elif maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        else:
            return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            ambiguous = True
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass
    if ambiguous:
        raise error.AmbiguousPrefixLookupError(
            id, self.display_id, _(b'ambiguous identifier')
        )

    # result of an earlier slow-path lookup for the same prefix
    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            l = len(id) // 2  # grab an even number of digits
            prefix = bin(id[: l * 2])
            # coarse filter on the binary prefix ...
            nl = [e[7] for e in self.index if e[7].startswith(prefix)]
            # ... then the exact check on the full (possibly odd-length)
            # hex prefix, keeping only nodes accepted by hasnode()
            nl = [
                n for n in nl if hex(n).startswith(id) and self.hasnode(n)
            ]
            if self.nodeconstants.nullhex.startswith(id):
                nl.append(self.nullid)
            if len(nl) > 0:
                if len(nl) == 1 and not maybewdir:
                    self._pcache[id] = nl[0]
                    return nl[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            # bin() rejected the prefix; the method then returns None
            pass
1573 1573
def lookup(self, id):
    """Locate a node based on:

    - revision number or str(revision number)
    - nodeid or subset of hex nodeid

    Raises ``error.LookupError`` when nothing matches.
    """
    exact = self._match(id)
    if exact is not None:
        return exact

    partial = self._partialmatch(id)
    if not partial:
        raise error.LookupError(id, self.display_id, _(b'no match found'))
    return partial
1587 1587
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    ``minlength`` is the smallest prefix length to consider. The result
    is a prefix of ``hex(node)``.

    Raises ``error.LookupError`` when ``node`` cannot be found.
    """

    def isvalid(prefix):
        # True when ``prefix`` resolves without ambiguity
        try:
            matchednode = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matchednode is None:
            raise error.LookupError(node, self.display_id, _(b'no node'))
        return True

    def maybewdir(prefix):
        # a prefix made only of 'f' could also designate the wdir id
        return all(c == b'f' for c in pycompat.iterbytestr(prefix))

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        # lengthen the prefix until it is no longer made only of 'f'
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    # Fast path, only taken when no 'filteredrevs' are set: ask the index.
    if not getattr(self, 'filteredrevs', None):
        try:
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    if node == self.nodeconstants.wdirid:
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1638 1638
def cmp(self, node, text):
    """Compare text with a given file revision.

    Returns True if text is different than what is stored. The check
    hashes ``text`` with the parents of ``node`` and compares the
    result with ``node``.
    """
    parent1, parent2 = self.parents(node)
    computed = storageutil.hashrevisionsha1(text, parent1, parent2)
    return computed != node
1646 1646
1647 1647 def _cachesegment(self, offset, data):
1648 1648 """Add a segment to the revlog cache.
1649 1649
1650 1650 Accepts an absolute offset and the data that is at that location.
1651 1651 """
1652 1652 o, d = self._chunkcache
1653 1653 # try to add to existing cache
1654 1654 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1655 1655 self._chunkcache = o, d + data
1656 1656 else:
1657 1657 self._chunkcache = offset, data
1658 1658
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.

    Raises ``error.RevlogError`` if the requested number of bytes could
    not be read.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    realoffset = offset & ~(cachesize - 1)
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)

    self._cachesegment(realoffset, d)

    # Position of the requested data inside the window that was read.
    # Computed unconditionally so the partial-read check below is valid
    # on every path (it is 0 when the read started exactly at offset).
    startoffset = offset - realoffset
    got = len(d) - startoffset
    if got < length:
        filename = self._indexfile if self._inline else self._datafile
        m = PARTIAL_READ_MSG % (filename, length, offset, got)
        raise error.RevlogError(m)

    if offset != realoffset or reallength != length:
        # hand back only the requested part of the window
        return util.buffer(d, startoffset, length)
    return d
1701 1701
1702 1702 def _getsegment(self, offset, length, df=None):
1703 1703 """Obtain a segment of raw data from the revlog.
1704 1704
1705 1705 Accepts an absolute offset, length of bytes to obtain, and an
1706 1706 optional file handle to the already-opened revlog. If the file
1707 1707 handle is used, it's original seek position will not be preserved.
1708 1708
1709 1709 Requests for data may be returned from a cache.
1710 1710
1711 1711 Returns a str or a buffer instance of raw byte data.
1712 1712 """
1713 1713 o, d = self._chunkcache
1714 1714 l = len(d)
1715 1715
1716 1716 # is it in the cache?
1717 1717 cachestart = offset - o
1718 1718 cacheend = cachestart + length
1719 1719 if cachestart >= 0 and cacheend <= l:
1720 1720 if cachestart == 0 and cacheend == l:
1721 1721 return d # avoid a copy
1722 1722 return util.buffer(d, cachestart, cacheend - cachestart)
1723 1723
1724 1724 return self._readsegment(offset, length, df=df)
1725 1725
1726 1726 def _getsegmentforrevs(self, startrev, endrev, df=None):
1727 1727 """Obtain a segment of raw data corresponding to a range of revisions.
1728 1728
1729 1729 Accepts the start and end revisions and an optional already-open
1730 1730 file handle to be used for reading. If the file handle is read, its
1731 1731 seek position will not be preserved.
1732 1732
1733 1733 Requests for data may be satisfied by a cache.
1734 1734
1735 1735 Returns a 2-tuple of (offset, data) for the requested range of
1736 1736 revisions. Offset is the integer offset from the beginning of the
1737 1737 revlog and data is a str or buffer of the raw byte data.
1738 1738
1739 1739 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1740 1740 to determine where each revision's data begins and ends.
1741 1741 """
1742 1742 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1743 1743 # (functions are expensive).
1744 1744 index = self.index
1745 1745 istart = index[startrev]
1746 1746 start = int(istart[0] >> 16)
1747 1747 if startrev == endrev:
1748 1748 end = start + istart[1]
1749 1749 else:
1750 1750 iend = index[endrev]
1751 1751 end = int(iend[0] >> 16) + iend[1]
1752 1752
1753 1753 if self._inline:
1754 1754 start += (startrev + 1) * self.index.entry_size
1755 1755 end += (endrev + 1) * self.index.entry_size
1756 1756 length = end - start
1757 1757
1758 1758 return start, self._getsegment(start, length, df=df)
1759 1759
def _chunk(self, rev, df=None):
    """Obtain a single decompressed chunk for a revision.

    Accepts an integer revision and an optional already-open file handle
    to be used for reading. If used, the seek position of the file will not
    be preserved.

    Returns a str holding uncompressed data for the requested revision.

    Raises ``error.RevlogError`` when the index records an unknown
    compression mode for the revision.
    """
    compression_mode = self.index[rev][10]
    data = self._getsegmentforrevs(rev, rev, df=df)[1]
    if compression_mode == COMP_MODE_PLAIN:
        # stored as-is
        return data
    elif compression_mode == COMP_MODE_DEFAULT:
        return self._decompressor(data)
    elif compression_mode == COMP_MODE_INLINE:
        return self.decompress(data)
    else:
        # bytes, like the other error messages in this file
        msg = b'unknown compression mode %d'
        msg %= compression_mode
        raise error.RevlogError(msg)
1781 1781
def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.

    Raises ``error.RevlogError`` when an index entry carries an unknown
    compression mode.
    """
    if not revs:
        return []
    # bind frequently used attributes to locals for the inner loop
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self.index.entry_size
    buffer = util.buffer

    l = []
    ladd = l.append

    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(
            self, revs, targetsize=targetsize
        )

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        # self._decompressor might be None, but will not be used in that case
        def_decomp = self._decompressor
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                # inline revlogs interleave index entries with the data
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            comp_mode = self.index[rev][10]
            c = buffer(data, chunkstart - offset, chunklength)
            if comp_mode == COMP_MODE_PLAIN:
                ladd(c)
            elif comp_mode == COMP_MODE_INLINE:
                ladd(decomp(c))
            elif comp_mode == COMP_MODE_DEFAULT:
                ladd(def_decomp(c))
            else:
                # bytes, like every other error message in this file
                msg = b'unknown compression mode %d'
                msg %= comp_mode
                raise error.RevlogError(msg)

    return l
1849 1849
def _chunkclear(self):
    """Reset the raw chunk cache to its empty (offset 0, no data) state."""
    empty_cache = (0, b'')
    self._chunkcache = empty_cache
1853 1853
def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    ``nullrev`` is returned when the index entry names ``rev`` as its own
    base. Otherwise the recorded base is used for general-delta revlogs and
    the previous revision for the others.
    """
    delta_base = self.index[rev][3]
    if delta_base == rev:
        return nullrev
    if self._generaldelta:
        return delta_base
    return rev - 1
1863 1863
def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot.

    Without sparse-revlog, a revision is a snapshot when it has no delta
    parent. With sparse-revlog the index implementation is used when it
    offers ``issnapshot``; otherwise the delta base is followed until it
    either reaches a full text (snapshot) or one of the parents of the
    revision being inspected (not a snapshot).
    """
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    if util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    if rev == nullrev:
        return True
    index_entry = self.index[rev]
    delta_base = index_entry[3]
    if delta_base in (rev, nullrev):
        return True
    parents = (index_entry[5], index_entry[6])
    if delta_base in parents:
        return False
    return self.issnapshot(delta_base)
1885 1885
def snapshotdepth(self, rev):
    """number of snapshot in the chain before this one

    Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # interpolate the revision number: the original left the ``%d``
        # placeholder unformatted in the message
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    return len(self._deltachain(rev)[0]) - 1
1891 1891
def revdiff(self, rev1, rev2):
    """Return or compute the binary delta turning ``rev1`` into ``rev2``.

    The delta is in the binary form written to revlog data, so it is
    built from raw revision data. When ``rev2`` is already stored as a
    delta against ``rev1`` the stored chunk is returned as is.
    """
    stored_against_rev1 = (
        rev1 != nullrev and self.deltaparent(rev2) == rev1
    )
    if stored_against_rev1:
        return bytes(self._chunk(rev2))

    old_raw = self.rawdata(rev1)
    new_raw = self.rawdata(rev2)
    return mdiff.textdiff(old_raw, new_raw)
1902 1902
def _processflags(self, text, flags, operation, raw=False):
    """Deprecated dispatcher to the flag processors.

    Emits a deprecation warning, then forwards to the raw, read or write
    variant in ``flagutil`` depending on ``raw`` and ``operation``.
    """
    util.nouideprecwarn(
        b'_processflag(...) use the specialized variant',
        b'5.2',
        stacklevel=2,
    )
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    # anything else is treated as a write operation
    return flagutil.processflagswrite(self, text, flags)
1913 1913
def revision(self, nodeorrev, _df=None, raw=False):
    """Return the uncompressed revision for a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    raw - deprecated; when true the data is treated as raw data when
    applying flag transforms. Use ``rawdata()`` instead.
    """
    if raw:
        deprecation = (
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)'
        )
        util.nouideprecwarn(deprecation, b'5.2', stacklevel=2)
    return self._revisiondata(nodeorrev, _df, raw=raw)
1930 1930
def sidedata(self, nodeorrev, _df=None):
    """Return the map of extra data attached to a revision.

    This data is related to the changeset but is not part of the hash.
    A plain dictionary is returned for now; a more advanced (lazy) mapping
    may be used in the future.
    """
    # accept either a revision number or a node
    rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
    return self._sidedata(rev)
1944 1944
def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """Return the (raw or processed) text of a node or revision number.

    The raw text is obtained from ``_rawtext``; flag processors and hash
    validation are applied unless a validated text can be returned
    directly.
    """
    # normalise the argument to a (node, rev) pair; rev may stay unknown
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        rev = None
        node = nodeorrev

    # the null revision is always empty
    if node == self.nullid:
        return b""

    # ``rawtext`` is the text as stored inside the revlog; it may still
    # need processing to become the revision text.
    rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

    if validated and raw:
        # a validated raw text needs no further processing
        return rawtext
    if rev is None:
        rev = self.rev(node)
    # the revlog's flags for this revision
    flags = self.flags(rev)

    if validated and flags == REVIDX_DEFAULT_FLAGS:
        # no flag set means no processor runs: text == rawtext
        return rawtext

    if raw:
        text = rawtext
        validatehash = flagutil.processflagsraw(self, rawtext, flags)
    else:
        text, validatehash = flagutil.processflagsread(self, rawtext, flags)
    if validatehash:
        self.checkhash(text, node, rev=rev)
    if not validated:
        self._revisioncache = (node, rev, rawtext)

    return text
1988 1988
def _rawtext(self, node, rev, _df=None):
    """return the possibly unvalidated rawtext for a revision

    ``rev`` may be None, in which case it is looked up from ``node`` when
    the text has to be rebuilt. ``_df`` is forwarded to ``_chunks`` as the
    file handle to read from.

    returns (rev, rawtext, validated)

    ``validated`` is True only when the text comes straight from the
    revision cache; in that case ``rev`` is returned as received (and may
    still be None).
    """

    # revision in the cache (could be useful to apply delta)
    cachedrev = None
    # An intermediate text to apply deltas to
    basetext = None

    # Check if we have the entry in cache
    # The cache entry looks like (node, rev, rawtext)
    if self._revisioncache:
        if self._revisioncache[0] == node:
            return (rev, self._revisioncache[2], True)
        cachedrev = self._revisioncache[1]

    if rev is None:
        rev = self.rev(node)

    # walk the delta chain, stopping early if it reaches the cached revision
    chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    if stopped:
        basetext = self._revisioncache[2]

    # drop cache to save memory, the caller is expected to
    # update self._revisioncache after validating the text
    self._revisioncache = None

    # slot 2 of the index entry is the uncompressed length; a negative
    # value leaves ``targetsize`` unset
    targetsize = None
    rawsize = self.index[rev][2]
    if 0 <= rawsize:
        targetsize = 4 * rawsize

    bins = self._chunks(chain, df=_df, targetsize=targetsize)
    if basetext is None:
        # no cached base: the first chunk of the chain is the base text
        basetext = bytes(bins[0])
        bins = bins[1:]

    rawtext = mdiff.patches(basetext, bins)
    del basetext  # let us have a chance to free memory early
    return (rev, rawtext, False)
2031 2031
def _sidedata(self, rev):
    """Return the sidedata for a given revision number.

    An empty dict is returned when the index records a zero sidedata size.

    Raises ``error.RevlogError`` when the recorded range lies past the end
    declared by the docket, when fewer bytes than expected could be read,
    or when the sidedata compression mode is unknown.
    """
    index_entry = self.index[rev]
    sidedata_offset = index_entry[8]
    sidedata_size = index_entry[9]

    if self._inline:
        sidedata_offset += self.index.entry_size * (1 + rev)
    if sidedata_size == 0:
        return {}

    # XXX this need caching, as we do for data
    with self._sidedatareadfp() as sdf:
        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        sdf.seek(sidedata_offset, os.SEEK_SET)
        comp_segment = sdf.read(sidedata_size)

        if len(comp_segment) < sidedata_size:
            filename = self._sidedatafile
            length = sidedata_size
            offset = sidedata_offset
            got = len(comp_segment)
            m = PARTIAL_READ_MSG % (filename, length, offset, got)
            raise error.RevlogError(m)

    # slot 11 is the sidedata compression mode; reuse the entry fetched
    # above instead of looking it up a second time
    comp = index_entry[11]
    if comp == COMP_MODE_PLAIN:
        segment = comp_segment
    elif comp == COMP_MODE_DEFAULT:
        segment = self._decompressor(comp_segment)
    elif comp == COMP_MODE_INLINE:
        segment = self.decompress(comp_segment)
    else:
        # bytes, like every other error message in this file
        msg = b'unknown compression mode %d'
        msg %= comp
        raise error.RevlogError(msg)

    sidedata = sidedatautil.deserialize_sidedata(segment)
    return sidedata
2078 2078
def rawdata(self, nodeorrev, _df=None):
    """Return the uncompressed raw data of a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    """
    raw_text = self._revisiondata(nodeorrev, _df, raw=True)
    return raw_text
2085 2085
def hash(self, text, p1, p2):
    """Compute the node hash of ``text`` with parents ``p1`` and ``p2``.

    Kept as a method so that subclasses can substitute another hash.
    """
    node = storageutil.hashrevisionsha1(text, p1, p2)
    return node
2093 2093
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Verify that ``text`` hashes to ``node``.

    When neither parent is given, both are looked up from ``node``. A
    mismatch raises ``error.RevlogError``, or ``error.CensoredNodeError``
    when the revlog is censorable and the text is a censored marker.

    Kept as a method so that subclasses can extend hash mismatch handling.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        expected = self.hash(text, p1, p2)
        if node != expected:
            # Clear the revision cache on hash failure. The revision cache
            # only stores the raw revision and clearing the cache does have
            # the side-effect that we won't have a cache hit when the raw
            # revision data is accessed. But this case should be rare and
            # it is extra work to teach the cache about the hash
            # verification state.
            cached = self._revisioncache
            if cached and cached[0] == node:
                self._revisioncache = None

            # prefer the revision number in the message when it is known
            if rev is None:
                revornode = templatefilters.short(hex(node))
            else:
                revornode = rev
            raise error.RevlogError(
                _(b"integrity check failed on %s:%s")
                % (self.display_id, pycompat.bytestr(revornode))
            )
    except error.RevlogError:
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.display_id, node, text)
        raise
2124 2124
def _enforceinlinesize(self, tr):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.

    ``tr`` is the running transaction; the index file must already be
    registered in it, otherwise ``error.RevlogError`` is raised.
    """
    tiprev = len(self) - 1
    total_size = self.start(tiprev) + self.length(tiprev)
    if not self._inline or total_size < _maxinline:
        return

    troffset = tr.findoffset(self._indexfile)
    if troffset is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self._indexfile
        )
    # first revision located at or after the transaction offset; unknown
    # until the loop below finds it
    trindex = None
    tr.add(self._datafile, 0)

    existing_handles = False
    if self._writinghandles is not None:
        existing_handles = True
        fp = self._writinghandles[0]
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    new_dfh = self._datafp(b'w+')
    new_dfh.truncate(0)  # drop any potentially existing data
    try:
        with self._indexfp() as read_ifh:
            for r in self:
                new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                # Only the FIRST matching revision marks the rollback
                # point. Without the ``trindex is None`` guard every later
                # revision also matches and the last one wins.
                if (
                    trindex is None
                    and troffset
                    <= self.start(r) + r * self.index.entry_size
                ):
                    trindex = r
            new_dfh.flush()

        if trindex is None:
            # no revision at or after the offset: same value the original
            # code used as its initial default
            trindex = 0

        with self.__index_new_fp() as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)
            if self._docket is not None:
                self._docket.index_end = fp.tell()

            # There is a small transactional race here. If the rename of
            # the index fails, we should remove the datafile. It is more
            # important to ensure that the data file is not truncated
            # when the index is replaced as otherwise data is lost.
            tr.replace(self._datafile, self.start(trindex))

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

        if existing_handles:
            # switched from inline to conventional reopen the index
            ifh = self.__index_write_fp()
            self._writinghandles = (ifh, new_dfh, None)
            # ownership moved to ``_writinghandles``: do not close below
            new_dfh = None
    finally:
        if new_dfh is not None:
            new_dfh.close()
2199 2199
def _nodeduplicatecallback(self, transaction, node):
    """Hook called when trying to add a node that is already stored.

    Does nothing here.
    """
    return None
2202 2202
@contextlib.contextmanager
def _writing(self, transaction):
    """Context manager opening the revlog files for writing.

    When write handles already exist the context is re-entered as is.
    Otherwise the data, sidedata and index files are opened as needed,
    registered with ``transaction`` and exposed through
    ``self._writinghandles`` for the duration of the context. On normal
    exit the docket (if any) is written; the handles are always closed.

    Raises ``error.ProgrammingError`` on a ``trypending`` revlog.
    """
    if self._trypending:
        msg = b'try to write in a `trypending` revlog: %s'
        msg %= self.display_id
        raise error.ProgrammingError(msg)
    if self._writinghandles is not None:
        yield
    else:
        ifh = dfh = sdfh = None
        try:
            r = len(self)
            # opening the data file.
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            if self._sidedatafile is not None:
                try:
                    sdfh = self.opener(self._sidedatafile, mode=b"r+")
                    # position the *sidedata* handle; the original seeked
                    # ``dfh`` here, moving the data handle to a sidedata
                    # offset (or failing when ``dfh`` is None)
                    sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    sdfh = self.opener(self._sidedatafile, mode=b"w+")
                transaction.add(
                    self._sidedatafile, self._docket.sidedata_end
                )

            # opening the index file.
            isize = r * self.index.entry_size
            ifh = self.__index_write_fp()
            if self._inline:
                transaction.add(self._indexfile, dsize + isize)
            else:
                transaction.add(self._indexfile, isize)
            # exposing all file handle for writing.
            self._writinghandles = (ifh, dfh, sdfh)
            yield
            if self._docket is not None:
                self._write_docket(transaction)
        finally:
            self._writinghandles = None
            if dfh is not None:
                dfh.close()
            if sdfh is not None:
                # close the sidedata handle itself; the original closed
                # ``dfh`` a second time and leaked ``sdfh``
                sdfh.close()
            # closing the index file last to avoid exposing referent to
            # potential unflushed data content.
            if ifh is not None:
                ifh.close()
2266 2266
def _write_docket(self, transaction):
    """Write the current docket to disk within ``transaction``.

    This exists as a method so that changelog can implement its own
    transaction logic.

    Since dockets are cheap, the same transaction logic could conceivably
    be used for every revlog."""
    docket = self._docket
    docket.write(transaction)
2275 2275
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """Add a revision to the log and return its revision number.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - an optional mapping of sidedata to store with the revision

    If the node is already stored, its existing revision number is
    returned and nothing is written.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is None:
        sidedata = {}
    elif sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    if flags:
        # with flags set, the node is derived from the unprocessed text
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.display_id, rawsize)
        )

    node = node or self.hash(rawtext, p1, p2)
    known_rev = self.index.get_rev(node)
    if known_rev is not None:
        return known_rev

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )
2353 2353
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """Add a raw revision whose flags, node and parents are already known.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle). The write happens inside
    the ``_writing`` context of ``transaction``.
    """
    with self._writing(transaction):
        new_rev = self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
        return new_rev
2384 2384
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, data)`` pair. The header is ``b'u'`` only for
    data stored uncompressed that does not already start with a NUL byte;
    it is empty in every other case.
    """
    if not data:
        return b'', data

    packed = self._compressor.compress(data)
    if packed:
        # The revlog compressor added the header in the returned data.
        return b'', packed

    # stored as is: a leading NUL already marks the chunk as plain data
    header = b'' if data[0:1] == b'\0' else b'u'
    return header, data
2399 2399
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    Empty chunks are returned unchanged.
    """
    if not data:
        return data

    # Revlogs are read far more often than they are written, and many
    # chunks take only microseconds to decompress, so this path is hot.
    #
    # Assumptions made about revlogs:
    #
    # 1) most chunks are compressed (as opposed to inline raw data);
    # 2) decompressing *any* data is likely at least 10x slower than
    #    returning raw inline data;
    # 3) common and officially supported compression engines come first.
    #
    # Hence the well-known headers are tested inline, before falling back
    # to the generic decompressor lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data
    if header == b'u':
        return util.buffer(data, 1)

    engine = self._get_decompressor(header)
    return engine.decompress(data)
2449 2449
def _addrevision(
    self,
    node,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    flags,
    cachedelta,
    alwayscache=False,
    deltacomputer=None,
    sidedata=None,
):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    Returns the revision number of the new entry.

    Raises ``error.RevlogError`` for the null or a working-directory node,
    and ``error.ProgrammingError`` when called outside a
    ``revlog._writing`` context.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == self.nullid:
        raise error.RevlogError(
            _(b"%s: attempt to add null revision") % self.display_id
        )
    if (
        node == self.nodeconstants.wdirid
        or node in self.nodeconstants.wdirfilenodeids
    ):
        raise error.RevlogError(
            _(b"%s: attempt to add wdir revision") % self.display_id
        )
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)

    # the handle holding revision data: the index file when inline, the
    # data file otherwise
    if self._inline:
        fh = self._writinghandles[0]
    else:
        fh = self._writinghandles[1]

    btext = [rawtext]

    curr = len(self)
    prev = curr - 1

    offset = self._get_data_offset(prev)

    if self._concurrencychecker:
        ifh, dfh, sdfh = self._writinghandles
        # XXX no checking for the sidedata file
        if self._inline:
            # offset is "as if" it were in the .d file, so we need to add on
            # the size of the entry metadata.
            self._concurrencychecker(
                ifh, self._indexfile, offset + curr * self.index.entry_size
            )
        else:
            # Entries in the .i are a consistent size.
            self._concurrencychecker(
                ifh, self._indexfile, curr * self.index.entry_size
            )
            self._concurrencychecker(dfh, self._datafile, offset)

    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(
            revlog.size(self, cachedelta[0]), cachedelta[1]
        )
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        deltacomputer = deltautil.deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    # Pick how the data chunk's compression is declared in the index. The
    # dedicated modes are only used when a docket is present.
    compression_mode = COMP_MODE_INLINE
    if self._docket is not None:
        h, d = deltainfo.data
        if not h and not d:
            # not data to store at all... declare them uncompressed
            compression_mode = COMP_MODE_PLAIN
        elif not h:
            t = d[0:1]
            if t == b'\0':
                compression_mode = COMP_MODE_PLAIN
            elif t == self._docket.default_compression_header:
                compression_mode = COMP_MODE_DEFAULT
        elif h == b'u':
            # we have a more efficient way to declare uncompressed
            h = b''
            compression_mode = COMP_MODE_PLAIN
            deltainfo = deltautil.drop_u_compression(deltainfo)

    sidedata_compression_mode = COMP_MODE_INLINE
    if sidedata and self.hassidedata:
        sidedata_compression_mode = COMP_MODE_PLAIN
        serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
        sidedata_offset = self._docket.sidedata_end
        h, comp_sidedata = self.compress(serialized_sidedata)
        # only keep the compressed form when it is actually smaller and
        # cannot be mistaken for plain data
        if (
            h != b'u'
            and comp_sidedata[0:1] != b'\0'
            and len(comp_sidedata) < len(serialized_sidedata)
        ):
            assert not h
            if (
                comp_sidedata[0:1]
                == self._docket.default_compression_header
            ):
                sidedata_compression_mode = COMP_MODE_DEFAULT
                serialized_sidedata = comp_sidedata
            else:
                sidedata_compression_mode = COMP_MODE_INLINE
                serialized_sidedata = comp_sidedata
    else:
        serialized_sidedata = b""
        # Don't store the offset if the sidedata is empty, that way
        # we can easily detect empty sidedata and they will be no different
        # than ones we manually add.
        sidedata_offset = 0

    # Build the index entry through the keyword-based helper. The earlier
    # positional tuple literal that also appeared in this span is dropped:
    # it was never closed and was superseded by this call.
    e = revlogutils.entry(
        flags=flags,
        data_offset=offset,
        data_compressed_length=deltainfo.deltalen,
        data_uncompressed_length=textlen,
        data_compression_mode=compression_mode,
        data_delta_base=deltainfo.base,
        link_rev=link,
        parent_rev_1=p1r,
        parent_rev_2=p2r,
        node_id=node,
        sidedata_offset=sidedata_offset,
        sidedata_compressed_length=len(serialized_sidedata),
        sidedata_compression_mode=sidedata_compression_mode,
    )

    self.index.append(e)
    entry = self.index.entry_binary(curr)
    if curr == 0 and self._docket is None:
        # without a docket, the first entry is prefixed by the header
        header = self._format_flags | self._format_version
        header = self.index.pack_header(header)
        entry = header + entry
    self._writeentry(
        transaction,
        entry,
        deltainfo.data,
        link,
        offset,
        serialized_sidedata,
        sidedata_offset,
    )

    rawtext = btext[0]

    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo, fh)

    if type(rawtext) == bytes:  # only accept immutable objects
        self._revisioncache = (node, curr, rawtext)
    self._chainbasecache[curr] = deltainfo.chainbase
    return curr
2627 2628
def _get_data_offset(self, prev):
    """Return the current offset in the (in-transaction) data file.

    Without a docket this is the end of revision ``prev``, available in
    O(1). Revlog v2 needs the docket to store that information: sidedata
    can be rewritten to the end of the data file within a transaction, so
    rev `n` may have no sidedata while rev `n - 1` does, which leads to
    `n - 1`'s sidedata being written after `n`'s data.

    TODO cache this in a docket file before getting out of experimental."""
    docket = self._docket
    if docket is not None:
        return docket.data_end
    return self.end(prev)
2641 2642
def _writeentry(
    self, transaction, entry, data, link, offset, sidedata, sidedata_offset
):
    """Write one packed index entry and its revision payload to disk.

    ``entry`` is the packed index entry, ``data`` a two-item sequence
    whose first item may be empty, and ``sidedata`` the serialized
    sidedata (must be empty for inline revlogs). ``offset`` and
    ``sidedata_offset`` are registered with ``transaction`` before any
    byte is written. ``link`` is not used by this method.

    Raises ``error.ProgrammingError`` when no writing handles are open.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    handles = self._writinghandles
    if handles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    ifh, dfh, sdfh = handles

    docket = self._docket
    if docket is None:
        # no docket: the physical end of each file is the append position
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)
    else:
        # with a docket, the recorded ends are authoritative
        ifh.seek(docket.index_end, os.SEEK_SET)
        if dfh:
            dfh.seek(docket.data_end, os.SEEK_SET)
    if sdfh:
        sdfh.seek(docket.sidedata_end, os.SEEK_SET)

    curr = len(self) - 1
    if self._inline:
        # index entry and data are interleaved in the index file
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        assert not sidedata
        self._enforceinlinesize(transaction)
    else:
        transaction.add(self._datafile, offset)
        if self._sidedatafile:
            transaction.add(self._sidedatafile, sidedata_offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            sdfh.write(sidedata)
        ifh.write(entry)

    if self._docket is not None:
        # re-read the handles from self rather than reusing the locals
        current = self._writinghandles
        self._docket.index_end = current[0].tell()
        self._docket.data_end = current[1].tell()
        self._docket.sidedata_end = current[2].tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)
2699 2700
def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Each item of ``deltas`` is an 8-tuple ``(node, p1, p2, linknode,
    deltabase, delta, flags, sidedata)``.

    If ``addrevisioncb`` is defined, it is called with this revlog and
    the revision number of each revision that was added. If
    ``duplicaterevisioncb`` is defined, it is called with this revlog and
    the revision number of each node that was already stored.

    Returns True if at least one item of ``deltas`` was processed.
    """

    if self._adding_group:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    self._adding_group = True
    processed = False
    try:
        with self._writing(transaction):
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for (
                node,
                p1,
                p2,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            ) in deltas:
                link = linkmapper(linknode)
                if not flags:
                    flags = REVIDX_DEFAULT_FLAGS

                known_rev = self.index.get_rev(node)
                if known_rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, known_rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, known_rev)
                    processed = True
                    continue

                for parent in (p1, p2):
                    if not self.index.has_node(parent):
                        raise error.LookupError(
                            parent, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    expected_header = mdiff.replacediffheader(oldlen, newlen)
                    if delta[:hlen] != expected_header:
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                new_rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, new_rev)
                processed = True
    finally:
        self._adding_group = False
    return processed
2808 2809
def iscensored(self, rev):
    """Check if a file revision is censored.

    Returns False for revlogs that are not censorable; otherwise returns
    the (truthy or zero) result of masking the revision flags with
    ``REVIDX_ISCENSORED``.
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2815 2816
2816 2817 def _peek_iscensored(self, baserev, delta):
2817 2818 """Quickly check if a delta produces a censored revision."""
2818 2819 if not self._censorable:
2819 2820 return False
2820 2821
2821 2822 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2822 2823
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )
2836 2837
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.

    Nothing is done when the revlog is empty or when the strip point is
    past the last revision.
    """
    if not len(self):
        return

    striprev, _brokenrevs = self.getstrippoint(minlink)
    if striprev == len(self):
        return

    # first truncate the files on disk
    data_end = self.start(striprev)
    index_bytes = striprev * self.index.entry_size
    if self._inline:
        # inline revlogs keep index entries and data in the same file
        end = data_end + index_bytes
    else:
        transaction.add(self._datafile, data_end)
        end = index_bytes

    if self._sidedatafile:
        sidedata_end = self.sidedata_cut_off(striprev)
        transaction.add(self._sidedatafile, sidedata_end)

    transaction.add(self._indexfile, end)
    docket = self._docket
    if docket is not None:
        # XXX we could leverage the docket while stripping. However it is
        # not powerful enough at the time of this comment
        docket.index_end = end
        docket.data_end = data_end
        docket.sidedata_end = sidedata_end
        docket.write(transaction, stripping=True)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._chunkclear()

    del self.index[striprev:-1]
2885 2886
def checksize(self):
    """Check size of index and data files

    return a (dd, di) tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).

    A missing file (``IOError`` with ``ENOENT``) counts as zero extra
    bytes; any other ``IOError`` is re-raised.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        # Use a context manager so the handle is released even when
        # seek()/tell() fail.
        with self.opener(self._indexfile) as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        s = self.index.entry_size
        i = max(0, actual // s)
        # bytes left over after the last complete index entry
        di = actual - (i * s)
        if self._inline:
            # inline revlogs keep the data in the index file: subtract
            # both the entries and the stored chunks
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2929 2930
def files(self):
    """Return the list of file names backing this revlog.

    The index file always comes first; the data file is listed only when
    the revlog is not inline.
    """
    if self._inline:
        return [self._indexfile]
    return [self._indexfile, self._datafile]
2935 2936
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Emit revision data for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
    ``b'linear'`` or None; anything else raises
    ``error.ProgrammingError``. When it is None and the revlog does not
    use generaldelta, storage order is used. When delta chains are not
    stored, ``deltamode`` is forced to full revisions unless the caller
    asked for ``CG_DELTAMODE_PREV``.
    """
    accepted_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in accepted_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    if not self._storedeltachains:
        if deltamode != repository.CG_DELTAMODE_PREV:
            deltamode = repository.CG_DELTAMODE_FULL

    options = dict(
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
    )
    return storageutil.emitrevisions(
        self, nodes, nodesorder, revlogrevisiondelta, **options
    )
2974 2975
# Delta reuse policies understood by ``clone()``.
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

# Every accepted policy value, used to validate the ``deltareuse`` argument.
DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}
2982 2983
def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. The argument can have
    the following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is the
       fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).
    DELTAREUSEFULLADD
       Revisions will be re-added as if they were new content. This is
       slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
       e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. By default, the current default is used.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` when ``deltareuse`` is unknown, when the
    destination is not empty, or when either revlog has filtered
    revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase controls whether to reuse a cached delta,
    # if possible. Remember the destination's settings so they can be
    # restored once the copy is done.
    saved_lazydelta = destrevlog._lazydelta
    saved_lazydeltabase = destrevlog._lazydeltabase
    saved_bothparents = destrevlog._deltabothparents

    try:
        # policy -> (lazydeltabase, lazydelta); DELTAREUSEFULLADD is
        # deliberately absent and leaves the destination untouched.
        lazy_by_policy = {
            self.DELTAREUSEALWAYS: (True, True),
            self.DELTAREUSESAMEREVS: (False, True),
            self.DELTAREUSENEVER: (False, False),
        }
        lazy = lazy_by_policy.get(deltareuse)
        if lazy is not None:
            destrevlog._lazydeltabase = lazy[0]
            destrevlog._lazydelta = lazy[1]

        destrevlog._deltabothparents = (
            forcedeltabothparents or saved_bothparents
        )

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )

    finally:
        destrevlog._lazydelta = saved_lazydelta
        destrevlog._lazydeltabase = saved_lazydeltabase
        destrevlog._deltabothparents = saved_bothparents
3081 3082
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Every revision of this revlog is copied into ``destrevlog`` in
    iteration order. ``addrevisioncb``, when set, is called with this
    revlog, the revision number and the node after each copy.
    ``forcedeltabothparents`` is accepted but not read here.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    for rev in self:
        raw_entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = raw_entry[0] & 0xFFFF
        linkrev = raw_entry[4]
        p1 = index[raw_entry[5]][7]
        p2 = index[raw_entry[6]][7]
        node = raw_entry[7]

        cachedelta = None
        rawtext = None
        fulladd = deltareuse == self.DELTAREUSEFULLADD
        if fulladd:
            text = self._revisiondata(rev)
            sidedata = self.sidedata(rev)
        else:
            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            if destrevlog._lazydelta:
                deltaparent = self.deltaparent(rev)
                if deltaparent != nullrev:
                    cachedelta = (deltaparent, bytes(self._chunk(rev)))

            sidedata = None
            if not cachedelta:
                rawtext = self._revisiondata(rev)
                sidedata = self.sidedata(rev)
            if sidedata is None:
                sidedata = self.sidedata(rev)

        if sidedata_helpers is not None:
            (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                self, sidedata_helpers, sidedata, rev
            )
            # `&` binds tighter than `|`: new_flags[1] only masks
            # new_flags[0], not the flags already on the revision.
            flags = flags | (new_flags[0] & ~new_flags[1])

        if fulladd:
            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            with destrevlog._writing(tr):
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

        if addrevisioncb:
            addrevisioncb(self, rev, node)
3166 3167
def censorrevision(self, tr, censornode, tombstone=b''):
    """Censor ``censornode``, delegating to ``censor.v1_censor``.

    Only version 1 revlogs are handled; any other format version
    (version 0, revlog v2, ...) raises ``error.RevlogError``.
    """
    version = self._format_version
    if version != REVLOGV1:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs') % version
        )
    censor.v1_censor(self, tr, censornode, tombstone)
3181 3182
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is a dict shared with the verifier: ``expectedversion``
    and ``erroroncensored`` are read, ``skipflags`` is read if present,
    and ``skipread`` / ``safe_renamed`` are reset to empty sets.
    """
    extra_data, extra_index = self.checksize()
    if extra_data:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % extra_data
        )
    if extra_index:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % extra_index
        )

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    expected_version = state[b'expectedversion']
    if version != expected_version:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, version, expected_version)
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        #    rawsize() | L1      | L1     | L1    | L1
        #       size() | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        #    len(text) | L2      | L2     | L2    | L3
        #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            indexed_size = self.rawsize(rev)
            unpacked_size = len(self.rawdata(node))

            if indexed_size != unpacked_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (unpacked_size, indexed_size),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as exc:
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(exc)),
                node=node,
            )
            state[b'skipread'].add(node)
3286 3287
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict holding the storage details that were asked for.

    Each true argument adds the bytes key of the same name:

    - ``exclusivefiles``: ``(opener, path)`` pairs for the index file and,
      when not inline, the data file
    - ``sharedfiles``: always an empty list
    - ``revisionscount``: ``len(self)``
    - ``trackedsize``: sum of ``rawsize()`` over all revisions
    - ``storedsize``: sum of the on-disk size of every path in ``files()``
    """
    info = {}

    if exclusivefiles:
        exclusive = [(self.opener, self._indexfile)]
        info[b'exclusivefiles'] = exclusive
        if not self._inline:
            exclusive.append((self.opener, self._datafile))

    if sharedfiles:
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        sizes = [self.opener.stat(path).st_size for path in self.files()]
        info[b'storedsize'] = sum(sizes)

    return info
3317 3318
def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
    """Generate and store sidedata for revisions ``startrev``..``endrev``.

    ``endrev`` is inclusive. ``helpers`` is the sidedata helpers object
    handed to ``sidedatautil.run_sidedata_helpers``; nothing is done when
    both ``helpers[1]`` and ``helpers[2]`` are empty, or when this revlog
    has no sidedata support.

    The new sidedata is appended to the sidedata file first, then the
    affected index entries are rewritten in place.

    Raises ``error.Abort`` if one of the revisions already has sidedata.
    """
    if not self.hassidedata:
        return
    # revlog formats with sidedata support does not support inline
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    new_entries = []
    # append the new sidedata
    with self._writing(transaction):
        ifh, _dfh, sdfh = self._writinghandles
        # Start appending at the recorded end of the sidedata file. The
        # handle positioned here must be the one whose offset is read
        # just below (this used to seek the data file handle instead).
        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        current_offset = sdfh.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )

            sidedata_compression_mode = COMP_MODE_INLINE
            if serialized_sidedata and self.hassidedata:
                sidedata_compression_mode = COMP_MODE_PLAIN
                h, comp_sidedata = self.compress(serialized_sidedata)
                # Slice ([0:1]) rather than index ([0]): indexing bytes
                # yields an int on Python 3, which never compares equal to
                # a bytes value.
                if (
                    h != b'u'
                    and comp_sidedata[0:1] != b'\0'
                    and len(comp_sidedata) < len(serialized_sidedata)
                ):
                    assert not h
                    if (
                        comp_sidedata[0:1]
                        == self._docket.default_compression_header
                    ):
                        sidedata_compression_mode = COMP_MODE_DEFAULT
                        serialized_sidedata = comp_sidedata
                    else:
                        sidedata_compression_mode = COMP_MODE_INLINE
                        serialized_sidedata = comp_sidedata
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers
            # NOTE(review): `&` binds tighter than `|`, so flags[1] only
            # masks flags[0] and never clears bits already in entry[0].
            # Behavior kept as-is; confirm whether removal was intended.
            new_offset_flags = entry[0] | (flags[0] & ~flags[1])
            entry_update = (
                current_offset,
                len(serialized_sidedata),
                new_offset_flags,
                sidedata_compression_mode,
            )

            # the sidedata computation might have moved the file cursors
            # around
            sdfh.seek(current_offset, os.SEEK_SET)
            sdfh.write(serialized_sidedata)
            new_entries.append(entry_update)
            current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

        # rewrite the new index entries
        ifh.seek(startrev * self.index.entry_size)
        for i, e in enumerate(new_entries):
            rev = startrev + i
            self.index.replace_sidedata_info(rev, *e)
            packed = self.index.entry_binary(rev)
            if rev == 0 and self._docket is None:
                # without a docket the first entry also carries the header
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now